summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r--drivers/nvme/host/pci.c126
1 files changed, 81 insertions, 45 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 5a0bf6a24d50..7fee665ec45e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -95,6 +95,7 @@ struct nvme_dev;
struct nvme_queue;
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown);
+static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode);
/*
* Represents an NVM Express device. Each nvme_dev is a PCI function.
@@ -1019,9 +1020,11 @@ static void nvme_complete_cqes(struct nvme_queue *nvmeq, u16 start, u16 end)
static inline void nvme_update_cq_head(struct nvme_queue *nvmeq)
{
- if (++nvmeq->cq_head == nvmeq->q_depth) {
+ if (nvmeq->cq_head == nvmeq->q_depth - 1) {
nvmeq->cq_head = 0;
nvmeq->cq_phase = !nvmeq->cq_phase;
+ } else {
+ nvmeq->cq_head++;
}
}
@@ -1420,6 +1423,14 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
return 0;
}
+static void nvme_suspend_io_queues(struct nvme_dev *dev)
+{
+ int i;
+
+ for (i = dev->ctrl.queue_count - 1; i > 0; i--)
+ nvme_suspend_queue(&dev->queues[i]);
+}
+
static void nvme_disable_admin_queue(struct nvme_dev *dev, bool shutdown)
{
struct nvme_queue *nvmeq = &dev->queues[0];
@@ -1485,8 +1496,8 @@ static int nvme_alloc_queue(struct nvme_dev *dev, int qid, int depth)
if (dev->ctrl.queue_count > qid)
return 0;
- nvmeq->cqes = dma_zalloc_coherent(dev->dev, CQ_SIZE(depth),
- &nvmeq->cq_dma_addr, GFP_KERNEL);
+ nvmeq->cqes = dma_alloc_coherent(dev->dev, CQ_SIZE(depth),
+ &nvmeq->cq_dma_addr, GFP_KERNEL);
if (!nvmeq->cqes)
goto free_nvmeq;
@@ -1885,8 +1896,9 @@ static void nvme_free_host_mem(struct nvme_dev *dev)
struct nvme_host_mem_buf_desc *desc = &dev->host_mem_descs[i];
size_t size = le32_to_cpu(desc->size) * dev->ctrl.page_size;
- dma_free_coherent(dev->dev, size, dev->host_mem_desc_bufs[i],
- le64_to_cpu(desc->addr));
+ dma_free_attrs(dev->dev, size, dev->host_mem_desc_bufs[i],
+ le64_to_cpu(desc->addr),
+ DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
}
kfree(dev->host_mem_desc_bufs);
@@ -1915,8 +1927,8 @@ static int __nvme_alloc_host_mem(struct nvme_dev *dev, u64 preferred,
if (dev->ctrl.hmmaxd && dev->ctrl.hmmaxd < max_entries)
max_entries = dev->ctrl.hmmaxd;
- descs = dma_zalloc_coherent(dev->dev, max_entries * sizeof(*descs),
- &descs_dma, GFP_KERNEL);
+ descs = dma_alloc_coherent(dev->dev, max_entries * sizeof(*descs),
+ &descs_dma, GFP_KERNEL);
if (!descs)
goto out;
@@ -1952,8 +1964,9 @@ out_free_bufs:
while (--i >= 0) {
size_t size = le32_to_cpu(descs[i].size) * dev->ctrl.page_size;
- dma_free_coherent(dev->dev, size, bufs[i],
- le64_to_cpu(descs[i].addr));
+ dma_free_attrs(dev->dev, size, bufs[i],
+ le64_to_cpu(descs[i].addr),
+ DMA_ATTR_NO_KERNEL_MAPPING | DMA_ATTR_NO_WARN);
}
kfree(bufs);
@@ -2028,14 +2041,18 @@ static int nvme_setup_host_mem(struct nvme_dev *dev)
return ret;
}
+/* irq_queues covers admin queue */
static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
{
unsigned int this_w_queues = write_queues;
+ WARN_ON(!irq_queues);
+
/*
- * Setup read/write queue split
+ * Setup read/write queue split, assign admin queue one independent
+ * irq vector if irq_queues is > 1.
*/
- if (irq_queues == 1) {
+ if (irq_queues <= 2) {
dev->io_queues[HCTX_TYPE_DEFAULT] = 1;
dev->io_queues[HCTX_TYPE_READ] = 0;
return;
@@ -2043,21 +2060,21 @@ static void nvme_calc_io_queues(struct nvme_dev *dev, unsigned int irq_queues)
/*
* If 'write_queues' is set, ensure it leaves room for at least
- * one read queue
+ * one read queue and one admin queue
*/
if (this_w_queues >= irq_queues)
- this_w_queues = irq_queues - 1;
+ this_w_queues = irq_queues - 2;
/*
* If 'write_queues' is set to zero, reads and writes will share
* a queue set.
*/
if (!this_w_queues) {
- dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues;
+ dev->io_queues[HCTX_TYPE_DEFAULT] = irq_queues - 1;
dev->io_queues[HCTX_TYPE_READ] = 0;
} else {
dev->io_queues[HCTX_TYPE_DEFAULT] = this_w_queues;
- dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues;
+ dev->io_queues[HCTX_TYPE_READ] = irq_queues - this_w_queues - 1;
}
}
@@ -2082,7 +2099,7 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
this_p_queues = nr_io_queues - 1;
irq_queues = 1;
} else {
- irq_queues = nr_io_queues - this_p_queues;
+ irq_queues = nr_io_queues - this_p_queues + 1;
}
dev->io_queues[HCTX_TYPE_POLL] = this_p_queues;
@@ -2102,8 +2119,9 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
* If we got a failure and we're down to asking for just
* 1 + 1 queues, just ask for a single vector. We'll share
* that between the single IO queue and the admin queue.
+ * Otherwise, we assign one independent vector to admin queue.
*/
- if (result >= 0 && irq_queues > 1)
+ if (irq_queues > 1)
irq_queues = irq_sets[0] + irq_sets[1] + 1;
result = pci_alloc_irq_vectors_affinity(pdev, irq_queues,
@@ -2132,6 +2150,12 @@ static int nvme_setup_irqs(struct nvme_dev *dev, unsigned int nr_io_queues)
return result;
}
+static void nvme_disable_io_queues(struct nvme_dev *dev)
+{
+ if (__nvme_disable_io_queues(dev, nvme_admin_delete_sq))
+ __nvme_disable_io_queues(dev, nvme_admin_delete_cq);
+}
+
static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = &dev->queues[0];
@@ -2168,6 +2192,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
} while (1);
adminq->q_db = dev->dbs;
+ retry:
/* Deregister the admin queue's interrupt */
pci_free_irq(pdev, 0, adminq);
@@ -2185,25 +2210,34 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
result = max(result - 1, 1);
dev->max_qid = result + dev->io_queues[HCTX_TYPE_POLL];
- dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
- dev->io_queues[HCTX_TYPE_DEFAULT],
- dev->io_queues[HCTX_TYPE_READ],
- dev->io_queues[HCTX_TYPE_POLL]);
-
/*
* Should investigate if there's a performance win from allocating
* more queues than interrupt vectors; it might allow the submission
* path to scale better, even if the receive path is limited by the
* number of interrupts.
*/
-
result = queue_request_irq(adminq);
if (result) {
adminq->cq_vector = -1;
return result;
}
set_bit(NVMEQ_ENABLED, &adminq->flags);
- return nvme_create_io_queues(dev);
+
+ result = nvme_create_io_queues(dev);
+ if (result || dev->online_queues < 2)
+ return result;
+
+ if (dev->online_queues - 1 < dev->max_qid) {
+ nr_io_queues = dev->online_queues - 1;
+ nvme_disable_io_queues(dev);
+ nvme_suspend_io_queues(dev);
+ goto retry;
+ }
+ dev_info(dev->ctrl.device, "%d/%d/%d default/read/poll queues\n",
+ dev->io_queues[HCTX_TYPE_DEFAULT],
+ dev->io_queues[HCTX_TYPE_READ],
+ dev->io_queues[HCTX_TYPE_POLL]);
+ return 0;
}
static void nvme_del_queue_end(struct request *req, blk_status_t error)
@@ -2248,7 +2282,7 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
return 0;
}
-static bool nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
+static bool __nvme_disable_io_queues(struct nvme_dev *dev, u8 opcode)
{
int nr_queues = dev->online_queues - 1, sent = 0;
unsigned long timeout;
@@ -2294,7 +2328,6 @@ static int nvme_dev_add(struct nvme_dev *dev)
dev->tagset.nr_maps = 2; /* default + read */
if (dev->io_queues[HCTX_TYPE_POLL])
dev->tagset.nr_maps++;
- dev->tagset.nr_maps = HCTX_MAX_TYPES;
dev->tagset.timeout = NVME_IO_TIMEOUT;
dev->tagset.numa_node = dev_to_node(dev->dev);
dev->tagset.queue_depth =
@@ -2410,7 +2443,6 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
- int i;
bool dead = true;
struct pci_dev *pdev = to_pci_dev(dev->dev);
@@ -2437,13 +2469,11 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
nvme_stop_queues(&dev->ctrl);
if (!dead && dev->ctrl.queue_count > 0) {
- if (nvme_disable_io_queues(dev, nvme_admin_delete_sq))
- nvme_disable_io_queues(dev, nvme_admin_delete_cq);
+ nvme_disable_io_queues(dev);
nvme_disable_admin_queue(dev, shutdown);
}
- for (i = dev->ctrl.queue_count - 1; i >= 0; i--)
- nvme_suspend_queue(&dev->queues[i]);
-
+ nvme_suspend_io_queues(dev);
+ nvme_suspend_queue(&dev->queues[0]);
nvme_pci_disable(dev);
blk_mq_tagset_busy_iter(&dev->tagset, nvme_cancel_request, &dev->ctrl);
@@ -2527,27 +2557,18 @@ static void nvme_reset_work(struct work_struct *work)
if (dev->ctrl.ctrl_config & NVME_CC_ENABLE)
nvme_dev_disable(dev, false);
- /*
- * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
- * initializing procedure here.
- */
- if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
- dev_warn(dev->ctrl.device,
- "failed to mark controller CONNECTING\n");
- goto out;
- }
-
+ mutex_lock(&dev->shutdown_lock);
result = nvme_pci_enable(dev);
if (result)
- goto out;
+ goto out_unlock;
result = nvme_pci_configure_admin_queue(dev);
if (result)
- goto out;
+ goto out_unlock;
result = nvme_alloc_admin_tags(dev);
if (result)
- goto out;
+ goto out_unlock;
/*
* Limit the max command size to prevent iod->sg allocations going
@@ -2555,6 +2576,17 @@ static void nvme_reset_work(struct work_struct *work)
*/
dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
dev->ctrl.max_segments = NVME_MAX_SEGS;
+ mutex_unlock(&dev->shutdown_lock);
+
+ /*
+ * Introduce CONNECTING state from nvme-fc/rdma transports to mark the
+ * initializing procedure here.
+ */
+ if (!nvme_change_ctrl_state(&dev->ctrl, NVME_CTRL_CONNECTING)) {
+ dev_warn(dev->ctrl.device,
+ "failed to mark controller CONNECTING\n");
+ goto out;
+ }
result = nvme_init_identify(&dev->ctrl);
if (result)
@@ -2619,6 +2651,8 @@ static void nvme_reset_work(struct work_struct *work)
nvme_start_ctrl(&dev->ctrl);
return;
+ out_unlock:
+ mutex_unlock(&dev->shutdown_lock);
out:
nvme_remove_dead_ctrl(dev, result);
}
@@ -2946,6 +2980,8 @@ static const struct pci_device_id nvme_id_table[] = {
{ PCI_VDEVICE(INTEL, 0xf1a5), /* Intel 600P/P3100 */
.driver_data = NVME_QUIRK_NO_DEEPEST_PS |
NVME_QUIRK_MEDIUM_PRIO_SQ },
+ { PCI_VDEVICE(INTEL, 0xf1a6), /* Intel 760p/Pro 7600p */
+ .driver_data = NVME_QUIRK_IGNORE_DEV_SUBNQN, },
{ PCI_VDEVICE(INTEL, 0x5845), /* Qemu emulated controller */
.driver_data = NVME_QUIRK_IDENTIFY_CNS, },
{ PCI_DEVICE(0x1bb1, 0x0100), /* Seagate Nytro Flash Storage */