diff options
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r-- | drivers/nvme/host/pci.c | 207 |
1 files changed, 96 insertions, 111 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 60f7eab11865..5e52034ab010 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -16,6 +16,7 @@ #include <linux/bitops.h> #include <linux/blkdev.h> #include <linux/blk-mq.h> +#include <linux/blk-mq-pci.h> #include <linux/cpu.h> #include <linux/delay.h> #include <linux/errno.h> @@ -88,7 +89,6 @@ struct nvme_dev { unsigned max_qid; int q_depth; u32 db_stride; - struct msix_entry *entry; void __iomem *bar; struct work_struct reset_work; struct work_struct remove_work; @@ -99,6 +99,7 @@ struct nvme_dev { dma_addr_t cmb_dma_addr; u64 cmb_size; u32 cmbsz; + u32 cmbloc; struct nvme_ctrl ctrl; struct completion ioq_wait; }; @@ -201,6 +202,11 @@ static unsigned int nvme_cmd_size(struct nvme_dev *dev) nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES); } +static int nvmeq_irq(struct nvme_queue *nvmeq) +{ + return pci_irq_vector(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector); +} + static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data, unsigned int hctx_idx) { @@ -263,6 +269,13 @@ static int nvme_init_request(void *data, struct request *req, return 0; } +static int nvme_pci_map_queues(struct blk_mq_tag_set *set) +{ + struct nvme_dev *dev = set->driver_data; + + return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev)); +} + /** * __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell * @nvmeq: The queue to use @@ -503,7 +516,8 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, goto out; ret = BLK_MQ_RQ_QUEUE_BUSY; - if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir)) + if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir, + DMA_ATTR_NO_WARN)) goto out; if (!nvme_setup_prps(dev, req, size)) @@ -880,7 +894,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved) "I/O %d QID %d timeout, reset controller\n", req->tag, nvmeq->qid); nvme_dev_disable(dev, false); - queue_work(nvme_workq, &dev->reset_work); + nvme_reset(dev); /* * Mark the request as handled, since the inline shutdown @@ -960,7 +974,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) spin_unlock_irq(&nvmeq->q_lock); return 1; } - vector = nvmeq->dev->entry[nvmeq->cq_vector].vector; + vector = nvmeq_irq(nvmeq); nvmeq->dev->online_queues--; nvmeq->cq_vector = -1; spin_unlock_irq(&nvmeq->q_lock); @@ -968,7 +982,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq) if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q) blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q); - irq_set_affinity_hint(vector, NULL); free_irq(vector, nvmeq); return 0; @@ -1075,15 +1088,14 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid, return NULL; } -static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq, - const char *name) +static int queue_request_irq(struct nvme_queue *nvmeq) { if (use_threaded_interrupts) - return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector, - nvme_irq_check, nvme_irq, IRQF_SHARED, - name, nvmeq); - return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq, - IRQF_SHARED, name, nvmeq); + return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check, + nvme_irq, IRQF_SHARED, nvmeq->irqname, nvmeq); + else + return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED, + nvmeq->irqname, nvmeq); } static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid) @@ -1114,7 +1126,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) if (result < 0) goto release_cq; - result = queue_request_irq(dev, nvmeq, nvmeq->irqname); + result = queue_request_irq(nvmeq); if (result < 0) goto release_sq; @@ -1131,7 +1143,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid) static struct blk_mq_ops nvme_mq_admin_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_complete_rq, - .map_queue = blk_mq_map_queue, .init_hctx = nvme_admin_init_hctx, .exit_hctx = nvme_admin_exit_hctx, .init_request = nvme_admin_init_request, @@ -1141,9 +1152,9 @@ static struct blk_mq_ops nvme_mq_admin_ops = { static struct blk_mq_ops nvme_mq_ops = { .queue_rq = nvme_queue_rq, .complete = nvme_complete_rq, - .map_queue = blk_mq_map_queue, .init_hctx = nvme_init_hctx, .init_request = nvme_init_request, + .map_queues = nvme_pci_map_queues, .timeout = nvme_timeout, .poll = nvme_poll, }; @@ -1204,7 +1215,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP); struct nvme_queue *nvmeq; - dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ? + dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ? NVME_CAP_NSSRC(cap) : 0; if (dev->subsystem && @@ -1231,20 +1242,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev) result = nvme_enable_ctrl(&dev->ctrl, cap); if (result) - goto free_nvmeq; + return result; nvmeq->cq_vector = 0; - result = queue_request_irq(dev, nvmeq, nvmeq->irqname); + result = queue_request_irq(nvmeq); if (result) { nvmeq->cq_vector = -1; - goto free_nvmeq; + return result; } return result; - - free_nvmeq: - nvme_free_queues(dev, 0); - return result; } static bool nvme_should_reset(struct nvme_dev *dev, u32 csts) @@ -1281,7 +1288,7 @@ static void nvme_watchdog_timer(unsigned long data) /* Skip controllers under certain specific conditions. */ if (nvme_should_reset(dev, csts)) { - if (queue_work(nvme_workq, &dev->reset_work)) + if (!nvme_reset(dev)) dev_warn(dev->dev, "Failed status: 0x%x, reset controller.\n", csts); @@ -1306,10 +1313,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev) max = min(dev->max_qid, dev->queue_count - 1); for (i = dev->online_queues; i <= max; i++) { ret = nvme_create_queue(dev->queues[i], i); - if (ret) { - nvme_free_queues(dev, i); + if (ret) break; - } } /* @@ -1321,28 +1326,37 @@ static int nvme_create_io_queues(struct nvme_dev *dev) return ret >= 0 ? 0 : ret; } +static ssize_t nvme_cmb_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev)); + + return snprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz : x%08x\n", + ndev->cmbloc, ndev->cmbsz); +} +static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL); + static void __iomem *nvme_map_cmb(struct nvme_dev *dev) { u64 szu, size, offset; - u32 cmbloc; resource_size_t bar_size; struct pci_dev *pdev = to_pci_dev(dev->dev); void __iomem *cmb; dma_addr_t dma_addr; - if (!use_cmb_sqes) - return NULL; - dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ); if (!(NVME_CMB_SZ(dev->cmbsz))) return NULL; + dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC); - cmbloc = readl(dev->bar + NVME_REG_CMBLOC); + if (!use_cmb_sqes) + return NULL; szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz)); size = szu * NVME_CMB_SZ(dev->cmbsz); - offset = szu * NVME_CMB_OFST(cmbloc); - bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc)); + offset = szu * NVME_CMB_OFST(dev->cmbloc); + bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc)); if (offset > bar_size) return NULL; @@ -1355,7 +1369,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev) if (size > bar_size - offset) size = bar_size - offset; - dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset; + dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset; cmb = ioremap_wc(dma_addr, size); if (!cmb) return NULL; @@ -1382,7 +1396,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) { struct nvme_queue *adminq = dev->queues[0]; struct pci_dev *pdev = to_pci_dev(dev->dev); - int result, i, vecs, nr_io_queues, size; + int result, nr_io_queues, size; nr_io_queues = num_online_cpus(); result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues); @@ -1417,29 +1431,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) } /* Deregister the admin queue's interrupt */ - free_irq(dev->entry[0].vector, adminq); + free_irq(pci_irq_vector(pdev, 0), adminq); /* * If we enable msix early due to not intx, disable it again before * setting up the full range we need. */ - if (pdev->msi_enabled) - pci_disable_msi(pdev); - else if (pdev->msix_enabled) - pci_disable_msix(pdev); - - for (i = 0; i < nr_io_queues; i++) - dev->entry[i].entry = i; - vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues); - if (vecs < 0) { - vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32)); - if (vecs < 0) { - vecs = 1; - } else { - for (i = 0; i < vecs; i++) - dev->entry[i].vector = i + pdev->irq; - } - } + pci_free_irq_vectors(pdev); + nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues, + PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY); + if (nr_io_queues <= 0) + return -EIO; + dev->max_qid = nr_io_queues; /* * Should investigate if there's a performance win from allocating @@ -1447,36 +1450,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev) * path to scale better, even if the receive path is limited by the * number of interrupts. */ - nr_io_queues = vecs; - dev->max_qid = nr_io_queues; - result = queue_request_irq(dev, adminq, adminq->irqname); + result = queue_request_irq(adminq); if (result) { adminq->cq_vector = -1; - goto free_queues; + return result; } return nvme_create_io_queues(dev); - - free_queues: - nvme_free_queues(dev, 1); - return result; -} - -static void nvme_pci_post_scan(struct nvme_ctrl *ctrl) -{ - struct nvme_dev *dev = to_nvme_dev(ctrl); - struct nvme_queue *nvmeq; - int i; - - for (i = 0; i < dev->online_queues; i++) { - nvmeq = dev->queues[i]; - - if (!nvmeq->tags || !(*nvmeq->tags)) - continue; - - irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector, - blk_mq_tags_cpumask(*nvmeq->tags)); - } } static void nvme_del_queue_end(struct request *req, int error) @@ -1531,9 +1511,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode) return 0; } -static void nvme_disable_io_queues(struct nvme_dev *dev) +static void nvme_disable_io_queues(struct nvme_dev *dev, int queues) { - int pass, queues = dev->online_queues - 1; + int pass; unsigned long timeout; u8 opcode = nvme_admin_delete_sq; @@ -1615,15 +1595,9 @@ static int nvme_pci_enable(struct nvme_dev *dev) * interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll * adjust this later. */ - if (pci_enable_msix(pdev, dev->entry, 1)) { - pci_enable_msi(pdev); - dev->entry[0].vector = pdev->irq; - } - - if (!dev->entry[0].vector) { - result = -ENODEV; - goto disable; - } + result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES); + if (result < 0) + return result; cap = lo_hi_readq(dev->bar + NVME_REG_CAP); @@ -1642,9 +1616,25 @@ static int nvme_pci_enable(struct nvme_dev *dev) dev->q_depth); } - if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2)) + /* + * CMBs can currently only exist on >=1.2 PCIe devices. We only + * populate sysfs if a CMB is implemented. Note that we add the + * CMB attribute to the nvme_ctrl kobj which removes the need to remove + * it on exit. Since nvme_dev_attrs_group has no name we can pass + * NULL as final argument to sysfs_add_file_to_group. + */ + + if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) { dev->cmb = nvme_map_cmb(dev); + if (dev->cmbsz) { + if (sysfs_add_file_to_group(&dev->ctrl.device->kobj, + &dev_attr_cmb.attr, NULL)) + dev_warn(dev->dev, + "failed to add sysfs attribute for CMB\n"); + } + } + pci_enable_pcie_error_reporting(pdev); pci_save_state(pdev); return 0; @@ -1665,10 +1655,7 @@ static void nvme_pci_disable(struct nvme_dev *dev) { struct pci_dev *pdev = to_pci_dev(dev->dev); - if (pdev->msi_enabled) - pci_disable_msi(pdev); - else if (pdev->msix_enabled) - pci_disable_msix(pdev); + pci_free_irq_vectors(pdev); if (pci_is_enabled(pdev)) { pci_disable_pcie_error_reporting(pdev); @@ -1678,7 +1665,7 @@ static void nvme_pci_disable(struct nvme_dev *dev) static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) { - int i; + int i, queues; u32 csts = -1; del_timer_sync(&dev->watchdog_timer); @@ -1689,6 +1676,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) csts = readl(dev->bar + NVME_REG_CSTS); } + queues = dev->online_queues - 1; for (i = dev->queue_count - 1; i > 0; i--) nvme_suspend_queue(dev->queues[i]); @@ -1700,7 +1688,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown) if (dev->queue_count) nvme_suspend_queue(dev->queues[0]); } else { - nvme_disable_io_queues(dev); + nvme_disable_io_queues(dev, queues); nvme_disable_admin_queue(dev, shutdown); } nvme_pci_disable(dev); @@ -1743,7 +1731,6 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) if (dev->ctrl.admin_q) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); - kfree(dev->entry); kfree(dev); } @@ -1848,11 +1835,10 @@ static int nvme_reset(struct nvme_dev *dev) { if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q)) return -ENODEV; - + if (work_busy(&dev->reset_work)) + return -ENODEV; if (!queue_work(nvme_workq, &dev->reset_work)) return -EBUSY; - - flush_work(&dev->reset_work); return 0; } @@ -1876,7 +1862,12 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val) static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl) { - return nvme_reset(to_nvme_dev(ctrl)); + struct nvme_dev *dev = to_nvme_dev(ctrl); + int ret = nvme_reset(dev); + + if (!ret) + flush_work(&dev->reset_work); + return ret; } static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { @@ -1887,7 +1878,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = { .reg_read64 = nvme_pci_reg_read64, .reset_ctrl = nvme_pci_reset_ctrl, .free_ctrl = nvme_pci_free_ctrl, - .post_scan = nvme_pci_post_scan, .submit_async_event = nvme_pci_submit_async_event, }; @@ -1920,10 +1910,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node); if (!dev) return -ENOMEM; - dev->entry = kzalloc_node(num_possible_cpus() * sizeof(*dev->entry), - GFP_KERNEL, node); - if (!dev->entry) - goto free; dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *), GFP_KERNEL, node); if (!dev->queues) @@ -1964,7 +1950,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) nvme_dev_unmap(dev); free: kfree(dev->queues); - kfree(dev->entry); kfree(dev); return result; } @@ -1976,7 +1961,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare) if (prepare) nvme_dev_disable(dev, false); else - queue_work(nvme_workq, &dev->reset_work); + nvme_reset(dev); } static void nvme_shutdown(struct pci_dev *pdev) @@ -2045,7 +2030,7 @@ static int nvme_resume(struct device *dev) struct pci_dev *pdev = to_pci_dev(dev); struct nvme_dev *ndev = pci_get_drvdata(pdev); - queue_work(nvme_workq, &ndev->reset_work); + nvme_reset(ndev); return 0; } #endif @@ -2084,7 +2069,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev) dev_info(dev->ctrl.device, "restart after slot reset\n"); pci_restore_state(pdev); - queue_work(nvme_workq, &dev->reset_work); + nvme_reset(dev); return PCI_ERS_RESULT_RECOVERED; } |