summaryrefslogtreecommitdiff
path: root/drivers/nvme/host/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r--drivers/nvme/host/pci.c207
1 files changed, 96 insertions, 111 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 60f7eab11865..5e52034ab010 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -16,6 +16,7 @@
#include <linux/bitops.h>
#include <linux/blkdev.h>
#include <linux/blk-mq.h>
+#include <linux/blk-mq-pci.h>
#include <linux/cpu.h>
#include <linux/delay.h>
#include <linux/errno.h>
@@ -88,7 +89,6 @@ struct nvme_dev {
unsigned max_qid;
int q_depth;
u32 db_stride;
- struct msix_entry *entry;
void __iomem *bar;
struct work_struct reset_work;
struct work_struct remove_work;
@@ -99,6 +99,7 @@ struct nvme_dev {
dma_addr_t cmb_dma_addr;
u64 cmb_size;
u32 cmbsz;
+ u32 cmbloc;
struct nvme_ctrl ctrl;
struct completion ioq_wait;
};
@@ -201,6 +202,11 @@ static unsigned int nvme_cmd_size(struct nvme_dev *dev)
nvme_iod_alloc_size(dev, NVME_INT_BYTES(dev), NVME_INT_PAGES);
}
+static int nvmeq_irq(struct nvme_queue *nvmeq)
+{
+ return pci_irq_vector(to_pci_dev(nvmeq->dev->dev), nvmeq->cq_vector);
+}
+
static int nvme_admin_init_hctx(struct blk_mq_hw_ctx *hctx, void *data,
unsigned int hctx_idx)
{
@@ -263,6 +269,13 @@ static int nvme_init_request(void *data, struct request *req,
return 0;
}
+static int nvme_pci_map_queues(struct blk_mq_tag_set *set)
+{
+ struct nvme_dev *dev = set->driver_data;
+
+ return blk_mq_pci_map_queues(set, to_pci_dev(dev->dev));
+}
+
/**
* __nvme_submit_cmd() - Copy a command into a queue and ring the doorbell
* @nvmeq: The queue to use
@@ -503,7 +516,8 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
goto out;
ret = BLK_MQ_RQ_QUEUE_BUSY;
- if (!dma_map_sg(dev->dev, iod->sg, iod->nents, dma_dir))
+ if (!dma_map_sg_attrs(dev->dev, iod->sg, iod->nents, dma_dir,
+ DMA_ATTR_NO_WARN))
goto out;
if (!nvme_setup_prps(dev, req, size))
@@ -880,7 +894,7 @@ static enum blk_eh_timer_return nvme_timeout(struct request *req, bool reserved)
"I/O %d QID %d timeout, reset controller\n",
req->tag, nvmeq->qid);
nvme_dev_disable(dev, false);
- queue_work(nvme_workq, &dev->reset_work);
+ nvme_reset(dev);
/*
* Mark the request as handled, since the inline shutdown
@@ -960,7 +974,7 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
spin_unlock_irq(&nvmeq->q_lock);
return 1;
}
- vector = nvmeq->dev->entry[nvmeq->cq_vector].vector;
+ vector = nvmeq_irq(nvmeq);
nvmeq->dev->online_queues--;
nvmeq->cq_vector = -1;
spin_unlock_irq(&nvmeq->q_lock);
@@ -968,7 +982,6 @@ static int nvme_suspend_queue(struct nvme_queue *nvmeq)
if (!nvmeq->qid && nvmeq->dev->ctrl.admin_q)
blk_mq_stop_hw_queues(nvmeq->dev->ctrl.admin_q);
- irq_set_affinity_hint(vector, NULL);
free_irq(vector, nvmeq);
return 0;
@@ -1075,15 +1088,14 @@ static struct nvme_queue *nvme_alloc_queue(struct nvme_dev *dev, int qid,
return NULL;
}
-static int queue_request_irq(struct nvme_dev *dev, struct nvme_queue *nvmeq,
- const char *name)
+static int queue_request_irq(struct nvme_queue *nvmeq)
{
if (use_threaded_interrupts)
- return request_threaded_irq(dev->entry[nvmeq->cq_vector].vector,
- nvme_irq_check, nvme_irq, IRQF_SHARED,
- name, nvmeq);
- return request_irq(dev->entry[nvmeq->cq_vector].vector, nvme_irq,
- IRQF_SHARED, name, nvmeq);
+ return request_threaded_irq(nvmeq_irq(nvmeq), nvme_irq_check,
+ nvme_irq, IRQF_SHARED, nvmeq->irqname, nvmeq);
+ else
+ return request_irq(nvmeq_irq(nvmeq), nvme_irq, IRQF_SHARED,
+ nvmeq->irqname, nvmeq);
}
static void nvme_init_queue(struct nvme_queue *nvmeq, u16 qid)
@@ -1114,7 +1126,7 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
if (result < 0)
goto release_cq;
- result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
+ result = queue_request_irq(nvmeq);
if (result < 0)
goto release_sq;
@@ -1131,7 +1143,6 @@ static int nvme_create_queue(struct nvme_queue *nvmeq, int qid)
static struct blk_mq_ops nvme_mq_admin_ops = {
.queue_rq = nvme_queue_rq,
.complete = nvme_complete_rq,
- .map_queue = blk_mq_map_queue,
.init_hctx = nvme_admin_init_hctx,
.exit_hctx = nvme_admin_exit_hctx,
.init_request = nvme_admin_init_request,
@@ -1141,9 +1152,9 @@ static struct blk_mq_ops nvme_mq_admin_ops = {
static struct blk_mq_ops nvme_mq_ops = {
.queue_rq = nvme_queue_rq,
.complete = nvme_complete_rq,
- .map_queue = blk_mq_map_queue,
.init_hctx = nvme_init_hctx,
.init_request = nvme_init_request,
+ .map_queues = nvme_pci_map_queues,
.timeout = nvme_timeout,
.poll = nvme_poll,
};
@@ -1204,7 +1215,7 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
u64 cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
struct nvme_queue *nvmeq;
- dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1) ?
+ dev->subsystem = readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 1, 0) ?
NVME_CAP_NSSRC(cap) : 0;
if (dev->subsystem &&
@@ -1231,20 +1242,16 @@ static int nvme_configure_admin_queue(struct nvme_dev *dev)
result = nvme_enable_ctrl(&dev->ctrl, cap);
if (result)
- goto free_nvmeq;
+ return result;
nvmeq->cq_vector = 0;
- result = queue_request_irq(dev, nvmeq, nvmeq->irqname);
+ result = queue_request_irq(nvmeq);
if (result) {
nvmeq->cq_vector = -1;
- goto free_nvmeq;
+ return result;
}
return result;
-
- free_nvmeq:
- nvme_free_queues(dev, 0);
- return result;
}
static bool nvme_should_reset(struct nvme_dev *dev, u32 csts)
@@ -1281,7 +1288,7 @@ static void nvme_watchdog_timer(unsigned long data)
/* Skip controllers under certain specific conditions. */
if (nvme_should_reset(dev, csts)) {
- if (queue_work(nvme_workq, &dev->reset_work))
+ if (!nvme_reset(dev))
dev_warn(dev->dev,
"Failed status: 0x%x, reset controller.\n",
csts);
@@ -1306,10 +1313,8 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
max = min(dev->max_qid, dev->queue_count - 1);
for (i = dev->online_queues; i <= max; i++) {
ret = nvme_create_queue(dev->queues[i], i);
- if (ret) {
- nvme_free_queues(dev, i);
+ if (ret)
break;
- }
}
/*
@@ -1321,28 +1326,37 @@ static int nvme_create_io_queues(struct nvme_dev *dev)
return ret >= 0 ? 0 : ret;
}
+static ssize_t nvme_cmb_show(struct device *dev,
+ struct device_attribute *attr,
+ char *buf)
+{
+ struct nvme_dev *ndev = to_nvme_dev(dev_get_drvdata(dev));
+
+ return snprintf(buf, PAGE_SIZE, "cmbloc : x%08x\ncmbsz : x%08x\n",
+ ndev->cmbloc, ndev->cmbsz);
+}
+static DEVICE_ATTR(cmb, S_IRUGO, nvme_cmb_show, NULL);
+
static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
{
u64 szu, size, offset;
- u32 cmbloc;
resource_size_t bar_size;
struct pci_dev *pdev = to_pci_dev(dev->dev);
void __iomem *cmb;
dma_addr_t dma_addr;
- if (!use_cmb_sqes)
- return NULL;
-
dev->cmbsz = readl(dev->bar + NVME_REG_CMBSZ);
if (!(NVME_CMB_SZ(dev->cmbsz)))
return NULL;
+ dev->cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
- cmbloc = readl(dev->bar + NVME_REG_CMBLOC);
+ if (!use_cmb_sqes)
+ return NULL;
szu = (u64)1 << (12 + 4 * NVME_CMB_SZU(dev->cmbsz));
size = szu * NVME_CMB_SZ(dev->cmbsz);
- offset = szu * NVME_CMB_OFST(cmbloc);
- bar_size = pci_resource_len(pdev, NVME_CMB_BIR(cmbloc));
+ offset = szu * NVME_CMB_OFST(dev->cmbloc);
+ bar_size = pci_resource_len(pdev, NVME_CMB_BIR(dev->cmbloc));
if (offset > bar_size)
return NULL;
@@ -1355,7 +1369,7 @@ static void __iomem *nvme_map_cmb(struct nvme_dev *dev)
if (size > bar_size - offset)
size = bar_size - offset;
- dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(cmbloc)) + offset;
+ dma_addr = pci_resource_start(pdev, NVME_CMB_BIR(dev->cmbloc)) + offset;
cmb = ioremap_wc(dma_addr, size);
if (!cmb)
return NULL;
@@ -1382,7 +1396,7 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
{
struct nvme_queue *adminq = dev->queues[0];
struct pci_dev *pdev = to_pci_dev(dev->dev);
- int result, i, vecs, nr_io_queues, size;
+ int result, nr_io_queues, size;
nr_io_queues = num_online_cpus();
result = nvme_set_queue_count(&dev->ctrl, &nr_io_queues);
@@ -1417,29 +1431,18 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
}
/* Deregister the admin queue's interrupt */
- free_irq(dev->entry[0].vector, adminq);
+ free_irq(pci_irq_vector(pdev, 0), adminq);
/*
* If we enable msix early due to not intx, disable it again before
* setting up the full range we need.
*/
- if (pdev->msi_enabled)
- pci_disable_msi(pdev);
- else if (pdev->msix_enabled)
- pci_disable_msix(pdev);
-
- for (i = 0; i < nr_io_queues; i++)
- dev->entry[i].entry = i;
- vecs = pci_enable_msix_range(pdev, dev->entry, 1, nr_io_queues);
- if (vecs < 0) {
- vecs = pci_enable_msi_range(pdev, 1, min(nr_io_queues, 32));
- if (vecs < 0) {
- vecs = 1;
- } else {
- for (i = 0; i < vecs; i++)
- dev->entry[i].vector = i + pdev->irq;
- }
- }
+ pci_free_irq_vectors(pdev);
+ nr_io_queues = pci_alloc_irq_vectors(pdev, 1, nr_io_queues,
+ PCI_IRQ_ALL_TYPES | PCI_IRQ_AFFINITY);
+ if (nr_io_queues <= 0)
+ return -EIO;
+ dev->max_qid = nr_io_queues;
/*
* Should investigate if there's a performance win from allocating
@@ -1447,36 +1450,13 @@ static int nvme_setup_io_queues(struct nvme_dev *dev)
* path to scale better, even if the receive path is limited by the
* number of interrupts.
*/
- nr_io_queues = vecs;
- dev->max_qid = nr_io_queues;
- result = queue_request_irq(dev, adminq, adminq->irqname);
+ result = queue_request_irq(adminq);
if (result) {
adminq->cq_vector = -1;
- goto free_queues;
+ return result;
}
return nvme_create_io_queues(dev);
-
- free_queues:
- nvme_free_queues(dev, 1);
- return result;
-}
-
-static void nvme_pci_post_scan(struct nvme_ctrl *ctrl)
-{
- struct nvme_dev *dev = to_nvme_dev(ctrl);
- struct nvme_queue *nvmeq;
- int i;
-
- for (i = 0; i < dev->online_queues; i++) {
- nvmeq = dev->queues[i];
-
- if (!nvmeq->tags || !(*nvmeq->tags))
- continue;
-
- irq_set_affinity_hint(dev->entry[nvmeq->cq_vector].vector,
- blk_mq_tags_cpumask(*nvmeq->tags));
- }
}
static void nvme_del_queue_end(struct request *req, int error)
@@ -1531,9 +1511,9 @@ static int nvme_delete_queue(struct nvme_queue *nvmeq, u8 opcode)
return 0;
}
-static void nvme_disable_io_queues(struct nvme_dev *dev)
+static void nvme_disable_io_queues(struct nvme_dev *dev, int queues)
{
- int pass, queues = dev->online_queues - 1;
+ int pass;
unsigned long timeout;
u8 opcode = nvme_admin_delete_sq;
@@ -1615,15 +1595,9 @@ static int nvme_pci_enable(struct nvme_dev *dev)
* interrupts. Pre-enable a single MSIX or MSI vec for setup. We'll
* adjust this later.
*/
- if (pci_enable_msix(pdev, dev->entry, 1)) {
- pci_enable_msi(pdev);
- dev->entry[0].vector = pdev->irq;
- }
-
- if (!dev->entry[0].vector) {
- result = -ENODEV;
- goto disable;
- }
+ result = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+ if (result < 0)
+ return result;
cap = lo_hi_readq(dev->bar + NVME_REG_CAP);
@@ -1642,9 +1616,25 @@ static int nvme_pci_enable(struct nvme_dev *dev)
dev->q_depth);
}
- if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2))
+ /*
+ * CMBs can currently only exist on >=1.2 PCIe devices. We only
+ * populate sysfs if a CMB is implemented. Note that we add the
+ * CMB attribute to the nvme_ctrl kobj which removes the need to remove
+ * it on exit. Since nvme_dev_attrs_group has no name we can pass
+ * NULL as final argument to sysfs_add_file_to_group.
+ */
+
+ if (readl(dev->bar + NVME_REG_VS) >= NVME_VS(1, 2, 0)) {
dev->cmb = nvme_map_cmb(dev);
+ if (dev->cmbsz) {
+ if (sysfs_add_file_to_group(&dev->ctrl.device->kobj,
+ &dev_attr_cmb.attr, NULL))
+ dev_warn(dev->dev,
+ "failed to add sysfs attribute for CMB\n");
+ }
+ }
+
pci_enable_pcie_error_reporting(pdev);
pci_save_state(pdev);
return 0;
@@ -1665,10 +1655,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
{
struct pci_dev *pdev = to_pci_dev(dev->dev);
- if (pdev->msi_enabled)
- pci_disable_msi(pdev);
- else if (pdev->msix_enabled)
- pci_disable_msix(pdev);
+ pci_free_irq_vectors(pdev);
if (pci_is_enabled(pdev)) {
pci_disable_pcie_error_reporting(pdev);
@@ -1678,7 +1665,7 @@ static void nvme_pci_disable(struct nvme_dev *dev)
static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
{
- int i;
+ int i, queues;
u32 csts = -1;
del_timer_sync(&dev->watchdog_timer);
@@ -1689,6 +1676,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
csts = readl(dev->bar + NVME_REG_CSTS);
}
+ queues = dev->online_queues - 1;
for (i = dev->queue_count - 1; i > 0; i--)
nvme_suspend_queue(dev->queues[i]);
@@ -1700,7 +1688,7 @@ static void nvme_dev_disable(struct nvme_dev *dev, bool shutdown)
if (dev->queue_count)
nvme_suspend_queue(dev->queues[0]);
} else {
- nvme_disable_io_queues(dev);
+ nvme_disable_io_queues(dev, queues);
nvme_disable_admin_queue(dev, shutdown);
}
nvme_pci_disable(dev);
@@ -1743,7 +1731,6 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
if (dev->ctrl.admin_q)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
- kfree(dev->entry);
kfree(dev);
}
@@ -1848,11 +1835,10 @@ static int nvme_reset(struct nvme_dev *dev)
{
if (!dev->ctrl.admin_q || blk_queue_dying(dev->ctrl.admin_q))
return -ENODEV;
-
+ if (work_busy(&dev->reset_work))
+ return -ENODEV;
if (!queue_work(nvme_workq, &dev->reset_work))
return -EBUSY;
-
- flush_work(&dev->reset_work);
return 0;
}
@@ -1876,7 +1862,12 @@ static int nvme_pci_reg_read64(struct nvme_ctrl *ctrl, u32 off, u64 *val)
static int nvme_pci_reset_ctrl(struct nvme_ctrl *ctrl)
{
- return nvme_reset(to_nvme_dev(ctrl));
+ struct nvme_dev *dev = to_nvme_dev(ctrl);
+ int ret = nvme_reset(dev);
+
+ if (!ret)
+ flush_work(&dev->reset_work);
+ return ret;
}
static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
@@ -1887,7 +1878,6 @@ static const struct nvme_ctrl_ops nvme_pci_ctrl_ops = {
.reg_read64 = nvme_pci_reg_read64,
.reset_ctrl = nvme_pci_reset_ctrl,
.free_ctrl = nvme_pci_free_ctrl,
- .post_scan = nvme_pci_post_scan,
.submit_async_event = nvme_pci_submit_async_event,
};
@@ -1920,10 +1910,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
dev = kzalloc_node(sizeof(*dev), GFP_KERNEL, node);
if (!dev)
return -ENOMEM;
- dev->entry = kzalloc_node(num_possible_cpus() * sizeof(*dev->entry),
- GFP_KERNEL, node);
- if (!dev->entry)
- goto free;
dev->queues = kzalloc_node((num_possible_cpus() + 1) * sizeof(void *),
GFP_KERNEL, node);
if (!dev->queues)
@@ -1964,7 +1950,6 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
nvme_dev_unmap(dev);
free:
kfree(dev->queues);
- kfree(dev->entry);
kfree(dev);
return result;
}
@@ -1976,7 +1961,7 @@ static void nvme_reset_notify(struct pci_dev *pdev, bool prepare)
if (prepare)
nvme_dev_disable(dev, false);
else
- queue_work(nvme_workq, &dev->reset_work);
+ nvme_reset(dev);
}
static void nvme_shutdown(struct pci_dev *pdev)
@@ -2045,7 +2030,7 @@ static int nvme_resume(struct device *dev)
struct pci_dev *pdev = to_pci_dev(dev);
struct nvme_dev *ndev = pci_get_drvdata(pdev);
- queue_work(nvme_workq, &ndev->reset_work);
+ nvme_reset(ndev);
return 0;
}
#endif
@@ -2084,7 +2069,7 @@ static pci_ers_result_t nvme_slot_reset(struct pci_dev *pdev)
dev_info(dev->ctrl.device, "restart after slot reset\n");
pci_restore_state(pdev);
- queue_work(nvme_workq, &dev->reset_work);
+ nvme_reset(dev);
return PCI_ERS_RESULT_RECOVERED;
}