summary refs log tree commit diff
path: root/drivers/nvme/host/pci.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r-- drivers/nvme/host/pci.c 125
1 files changed, 44 insertions, 81 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index fc33804662e7..1b9951d2067e 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -38,6 +38,13 @@
#define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc))
+/*
+ * These can be higher, but we need to ensure that any command doesn't
+ * require an sg allocation that needs more than a page of data.
+ */
+#define NVME_MAX_KB_SZ 4096
+#define NVME_MAX_SEGS 127
+
static int use_threaded_interrupts;
module_param(use_threaded_interrupts, int, 0);
@@ -100,6 +107,8 @@ struct nvme_dev {
struct nvme_ctrl ctrl;
struct completion ioq_wait;
+ mempool_t *iod_mempool;
+
/* shadow doorbell buffer support: */
u32 *dbbuf_dbs;
dma_addr_t dbbuf_dbs_dma_addr;
@@ -409,6 +418,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req,
BUG_ON(!nvmeq);
iod->nvmeq = nvmeq;
+
+ nvme_req(req)->ctrl = &dev->ctrl;
return 0;
}
@@ -477,10 +488,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev)
iod->use_sgl = nvme_pci_use_sgls(dev, rq);
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
- size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg,
- iod->use_sgl);
-
- iod->sg = kmalloc(alloc_size, GFP_ATOMIC);
+ iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC);
if (!iod->sg)
return BLK_STS_RESOURCE;
} else {
@@ -526,75 +534,8 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
}
if (iod->sg != iod->inline_sg)
- kfree(iod->sg);
-}
-
-#ifdef CONFIG_BLK_DEV_INTEGRITY
-static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
- if (be32_to_cpu(pi->ref_tag) == v)
- pi->ref_tag = cpu_to_be32(p);
-}
-
-static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
- if (be32_to_cpu(pi->ref_tag) == p)
- pi->ref_tag = cpu_to_be32(v);
-}
-
-/**
- * nvme_dif_remap - remaps ref tags to bip seed and physical lba
- *
- * The virtual start sector is the one that was originally submitted by the
- * block layer. Due to partitioning, MD/DM cloning, etc. the actual physical
- * start sector may be different. Remap protection information to match the
- * physical LBA on writes, and back to the original seed on reads.
- *
- * Type 0 and 3 do not have a ref tag, so no remapping required.
- */
-static void nvme_dif_remap(struct request *req,
- void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
-{
- struct nvme_ns *ns = req->rq_disk->private_data;
- struct bio_integrity_payload *bip;
- struct t10_pi_tuple *pi;
- void *p, *pmap;
- u32 i, nlb, ts, phys, virt;
-
- if (!ns->pi_type || ns->pi_type == NVME_NS_DPS_PI_TYPE3)
- return;
-
- bip = bio_integrity(req->bio);
- if (!bip)
- return;
-
- pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset;
-
- p = pmap;
- virt = bip_get_seed(bip);
- phys = nvme_block_nr(ns, blk_rq_pos(req));
- nlb = (blk_rq_bytes(req) >> ns->lba_shift);
- ts = ns->disk->queue->integrity.tuple_size;
-
- for (i = 0; i < nlb; i++, virt++, phys++) {
- pi = (struct t10_pi_tuple *)p;
- dif_swap(phys, virt, pi);
- p += ts;
- }
- kunmap_atomic(pmap);
-}
-#else /* CONFIG_BLK_DEV_INTEGRITY */
-static void nvme_dif_remap(struct request *req,
- void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi))
-{
-}
-static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
-}
-static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi)
-{
+ mempool_free(iod->sg, dev->iod_mempool);
}
-#endif
static void nvme_print_sgl(struct scatterlist *sgl, int nents)
{
@@ -821,9 +762,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req,
if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1)
goto out_unmap;
- if (req_op(req) == REQ_OP_WRITE)
- nvme_dif_remap(req, nvme_dif_prep);
-
if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir))
goto out_unmap;
}
@@ -846,11 +784,8 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
if (iod->nents) {
dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir);
- if (blk_integrity_rq(req)) {
- if (req_op(req) == REQ_OP_READ)
- nvme_dif_remap(req, nvme_dif_complete);
+ if (blk_integrity_rq(req))
dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir);
- }
}
nvme_cleanup_cmd(req);
@@ -2280,6 +2215,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl)
blk_put_queue(dev->ctrl.admin_q);
kfree(dev->queues);
free_opal_dev(dev->ctrl.opal_dev);
+ mempool_destroy(dev->iod_mempool);
kfree(dev);
}
@@ -2289,6 +2225,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status)
nvme_get_ctrl(&dev->ctrl);
nvme_dev_disable(dev, false);
+ nvme_kill_queues(&dev->ctrl);
if (!queue_work(nvme_wq, &dev->remove_work))
nvme_put_ctrl(&dev->ctrl);
}
@@ -2333,6 +2270,13 @@ static void nvme_reset_work(struct work_struct *work)
if (result)
goto out;
+ /*
+ * Limit the max command size to prevent iod->sg allocations going
+ * over a single page.
+ */
+ dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1;
+ dev->ctrl.max_segments = NVME_MAX_SEGS;
+
result = nvme_init_identify(&dev->ctrl);
if (result)
goto out;
@@ -2405,7 +2349,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work)
struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work);
struct pci_dev *pdev = to_pci_dev(dev->dev);
- nvme_kill_queues(&dev->ctrl);
if (pci_get_drvdata(pdev))
device_release_driver(&pdev->dev);
nvme_put_ctrl(&dev->ctrl);
@@ -2509,6 +2452,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
int node, result = -ENOMEM;
struct nvme_dev *dev;
unsigned long quirks = id->driver_data;
+ size_t alloc_size;
node = dev_to_node(&pdev->dev);
if (node == NUMA_NO_NODE)
@@ -2541,10 +2485,27 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
quirks |= check_vendor_combination_bug(pdev);
+ /*
+ * Double check that our mempool alloc size will cover the biggest
+ * command we support.
+ */
+ alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ,
+ NVME_MAX_SEGS, true);
+ WARN_ON_ONCE(alloc_size > PAGE_SIZE);
+
+ dev->iod_mempool = mempool_create_node(1, mempool_kmalloc,
+ mempool_kfree,
+ (void *) alloc_size,
+ GFP_KERNEL, node);
+ if (!dev->iod_mempool) {
+ result = -ENOMEM;
+ goto release_pools;
+ }
+
result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops,
quirks);
if (result)
- goto release_pools;
+ goto release_mempool;
dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev));
@@ -2553,6 +2514,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id)
return 0;
+ release_mempool:
+ mempool_destroy(dev->iod_mempool);
release_pools:
nvme_release_prp_pools(dev);
unmap: