diff options
Diffstat (limited to 'drivers/nvme/host/pci.c')
-rw-r--r-- | drivers/nvme/host/pci.c | 125 |
1 files changed, 44 insertions, 81 deletions
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index fc33804662e7..1b9951d2067e 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -38,6 +38,13 @@ #define SGES_PER_PAGE (PAGE_SIZE / sizeof(struct nvme_sgl_desc)) +/* + * These can be higher, but we need to ensure that any command doesn't + * require an sg allocation that needs more than a page of data. + */ +#define NVME_MAX_KB_SZ 4096 +#define NVME_MAX_SEGS 127 + static int use_threaded_interrupts; module_param(use_threaded_interrupts, int, 0); @@ -100,6 +107,8 @@ struct nvme_dev { struct nvme_ctrl ctrl; struct completion ioq_wait; + mempool_t *iod_mempool; + /* shadow doorbell buffer support: */ u32 *dbbuf_dbs; dma_addr_t dbbuf_dbs_dma_addr; @@ -409,6 +418,8 @@ static int nvme_init_request(struct blk_mq_tag_set *set, struct request *req, BUG_ON(!nvmeq); iod->nvmeq = nvmeq; + + nvme_req(req)->ctrl = &dev->ctrl; return 0; } @@ -477,10 +488,7 @@ static blk_status_t nvme_init_iod(struct request *rq, struct nvme_dev *dev) iod->use_sgl = nvme_pci_use_sgls(dev, rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { - size_t alloc_size = nvme_pci_iod_alloc_size(dev, size, nseg, - iod->use_sgl); - - iod->sg = kmalloc(alloc_size, GFP_ATOMIC); + iod->sg = mempool_alloc(dev->iod_mempool, GFP_ATOMIC); if (!iod->sg) return BLK_STS_RESOURCE; } else { @@ -526,75 +534,8 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req) } if (iod->sg != iod->inline_sg) - kfree(iod->sg); -} - -#ifdef CONFIG_BLK_DEV_INTEGRITY -static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi) -{ - if (be32_to_cpu(pi->ref_tag) == v) - pi->ref_tag = cpu_to_be32(p); -} - -static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) -{ - if (be32_to_cpu(pi->ref_tag) == p) - pi->ref_tag = cpu_to_be32(v); -} - -/** - * nvme_dif_remap - remaps ref tags to bip seed and physical lba - * - * The virtual start sector is the one that was originally submitted by the - * block layer. Due to partitioning, MD/DM cloning, etc. the actual physical - * start sector may be different. Remap protection information to match the - * physical LBA on writes, and back to the original seed on reads. - * - * Type 0 and 3 do not have a ref tag, so no remapping required. - */ -static void nvme_dif_remap(struct request *req, - void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi)) -{ - struct nvme_ns *ns = req->rq_disk->private_data; - struct bio_integrity_payload *bip; - struct t10_pi_tuple *pi; - void *p, *pmap; - u32 i, nlb, ts, phys, virt; - - if (!ns->pi_type || ns->pi_type == NVME_NS_DPS_PI_TYPE3) - return; - - bip = bio_integrity(req->bio); - if (!bip) - return; - - pmap = kmap_atomic(bip->bip_vec->bv_page) + bip->bip_vec->bv_offset; - - p = pmap; - virt = bip_get_seed(bip); - phys = nvme_block_nr(ns, blk_rq_pos(req)); - nlb = (blk_rq_bytes(req) >> ns->lba_shift); - ts = ns->disk->queue->integrity.tuple_size; - - for (i = 0; i < nlb; i++, virt++, phys++) { - pi = (struct t10_pi_tuple *)p; - dif_swap(phys, virt, pi); - p += ts; - } - kunmap_atomic(pmap); -} -#else /* CONFIG_BLK_DEV_INTEGRITY */ -static void nvme_dif_remap(struct request *req, - void (*dif_swap)(u32 p, u32 v, struct t10_pi_tuple *pi)) -{ -} -static void nvme_dif_prep(u32 p, u32 v, struct t10_pi_tuple *pi) -{ -} -static void nvme_dif_complete(u32 p, u32 v, struct t10_pi_tuple *pi) -{ + mempool_free(iod->sg, dev->iod_mempool); } -#endif static void nvme_print_sgl(struct scatterlist *sgl, int nents) { @@ -821,9 +762,6 @@ static blk_status_t nvme_map_data(struct nvme_dev *dev, struct request *req, if (blk_rq_map_integrity_sg(q, req->bio, &iod->meta_sg) != 1) goto out_unmap; - if (req_op(req) == REQ_OP_WRITE) - nvme_dif_remap(req, nvme_dif_prep); - if (!dma_map_sg(dev->dev, &iod->meta_sg, 1, dma_dir)) goto out_unmap; } @@ -846,11 +784,8 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) if (iod->nents) { dma_unmap_sg(dev->dev, iod->sg, iod->nents, dma_dir); - if (blk_integrity_rq(req)) { - if (req_op(req) == REQ_OP_READ) - nvme_dif_remap(req, nvme_dif_complete); + if (blk_integrity_rq(req)) dma_unmap_sg(dev->dev, &iod->meta_sg, 1, dma_dir); - } } nvme_cleanup_cmd(req); @@ -2280,6 +2215,7 @@ static void nvme_pci_free_ctrl(struct nvme_ctrl *ctrl) blk_put_queue(dev->ctrl.admin_q); kfree(dev->queues); free_opal_dev(dev->ctrl.opal_dev); + mempool_destroy(dev->iod_mempool); kfree(dev); } @@ -2289,6 +2225,7 @@ static void nvme_remove_dead_ctrl(struct nvme_dev *dev, int status) nvme_get_ctrl(&dev->ctrl); nvme_dev_disable(dev, false); + nvme_kill_queues(&dev->ctrl); if (!queue_work(nvme_wq, &dev->remove_work)) nvme_put_ctrl(&dev->ctrl); } @@ -2333,6 +2270,13 @@ static void nvme_reset_work(struct work_struct *work) if (result) goto out; + /* + * Limit the max command size to prevent iod->sg allocations going + * over a single page. + */ + dev->ctrl.max_hw_sectors = NVME_MAX_KB_SZ << 1; + dev->ctrl.max_segments = NVME_MAX_SEGS; + result = nvme_init_identify(&dev->ctrl); if (result) goto out; @@ -2405,7 +2349,6 @@ static void nvme_remove_dead_ctrl_work(struct work_struct *work) struct nvme_dev *dev = container_of(work, struct nvme_dev, remove_work); struct pci_dev *pdev = to_pci_dev(dev->dev); - nvme_kill_queues(&dev->ctrl); if (pci_get_drvdata(pdev)) device_release_driver(&pdev->dev); nvme_put_ctrl(&dev->ctrl); @@ -2509,6 +2452,7 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) int node, result = -ENOMEM; struct nvme_dev *dev; unsigned long quirks = id->driver_data; + size_t alloc_size; node = dev_to_node(&pdev->dev); if (node == NUMA_NO_NODE) @@ -2541,10 +2485,27 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) quirks |= check_vendor_combination_bug(pdev); + /* + * Double check that our mempool alloc size will cover the biggest + * command we support. + */ + alloc_size = nvme_pci_iod_alloc_size(dev, NVME_MAX_KB_SZ, + NVME_MAX_SEGS, true); + WARN_ON_ONCE(alloc_size > PAGE_SIZE); + + dev->iod_mempool = mempool_create_node(1, mempool_kmalloc, + mempool_kfree, + (void *) alloc_size, + GFP_KERNEL, node); + if (!dev->iod_mempool) { + result = -ENOMEM; + goto release_pools; + } + result = nvme_init_ctrl(&dev->ctrl, &pdev->dev, &nvme_pci_ctrl_ops, quirks); if (result) - goto release_pools; + goto release_mempool; dev_info(dev->ctrl.device, "pci function %s\n", dev_name(&pdev->dev)); @@ -2553,6 +2514,8 @@ static int nvme_probe(struct pci_dev *pdev, const struct pci_device_id *id) return 0; + release_mempool: + mempool_destroy(dev->iod_mempool); release_pools: nvme_release_prp_pools(dev); unmap: |