diff options
author | Christoph Hellwig <hch@lst.de> | 2016-12-09 01:20:32 +0300 |
---|---|---|
committer | Jens Axboe <axboe@fb.com> | 2016-12-09 18:30:51 +0300 |
commit | f9d03f96b988002027d4b28ea1b7a24729a4c9b5 (patch) | |
tree | e4995a30bbe58290594e2ef29de5ae2785c6c9db /drivers | |
parent | be07e14f96e3121483339a64d917fddb3b86ba98 (diff) | |
download | linux-f9d03f96b988002027d4b28ea1b7a24729a4c9b5.tar.xz |
block: improve handling of the magic discard payload
Instead of allocating a single unused biovec for discard requests, send
them down without any payload. Instead we allow the driver to add a
"special" payload using a biovec embedded into struct request (unioned
over other fields never used while in the driver), and overloading
the number of segments for this case.
This has a couple of advantages:
- we don't have to allocate the bio_vec
- the amount of special casing for discard requests in the block
layer is significantly reduced
- using this same scheme for other request types is trivial,
which will be important for implementing the new WRITE_ZEROES
op on devices where it actually requires a payload (e.g. SCSI)
- we can get rid of playing games with the request length, as
we'll never touch it and completions will work just fine
- it will allow us to support ranged discard operations in the
future by merging non-contiguous discard bios into a single
request
- last but not least it removes a lot of code
This patch is the common base for my WIP series for ranges discards and to
remove discard_zeroes_data in favor of always using REQ_OP_WRITE_ZEROES,
so it would be good to get it in quickly.
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'drivers')
-rw-r--r-- | drivers/nvme/host/core.c | 17 | ||||
-rw-r--r-- | drivers/nvme/host/nvme.h | 6 | ||||
-rw-r--r-- | drivers/nvme/host/pci.c | 27 | ||||
-rw-r--r-- | drivers/nvme/host/rdma.c | 13 | ||||
-rw-r--r-- | drivers/nvme/target/loop.c | 4 | ||||
-rw-r--r-- | drivers/scsi/scsi_lib.c | 6 | ||||
-rw-r--r-- | drivers/scsi/sd.c | 24 |
7 files changed, 41 insertions, 56 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c index 1b48514fbe99..3b1d6478dcfb 100644 --- a/drivers/nvme/host/core.c +++ b/drivers/nvme/host/core.c @@ -239,8 +239,6 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, struct nvme_command *cmnd) { struct nvme_dsm_range *range; - struct page *page; - int offset; unsigned int nr_bytes = blk_rq_bytes(req); range = kmalloc(sizeof(*range), GFP_ATOMIC); @@ -257,17 +255,10 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req, cmnd->dsm.nr = 0; cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD); - req->completion_data = range; - page = virt_to_page(range); - offset = offset_in_page(range); - blk_add_request_payload(req, page, offset, sizeof(*range)); - - /* - * we set __data_len back to the size of the area to be discarded - * on disk. This allows us to report completion on the full amount - * of blocks described by the request. - */ - req->__data_len = nr_bytes; + req->special_vec.bv_page = virt_to_page(range); + req->special_vec.bv_offset = offset_in_page(range); + req->special_vec.bv_len = sizeof(*range); + req->rq_flags |= RQF_SPECIAL_PAYLOAD; return BLK_MQ_RQ_QUEUE_OK; } diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h index a3d6ffd874af..bd5321441d12 100644 --- a/drivers/nvme/host/nvme.h +++ b/drivers/nvme/host/nvme.h @@ -236,8 +236,10 @@ static inline unsigned nvme_map_len(struct request *rq) static inline void nvme_cleanup_cmd(struct request *req) { - if (req_op(req) == REQ_OP_DISCARD) - kfree(req->completion_data); + if (req->rq_flags & RQF_SPECIAL_PAYLOAD) { + kfree(page_address(req->special_vec.bv_page) + + req->special_vec.bv_offset); + } } static inline int nvme_error_status(u16 status) diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c index 82b9b3f1f21d..717d6ea47ee4 100644 --- a/drivers/nvme/host/pci.c +++ b/drivers/nvme/host/pci.c @@ -302,14 +302,14 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq, static __le64 **iod_list(struct request *req) { struct nvme_iod *iod = blk_mq_rq_to_pdu(req); - return (__le64 **)(iod->sg + req->nr_phys_segments); + return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req)); } static int nvme_init_iod(struct request *rq, unsigned size, struct nvme_dev *dev) { struct nvme_iod *iod = blk_mq_rq_to_pdu(rq); - int nseg = rq->nr_phys_segments; + int nseg = blk_rq_nr_phys_segments(rq); if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) { iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC); @@ -339,8 +339,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req) __le64 **list = iod_list(req); dma_addr_t prp_dma = iod->first_dma; - nvme_cleanup_cmd(req); - if (iod->npages == 0) dma_pool_free(dev->prp_small_pool, list[0], prp_dma); for (i = 0; i < iod->npages; i++) { @@ -510,7 +508,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req, DMA_TO_DEVICE : DMA_FROM_DEVICE; int ret = BLK_MQ_RQ_QUEUE_ERROR; - sg_init_table(iod->sg, req->nr_phys_segments); + sg_init_table(iod->sg, blk_rq_nr_phys_segments(req)); iod->nents = blk_rq_map_sg(q, req, iod->sg); if (!iod->nents) goto out; @@ -566,6 +564,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req) } } + nvme_cleanup_cmd(req); nvme_free_iod(dev, req); } @@ -596,20 +595,20 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, } } - map_len = nvme_map_len(req); - ret = nvme_init_iod(req, map_len, dev); + ret = nvme_setup_cmd(ns, req, &cmnd); if (ret != BLK_MQ_RQ_QUEUE_OK) return ret; - ret = nvme_setup_cmd(ns, req, &cmnd); + map_len = nvme_map_len(req); + ret = nvme_init_iod(req, map_len, dev); if (ret != BLK_MQ_RQ_QUEUE_OK) - goto out; + goto out_free_cmd; - if (req->nr_phys_segments) + if (blk_rq_nr_phys_segments(req)) ret = nvme_map_data(dev, req, map_len, &cmnd); if (ret != BLK_MQ_RQ_QUEUE_OK) - goto out; + goto out_cleanup_iod; blk_mq_start_request(req); @@ -620,14 +619,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx, else ret = BLK_MQ_RQ_QUEUE_ERROR; spin_unlock_irq(&nvmeq->q_lock); - goto out; + goto out_cleanup_iod; } __nvme_submit_cmd(nvmeq, &cmnd); nvme_process_cq(nvmeq); spin_unlock_irq(&nvmeq->q_lock); return BLK_MQ_RQ_QUEUE_OK; -out: +out_cleanup_iod: nvme_free_iod(dev, req); +out_free_cmd: + nvme_cleanup_cmd(req); return ret; } diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c index b037d0cb2a7e..251101bf982f 100644 --- a/drivers/nvme/host/rdma.c +++ b/drivers/nvme/host/rdma.c @@ -952,8 +952,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq); struct nvme_rdma_device *dev = queue->device; struct ib_device *ibdev = dev->dev; - int nents, count; - int ret; + int count, ret; req->num_sge = 1; req->inline_data = false; @@ -965,16 +964,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue, return nvme_rdma_set_sg_null(c); req->sg_table.sgl = req->first_sgl; - ret = sg_alloc_table_chained(&req->sg_table, rq->nr_phys_segments, - req->sg_table.sgl); + ret = sg_alloc_table_chained(&req->sg_table, + blk_rq_nr_phys_segments(rq), req->sg_table.sgl); if (ret) return -ENOMEM; - nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); - BUG_ON(nents > rq->nr_phys_segments); - req->nents = nents; + req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl); - count = ib_dma_map_sg(ibdev, req->sg_table.sgl, nents, + count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents, rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE); if (unlikely(count <= 0)) { sg_free_table_chained(&req->sg_table, true); diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c index 57ded6b3ed8a..9aaa70071ae5 100644 --- a/drivers/nvme/target/loop.c +++ b/drivers/nvme/target/loop.c @@ -185,13 +185,13 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx, if (blk_rq_bytes(req)) { iod->sg_table.sgl = iod->first_sgl; ret = sg_alloc_table_chained(&iod->sg_table, - req->nr_phys_segments, iod->sg_table.sgl); + blk_rq_nr_phys_segments(req), + iod->sg_table.sgl); if (ret) return BLK_MQ_RQ_QUEUE_BUSY; iod->req.sg = iod->sg_table.sgl; iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl); - BUG_ON(iod->req.sg_cnt > req->nr_phys_segments); } blk_mq_start_request(req); diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c index 47a5c8783b89..9a8ccff1121f 100644 --- a/drivers/scsi/scsi_lib.c +++ b/drivers/scsi/scsi_lib.c @@ -1007,8 +1007,8 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb) /* * If sg table allocation fails, requeue request later. */ - if (unlikely(sg_alloc_table_chained(&sdb->table, req->nr_phys_segments, - sdb->table.sgl))) + if (unlikely(sg_alloc_table_chained(&sdb->table, + blk_rq_nr_phys_segments(req), sdb->table.sgl))) return BLKPREP_DEFER; /* @@ -1040,7 +1040,7 @@ int scsi_init_io(struct scsi_cmnd *cmd) bool is_mq = (rq->mq_ctx != NULL); int error; - BUG_ON(!rq->nr_phys_segments); + BUG_ON(!blk_rq_nr_phys_segments(rq)); error = scsi_init_sgtable(rq, &cmd->sdb); if (error) diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index 65738b0aad36..079c2d9759fb 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -716,7 +716,6 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd) struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); sector_t sector = blk_rq_pos(rq); unsigned int nr_sectors = blk_rq_sectors(rq); - unsigned int nr_bytes = blk_rq_bytes(rq); unsigned int len; int ret; char *buf; @@ -772,24 +771,19 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd) goto out; } - rq->completion_data = page; rq->timeout = SD_TIMEOUT; cmd->transfersize = len; cmd->allowed = SD_MAX_RETRIES; - /* - * Initially __data_len is set to the amount of data that needs to be - * transferred to the target. This amount depends on whether WRITE SAME - * or UNMAP is being used. After the scatterlist has been mapped by - * scsi_init_io() we set __data_len to the size of the area to be - * discarded on disk. This allows us to report completion on the full - * amount of blocks described by the request. - */ - blk_add_request_payload(rq, page, 0, len); - ret = scsi_init_io(cmd); - rq->__data_len = nr_bytes; + rq->special_vec.bv_page = page; + rq->special_vec.bv_offset = 0; + rq->special_vec.bv_len = len; + + rq->rq_flags |= RQF_SPECIAL_PAYLOAD; + rq->resid_len = len; + ret = scsi_init_io(cmd); out: if (ret != BLKPREP_OK) __free_page(page); @@ -1182,8 +1176,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt) { struct request *rq = SCpnt->request; - if (req_op(rq) == REQ_OP_DISCARD) - __free_page(rq->completion_data); + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + __free_page(rq->special_vec.bv_page); if (SCpnt->cmnd != rq->cmd) { mempool_free(SCpnt->cmnd, sd_cdb_pool); |