summaryrefslogtreecommitdiff
path: root/drivers
diff options
context:
space:
mode:
author Christoph Hellwig <hch@lst.de> 2016-12-09 01:20:32 +0300
committer Jens Axboe <axboe@fb.com> 2016-12-09 18:30:51 +0300
commit f9d03f96b988002027d4b28ea1b7a24729a4c9b5 (patch)
tree e4995a30bbe58290594e2ef29de5ae2785c6c9db /drivers
parent be07e14f96e3121483339a64d917fddb3b86ba98 (diff)
download linux-f9d03f96b988002027d4b28ea1b7a24729a4c9b5.tar.xz
block: improve handling of the magic discard payload
Instead of allocating a single unused biovec for discard requests, send them down without any payload. The driver is then allowed to add a "special" payload using a biovec embedded into struct request (unioned over other fields never used while in the driver), overloading the number of segments for this case.

This has a couple of advantages:
 - we don't have to allocate the bio_vec
 - the amount of special casing for discard requests in the block layer is significantly reduced
 - using this same scheme for other request types is trivial, which will be important for implementing the new WRITE_ZEROES op on devices where it actually requires a payload (e.g. SCSI)
 - we can get rid of playing games with the request length, as we'll never touch it and completions will work just fine
 - it will allow us to support ranged discard operations in the future by merging non-contiguous discard bios into a single request
 - last but not least it removes a lot of code

This patch is the common base for my WIP series for ranged discards and to remove discard_zeroes_data in favor of always using REQ_OP_WRITE_ZEROES, so it would be good to get it in quickly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@fb.com>
Diffstat (limited to 'drivers')
-rw-r--r-- drivers/nvme/host/core.c 17
-rw-r--r-- drivers/nvme/host/nvme.h 6
-rw-r--r-- drivers/nvme/host/pci.c 27
-rw-r--r-- drivers/nvme/host/rdma.c 13
-rw-r--r-- drivers/nvme/target/loop.c 4
-rw-r--r-- drivers/scsi/scsi_lib.c 6
-rw-r--r-- drivers/scsi/sd.c 24
7 files changed, 41 insertions, 56 deletions
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 1b48514fbe99..3b1d6478dcfb 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -239,8 +239,6 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
struct nvme_command *cmnd)
{
struct nvme_dsm_range *range;
- struct page *page;
- int offset;
unsigned int nr_bytes = blk_rq_bytes(req);
range = kmalloc(sizeof(*range), GFP_ATOMIC);
@@ -257,17 +255,10 @@ static inline int nvme_setup_discard(struct nvme_ns *ns, struct request *req,
cmnd->dsm.nr = 0;
cmnd->dsm.attributes = cpu_to_le32(NVME_DSMGMT_AD);
- req->completion_data = range;
- page = virt_to_page(range);
- offset = offset_in_page(range);
- blk_add_request_payload(req, page, offset, sizeof(*range));
-
- /*
- * we set __data_len back to the size of the area to be discarded
- * on disk. This allows us to report completion on the full amount
- * of blocks described by the request.
- */
- req->__data_len = nr_bytes;
+ req->special_vec.bv_page = virt_to_page(range);
+ req->special_vec.bv_offset = offset_in_page(range);
+ req->special_vec.bv_len = sizeof(*range);
+ req->rq_flags |= RQF_SPECIAL_PAYLOAD;
return BLK_MQ_RQ_QUEUE_OK;
}
diff --git a/drivers/nvme/host/nvme.h b/drivers/nvme/host/nvme.h
index a3d6ffd874af..bd5321441d12 100644
--- a/drivers/nvme/host/nvme.h
+++ b/drivers/nvme/host/nvme.h
@@ -236,8 +236,10 @@ static inline unsigned nvme_map_len(struct request *rq)
static inline void nvme_cleanup_cmd(struct request *req)
{
- if (req_op(req) == REQ_OP_DISCARD)
- kfree(req->completion_data);
+ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) {
+ kfree(page_address(req->special_vec.bv_page) +
+ req->special_vec.bv_offset);
+ }
}
static inline int nvme_error_status(u16 status)
diff --git a/drivers/nvme/host/pci.c b/drivers/nvme/host/pci.c
index 82b9b3f1f21d..717d6ea47ee4 100644
--- a/drivers/nvme/host/pci.c
+++ b/drivers/nvme/host/pci.c
@@ -302,14 +302,14 @@ static void __nvme_submit_cmd(struct nvme_queue *nvmeq,
static __le64 **iod_list(struct request *req)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(req);
- return (__le64 **)(iod->sg + req->nr_phys_segments);
+ return (__le64 **)(iod->sg + blk_rq_nr_phys_segments(req));
}
static int nvme_init_iod(struct request *rq, unsigned size,
struct nvme_dev *dev)
{
struct nvme_iod *iod = blk_mq_rq_to_pdu(rq);
- int nseg = rq->nr_phys_segments;
+ int nseg = blk_rq_nr_phys_segments(rq);
if (nseg > NVME_INT_PAGES || size > NVME_INT_BYTES(dev)) {
iod->sg = kmalloc(nvme_iod_alloc_size(dev, size, nseg), GFP_ATOMIC);
@@ -339,8 +339,6 @@ static void nvme_free_iod(struct nvme_dev *dev, struct request *req)
__le64 **list = iod_list(req);
dma_addr_t prp_dma = iod->first_dma;
- nvme_cleanup_cmd(req);
-
if (iod->npages == 0)
dma_pool_free(dev->prp_small_pool, list[0], prp_dma);
for (i = 0; i < iod->npages; i++) {
@@ -510,7 +508,7 @@ static int nvme_map_data(struct nvme_dev *dev, struct request *req,
DMA_TO_DEVICE : DMA_FROM_DEVICE;
int ret = BLK_MQ_RQ_QUEUE_ERROR;
- sg_init_table(iod->sg, req->nr_phys_segments);
+ sg_init_table(iod->sg, blk_rq_nr_phys_segments(req));
iod->nents = blk_rq_map_sg(q, req, iod->sg);
if (!iod->nents)
goto out;
@@ -566,6 +564,7 @@ static void nvme_unmap_data(struct nvme_dev *dev, struct request *req)
}
}
+ nvme_cleanup_cmd(req);
nvme_free_iod(dev, req);
}
@@ -596,20 +595,20 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
}
}
- map_len = nvme_map_len(req);
- ret = nvme_init_iod(req, map_len, dev);
+ ret = nvme_setup_cmd(ns, req, &cmnd);
if (ret != BLK_MQ_RQ_QUEUE_OK)
return ret;
- ret = nvme_setup_cmd(ns, req, &cmnd);
+ map_len = nvme_map_len(req);
+ ret = nvme_init_iod(req, map_len, dev);
if (ret != BLK_MQ_RQ_QUEUE_OK)
- goto out;
+ goto out_free_cmd;
- if (req->nr_phys_segments)
+ if (blk_rq_nr_phys_segments(req))
ret = nvme_map_data(dev, req, map_len, &cmnd);
if (ret != BLK_MQ_RQ_QUEUE_OK)
- goto out;
+ goto out_cleanup_iod;
blk_mq_start_request(req);
@@ -620,14 +619,16 @@ static int nvme_queue_rq(struct blk_mq_hw_ctx *hctx,
else
ret = BLK_MQ_RQ_QUEUE_ERROR;
spin_unlock_irq(&nvmeq->q_lock);
- goto out;
+ goto out_cleanup_iod;
}
__nvme_submit_cmd(nvmeq, &cmnd);
nvme_process_cq(nvmeq);
spin_unlock_irq(&nvmeq->q_lock);
return BLK_MQ_RQ_QUEUE_OK;
-out:
+out_cleanup_iod:
nvme_free_iod(dev, req);
+out_free_cmd:
+ nvme_cleanup_cmd(req);
return ret;
}
diff --git a/drivers/nvme/host/rdma.c b/drivers/nvme/host/rdma.c
index b037d0cb2a7e..251101bf982f 100644
--- a/drivers/nvme/host/rdma.c
+++ b/drivers/nvme/host/rdma.c
@@ -952,8 +952,7 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
struct nvme_rdma_request *req = blk_mq_rq_to_pdu(rq);
struct nvme_rdma_device *dev = queue->device;
struct ib_device *ibdev = dev->dev;
- int nents, count;
- int ret;
+ int count, ret;
req->num_sge = 1;
req->inline_data = false;
@@ -965,16 +964,14 @@ static int nvme_rdma_map_data(struct nvme_rdma_queue *queue,
return nvme_rdma_set_sg_null(c);
req->sg_table.sgl = req->first_sgl;
- ret = sg_alloc_table_chained(&req->sg_table, rq->nr_phys_segments,
- req->sg_table.sgl);
+ ret = sg_alloc_table_chained(&req->sg_table,
+ blk_rq_nr_phys_segments(rq), req->sg_table.sgl);
if (ret)
return -ENOMEM;
- nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
- BUG_ON(nents > rq->nr_phys_segments);
- req->nents = nents;
+ req->nents = blk_rq_map_sg(rq->q, rq, req->sg_table.sgl);
- count = ib_dma_map_sg(ibdev, req->sg_table.sgl, nents,
+ count = ib_dma_map_sg(ibdev, req->sg_table.sgl, req->nents,
rq_data_dir(rq) == WRITE ? DMA_TO_DEVICE : DMA_FROM_DEVICE);
if (unlikely(count <= 0)) {
sg_free_table_chained(&req->sg_table, true);
diff --git a/drivers/nvme/target/loop.c b/drivers/nvme/target/loop.c
index 57ded6b3ed8a..9aaa70071ae5 100644
--- a/drivers/nvme/target/loop.c
+++ b/drivers/nvme/target/loop.c
@@ -185,13 +185,13 @@ static int nvme_loop_queue_rq(struct blk_mq_hw_ctx *hctx,
if (blk_rq_bytes(req)) {
iod->sg_table.sgl = iod->first_sgl;
ret = sg_alloc_table_chained(&iod->sg_table,
- req->nr_phys_segments, iod->sg_table.sgl);
+ blk_rq_nr_phys_segments(req),
+ iod->sg_table.sgl);
if (ret)
return BLK_MQ_RQ_QUEUE_BUSY;
iod->req.sg = iod->sg_table.sgl;
iod->req.sg_cnt = blk_rq_map_sg(req->q, req, iod->sg_table.sgl);
- BUG_ON(iod->req.sg_cnt > req->nr_phys_segments);
}
blk_mq_start_request(req);
diff --git a/drivers/scsi/scsi_lib.c b/drivers/scsi/scsi_lib.c
index 47a5c8783b89..9a8ccff1121f 100644
--- a/drivers/scsi/scsi_lib.c
+++ b/drivers/scsi/scsi_lib.c
@@ -1007,8 +1007,8 @@ static int scsi_init_sgtable(struct request *req, struct scsi_data_buffer *sdb)
/*
* If sg table allocation fails, requeue request later.
*/
- if (unlikely(sg_alloc_table_chained(&sdb->table, req->nr_phys_segments,
- sdb->table.sgl)))
+ if (unlikely(sg_alloc_table_chained(&sdb->table,
+ blk_rq_nr_phys_segments(req), sdb->table.sgl)))
return BLKPREP_DEFER;
/*
@@ -1040,7 +1040,7 @@ int scsi_init_io(struct scsi_cmnd *cmd)
bool is_mq = (rq->mq_ctx != NULL);
int error;
- BUG_ON(!rq->nr_phys_segments);
+ BUG_ON(!blk_rq_nr_phys_segments(rq));
error = scsi_init_sgtable(rq, &cmd->sdb);
if (error)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 65738b0aad36..079c2d9759fb 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -716,7 +716,6 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
struct scsi_disk *sdkp = scsi_disk(rq->rq_disk);
sector_t sector = blk_rq_pos(rq);
unsigned int nr_sectors = blk_rq_sectors(rq);
- unsigned int nr_bytes = blk_rq_bytes(rq);
unsigned int len;
int ret;
char *buf;
@@ -772,24 +771,19 @@ static int sd_setup_discard_cmnd(struct scsi_cmnd *cmd)
goto out;
}
- rq->completion_data = page;
rq->timeout = SD_TIMEOUT;
cmd->transfersize = len;
cmd->allowed = SD_MAX_RETRIES;
- /*
- * Initially __data_len is set to the amount of data that needs to be
- * transferred to the target. This amount depends on whether WRITE SAME
- * or UNMAP is being used. After the scatterlist has been mapped by
- * scsi_init_io() we set __data_len to the size of the area to be
- * discarded on disk. This allows us to report completion on the full
- * amount of blocks described by the request.
- */
- blk_add_request_payload(rq, page, 0, len);
- ret = scsi_init_io(cmd);
- rq->__data_len = nr_bytes;
+ rq->special_vec.bv_page = page;
+ rq->special_vec.bv_offset = 0;
+ rq->special_vec.bv_len = len;
+
+ rq->rq_flags |= RQF_SPECIAL_PAYLOAD;
+ rq->resid_len = len;
+ ret = scsi_init_io(cmd);
out:
if (ret != BLKPREP_OK)
__free_page(page);
@@ -1182,8 +1176,8 @@ static void sd_uninit_command(struct scsi_cmnd *SCpnt)
{
struct request *rq = SCpnt->request;
- if (req_op(rq) == REQ_OP_DISCARD)
- __free_page(rq->completion_data);
+ if (rq->rq_flags & RQF_SPECIAL_PAYLOAD)
+ __free_page(rq->special_vec.bv_page);
if (SCpnt->cmnd != rq->cmd) {
mempool_free(SCpnt->cmnd, sd_cdb_pool);