From 02746e26c39ee473b975e0f68d1295abc92672ed Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Wed, 1 Sep 2021 16:14:34 +0300 Subject: virtio-blk: avoid preallocating big SGL for data No need to pre-allocate a big buffer for the IO SGL anymore. If a device has lots of deep queues, preallocation for the sg list can consume substantial amounts of memory. For HW virtio-blk device, nr_hw_queues can be 64 or 128 and each queue's depth might be 128. This means the resulting preallocation for the data SGLs is big. Switch to runtime allocation for SGL for lists longer than 2 entries. This is the approach used by NVMe drivers so it should be reasonable for virtio block as well. Runtime SGL allocation has always been the case for the legacy I/O path so this is nothing new. The preallocated small SGL depends on SG_CHAIN so if the ARCH doesn't support SG_CHAIN, use only runtime allocation for the SGL. Re-organize the setup of the IO request to fit the new sg chain mechanism. No performance degradation was seen (fio libaio engine with 16 jobs and 128 iodepth): IO size IOPs Rand Read (before/after) IOPs Rand Write (before/after) -------- --------------------------------- ---------------------------------- 512B 318K/316K 329K/325K 4KB 323K/321K 353K/349K 16KB 199K/208K 250K/275K 128KB 36K/36.1K 39.2K/41.7K Signed-off-by: Max Gurtovoy Reviewed-by: Israel Rukshin Link: https://lore.kernel.org/r/20210901131434.31158-1-mgurtovoy@nvidia.com Reviewed-by: Feng Li Reviewed-by: Stefan Hajnoczi Tested-by: Stefan Hajnoczi Reviewed-by: Christoph Hellwig Signed-off-by: Arnd Bergmann # kconfig fixups Signed-off-by: Michael S. Tsirkin --- drivers/block/Kconfig | 1 + drivers/block/virtio_blk.c | 156 +++++++++++++++++++++++++++++---------------- 2 files changed, 101 insertions(+), 56 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/Kconfig b/drivers/block/Kconfig index ab3e37aa1830..33a3f77dce27 100644 --- a/drivers/block/Kconfig +++ b/drivers/block/Kconfig @@ -394,6 +394,7 @@ config XEN_BLKDEV_BACKEND config VIRTIO_BLK tristate "Virtio block driver" depends on VIRTIO + select SG_POOL help This is the virtual block driver for virtio. It can be used with QEMU based VMMs (like KVM or Xen). Say Y or M. diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 303caf2d17d0..6767fb709d25 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -24,6 +24,12 @@ /* The maximum number of sg elements that fit into a virtqueue */ #define VIRTIO_BLK_MAX_SG_ELEMS 32768 +#ifdef CONFIG_ARCH_NO_SG_CHAIN +#define VIRTIO_BLK_INLINE_SG_CNT 0 +#else +#define VIRTIO_BLK_INLINE_SG_CNT 2 +#endif + static int major; static DEFINE_IDA(vd_index_ida); @@ -77,6 +83,7 @@ struct virtio_blk { struct virtblk_req { struct virtio_blk_outhdr out_hdr; u8 status; + struct sg_table sg_table; struct scatterlist sg[]; }; @@ -162,12 +169,92 @@ static int virtblk_setup_discard_write_zeroes(struct request *req, bool unmap) return 0; } -static inline void virtblk_request_done(struct request *req) +static void virtblk_unmap_data(struct request *req, struct virtblk_req *vbr) { - struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + if (blk_rq_nr_phys_segments(req)) + sg_free_table_chained(&vbr->sg_table, + VIRTIO_BLK_INLINE_SG_CNT); +} + +static int virtblk_map_data(struct blk_mq_hw_ctx *hctx, struct request *req, + struct virtblk_req *vbr) +{ + int err; + + if (!blk_rq_nr_phys_segments(req)) + return 0; + + vbr->sg_table.sgl = vbr->sg; + err = sg_alloc_table_chained(&vbr->sg_table, + blk_rq_nr_phys_segments(req), + vbr->sg_table.sgl, + VIRTIO_BLK_INLINE_SG_CNT); + if (unlikely(err)) + return -ENOMEM; + return blk_rq_map_sg(hctx->queue, req, vbr->sg_table.sgl); +} + +static void virtblk_cleanup_cmd(struct request *req) +{ if (req->rq_flags & RQF_SPECIAL_PAYLOAD) kfree(bvec_virt(&req->special_vec)); +} + +static int virtblk_setup_cmd(struct virtio_device *vdev, struct request *req, + struct virtblk_req *vbr) +{ + bool unmap = false; + u32 type; + + vbr->out_hdr.sector = 0; + + switch (req_op(req)) { + case REQ_OP_READ: + type = VIRTIO_BLK_T_IN; + vbr->out_hdr.sector = cpu_to_virtio64(vdev, + blk_rq_pos(req)); + break; + case REQ_OP_WRITE: + type = VIRTIO_BLK_T_OUT; + vbr->out_hdr.sector = cpu_to_virtio64(vdev, + blk_rq_pos(req)); + break; + case REQ_OP_FLUSH: + type = VIRTIO_BLK_T_FLUSH; + break; + case REQ_OP_DISCARD: + type = VIRTIO_BLK_T_DISCARD; + break; + case REQ_OP_WRITE_ZEROES: + type = VIRTIO_BLK_T_WRITE_ZEROES; + unmap = !(req->cmd_flags & REQ_NOUNMAP); + break; + case REQ_OP_DRV_IN: + type = VIRTIO_BLK_T_GET_ID; + break; + default: + WARN_ON_ONCE(1); + return BLK_STS_IOERR; + } + + vbr->out_hdr.type = cpu_to_virtio32(vdev, type); + vbr->out_hdr.ioprio = cpu_to_virtio32(vdev, req_get_ioprio(req)); + + if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { + if (virtblk_setup_discard_write_zeroes(req, unmap)) + return BLK_STS_RESOURCE; + } + + return 0; +} + +static inline void virtblk_request_done(struct request *req) +{ + struct virtblk_req *vbr = blk_mq_rq_to_pdu(req); + + virtblk_unmap_data(req, vbr); + virtblk_cleanup_cmd(req); blk_mq_end_request(req, virtblk_result(vbr)); } @@ -225,57 +312,23 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, int qid = hctx->queue_num; int err; bool notify = false; - bool unmap = false; - u32 type; BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); - switch (req_op(req)) { - case REQ_OP_READ: - case REQ_OP_WRITE: - type = 0; - break; - case REQ_OP_FLUSH: - type = VIRTIO_BLK_T_FLUSH; - break; - case REQ_OP_DISCARD: - type = VIRTIO_BLK_T_DISCARD; - break; - case REQ_OP_WRITE_ZEROES: - type = VIRTIO_BLK_T_WRITE_ZEROES; - unmap = !(req->cmd_flags & REQ_NOUNMAP); - break; - case REQ_OP_DRV_IN: - type = VIRTIO_BLK_T_GET_ID; - break; - default: - WARN_ON_ONCE(1); - return BLK_STS_IOERR; - } - - vbr->out_hdr.type = cpu_to_virtio32(vblk->vdev, type); - vbr->out_hdr.sector = type ? - 0 : cpu_to_virtio64(vblk->vdev, blk_rq_pos(req)); - vbr->out_hdr.ioprio = cpu_to_virtio32(vblk->vdev, req_get_ioprio(req)); + err = virtblk_setup_cmd(vblk->vdev, req, vbr); + if (unlikely(err)) + return err; blk_mq_start_request(req); - if (type == VIRTIO_BLK_T_DISCARD || type == VIRTIO_BLK_T_WRITE_ZEROES) { - err = virtblk_setup_discard_write_zeroes(req, unmap); - if (err) - return BLK_STS_RESOURCE; - } - - num = blk_rq_map_sg(hctx->queue, req, vbr->sg); - if (num) { - if (rq_data_dir(req) == WRITE) - vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_OUT); - else - vbr->out_hdr.type |= cpu_to_virtio32(vblk->vdev, VIRTIO_BLK_T_IN); + num = virtblk_map_data(hctx, req, vbr); + if (unlikely(num < 0)) { + virtblk_cleanup_cmd(req); + return BLK_STS_RESOURCE; } spin_lock_irqsave(&vblk->vqs[qid].lock, flags); - err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg, num); + err = virtblk_add_req(vblk->vqs[qid].vq, vbr, vbr->sg_table.sgl, num); if (err) { virtqueue_kick(vblk->vqs[qid].vq); /* Don't stop the queue if -ENOMEM: we may have failed to @@ -284,6 +337,8 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, if (err == -ENOSPC) blk_mq_stop_hw_queue(hctx); spin_unlock_irqrestore(&vblk->vqs[qid].lock, flags); + virtblk_unmap_data(req, vbr); + virtblk_cleanup_cmd(req); switch (err) { case -ENOSPC: return BLK_STS_DEV_RESOURCE; @@ -660,16 +715,6 @@ static const struct attribute_group *virtblk_attr_groups[] = { NULL, }; -static int virtblk_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) -{ - struct virtio_blk *vblk = set->driver_data; - struct virtblk_req *vbr = blk_mq_rq_to_pdu(rq); - - sg_init_table(vbr->sg, vblk->sg_elems); - return 0; -} - static int virtblk_map_queues(struct blk_mq_tag_set *set) { struct virtio_blk *vblk = set->driver_data; @@ -682,7 +727,6 @@ static const struct blk_mq_ops virtio_mq_ops = { .queue_rq = virtio_queue_rq, .commit_rqs = virtio_commit_rqs, .complete = virtblk_request_done, - .init_request = virtblk_init_request, .map_queues = virtblk_map_queues, }; @@ -762,7 +806,7 @@ static int virtblk_probe(struct virtio_device *vdev) vblk->tag_set.flags = BLK_MQ_F_SHOULD_MERGE; vblk->tag_set.cmd_size = sizeof(struct virtblk_req) + - sizeof(struct scatterlist) * sg_elems; + sizeof(struct scatterlist) * VIRTIO_BLK_INLINE_SG_CNT; vblk->tag_set.driver_data = vblk; vblk->tag_set.nr_hw_queues = vblk->num_vqs; -- cgit v1.2.3 From 0989c41bed96e5dcf7939c6303e3759f02c4c16f Mon Sep 17 00:00:00 2001 From: Max Gurtovoy Date: Thu, 2 Sep 2021 23:46:22 +0300 Subject: virtio-blk: add num_request_queues module parameter Sometimes a user would like to control the amount of request queues to be created for a block device. For example, for limiting the memory footprint of virtio-blk devices. Reviewed-by: Christoph Hellwig Reviewed-by: Stefan Hajnoczi Signed-off-by: Max Gurtovoy Link: https://lore.kernel.org/r/20210902204622.54354-1-mgurtovoy@nvidia.com Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 23 ++++++++++++++++++++++- 1 file changed, 22 insertions(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 6767fb709d25..a33fe0743672 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -30,6 +30,25 @@ #define VIRTIO_BLK_INLINE_SG_CNT 2 #endif +static int virtblk_queue_count_set(const char *val, + const struct kernel_param *kp) +{ + return param_set_uint_minmax(val, kp, 1, nr_cpu_ids); +} + +static const struct kernel_param_ops queue_count_ops = { + .set = virtblk_queue_count_set, + .get = param_get_uint, +}; + +static unsigned int num_request_queues; +module_param_cb(num_request_queues, &queue_count_ops, &num_request_queues, + 0644); +MODULE_PARM_DESC(num_request_queues, + "Limit the number of request queues to use for blk device. " + "0 for no limit. " + "Values > nr_cpu_ids truncated to nr_cpu_ids."); + static int major; static DEFINE_IDA(vd_index_ida); @@ -553,7 +572,9 @@ static int init_vq(struct virtio_blk *vblk) if (err) num_vqs = 1; - num_vqs = min_t(unsigned int, nr_cpu_ids, num_vqs); + num_vqs = min_t(unsigned int, + min_not_zero(num_request_queues, nr_cpu_ids), + num_vqs); vblk->vqs = kmalloc_array(num_vqs, sizeof(*vblk->vqs), GFP_KERNEL); if (!vblk->vqs) -- cgit v1.2.3 From 6ae6ff6f6e7d2f304a12a53af8298e4f16ad633e Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Tue, 19 Oct 2021 15:01:43 +0800 Subject: virtio-blk: validate num_queues during probe If an untrusted device neogitates BLK_F_MQ but advertises a zero num_queues, the driver may end up trying to allocating zero size buffers where ZERO_SIZE_PTR is returned which may pass the checking against the NULL. This will lead unexpected results. Fixing this by failing the probe in this case. Cc: Paolo Bonzini Cc: Stefan Hajnoczi Cc: Stefano Garzarella Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211019070152.8236-2-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefano Garzarella Reviewed-by: Stefan Hajnoczi --- drivers/block/virtio_blk.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index a33fe0743672..dbcf2a7e4a00 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -571,6 +571,10 @@ static int init_vq(struct virtio_blk *vblk) &num_vqs); if (err) num_vqs = 1; + if (!err && !num_vqs) { + dev_err(&vdev->dev, "MQ advertisted but zero queues reported\n"); + return -EINVAL; + } num_vqs = min_t(unsigned int, min_not_zero(num_request_queues, nr_cpu_ids), -- cgit v1.2.3 From 63b4ffa4fad0b14e9ebfedd7f7bb709b1c92472f Mon Sep 17 00:00:00 2001 From: Colin Ian King Date: Mon, 25 Oct 2021 11:22:40 +0100 Subject: virtio_blk: Fix spelling mistake: "advertisted" -> "advertised" There is a spelling mistake in a dev_err error message. Fix it. Signed-off-by: Colin Ian King Link: https://lore.kernel.org/r/20211025102240.22801-1-colin.i.king@gmail.com Signed-off-by: Michael S. Tsirkin Reviewed-by: Max Gurtovoy Acked-by: Jason Wang --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index dbcf2a7e4a00..20f230bfe5b3 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -572,7 +572,7 @@ static int init_vq(struct virtio_blk *vblk) if (err) num_vqs = 1; if (!err && !num_vqs) { - dev_err(&vdev->dev, "MQ advertisted but zero queues reported\n"); + dev_err(&vdev->dev, "MQ advertised but zero queues reported\n"); return -EINVAL; } -- cgit v1.2.3 From f1aa12f53529ccd67722241792f39248bc89d353 Mon Sep 17 00:00:00 2001 From: Ye Guojin Date: Thu, 21 Oct 2021 06:51:11 +0000 Subject: virtio-blk: fixup coccinelle warnings coccicheck complains about the use of snprintf() in sysfs show functions: WARNING use scnprintf or sprintf Use sysfs_emit instead of scnprintf or sprintf makes more sense. Reported-by: Zeal Robot Signed-off-by: Ye Guojin Link: https://lore.kernel.org/r/20211021065111.1047824-1-ye.guojin@zte.com.cn Signed-off-by: Michael S. Tsirkin Reviewed-by: Stefan Hajnoczi Reviewed-by: Stefano Garzarella --- drivers/block/virtio_blk.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 20f230bfe5b3..bcb02e4a0809 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -704,7 +704,7 @@ cache_type_show(struct device *dev, struct device_attribute *attr, char *buf) u8 writeback = virtblk_get_cache_mode(vblk->vdev); BUG_ON(writeback >= ARRAY_SIZE(virtblk_cache_types)); - return snprintf(buf, 40, "%s\n", virtblk_cache_types[writeback]); + return sysfs_emit(buf, "%s\n", virtblk_cache_types[writeback]); } static DEVICE_ATTR_RW(cache_type); -- cgit v1.2.3 From ead65f76958258c4c349c6a2b9577d80cc23133f Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Sun, 24 Oct 2021 09:41:40 -0400 Subject: virtio_blk: allow 0 as num_request_queues The default value is 0 meaning "no limit". However if 0 is specified on the command line it is instead silently converted to 1. Further, the value is already validated at point of use, there's no point in duplicating code validating the value when it is set. Simplify the code while making the behaviour more consistent by using plain module_param. Fixes: 1a662cf6cb9a ("virtio-blk: add num_request_queues module parameter") Cc: Max Gurtovoy Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index bcb02e4a0809..9dd0099d2bd2 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -30,20 +30,8 @@ #define VIRTIO_BLK_INLINE_SG_CNT 2 #endif -static int virtblk_queue_count_set(const char *val, - const struct kernel_param *kp) -{ - return param_set_uint_minmax(val, kp, 1, nr_cpu_ids); -} - -static const struct kernel_param_ops queue_count_ops = { - .set = virtblk_queue_count_set, - .get = param_get_uint, -}; - static unsigned int num_request_queues; -module_param_cb(num_request_queues, &queue_count_ops, &num_request_queues, - 0644); +module_param(num_request_queues, uint, 0644); MODULE_PARM_DESC(num_request_queues, "Limit the number of request queues to use for blk device. " "0 for no limit. " -- cgit v1.2.3 From f0839372478ec53ff6d728422a6088f8a35b3a28 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 25 Oct 2021 03:54:03 -0400 Subject: virtio_blk: correct types for status handling virtblk_setup_cmd returns blk_status_t in an int, callers then assign it back to a blk_status_t variable. blk_status_t is either u32 or (more typically) u8 so it works, but is inelegant and causes sparse warnings. Pass the status in blk_status_t in a consistent way. Reported-by: kernel test robot Fixes: b2c5221fd074 ("virtio-blk: avoid preallocating big SGL for data") Cc: Max Gurtovoy Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang --- drivers/block/virtio_blk.c | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index 9dd0099d2bd2..ec6ed24d1061 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -208,8 +208,9 @@ static void virtblk_cleanup_cmd(struct request *req) kfree(bvec_virt(&req->special_vec)); } -static int virtblk_setup_cmd(struct virtio_device *vdev, struct request *req, - struct virtblk_req *vbr) +static blk_status_t virtblk_setup_cmd(struct virtio_device *vdev, + struct request *req, + struct virtblk_req *vbr) { bool unmap = false; u32 type; @@ -317,14 +318,15 @@ static blk_status_t virtio_queue_rq(struct blk_mq_hw_ctx *hctx, unsigned long flags; unsigned int num; int qid = hctx->queue_num; - int err; bool notify = false; + blk_status_t status; + int err; BUG_ON(req->nr_phys_segments + 2 > vblk->sg_elems); - err = virtblk_setup_cmd(vblk->vdev, req, vbr); - if (unlikely(err)) - return err; + status = virtblk_setup_cmd(vblk->vdev, req, vbr); + if (unlikely(status)) + return status; blk_mq_start_request(req); -- cgit v1.2.3 From a40392edf1b2c7822bc0ce68413106661a9d4232 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Oct 2021 10:21:06 +0800 Subject: virtio-blk: don't let virtio core to validate used length We never tries to use used length, so the patch prevents the virtio core from validating used length. Signed-off-by: Jason Wang Link: https://lore.kernel.org/r/20211027022107.14357-4-jasowang@redhat.com Signed-off-by: Michael S. Tsirkin --- drivers/block/virtio_blk.c | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/block') diff --git a/drivers/block/virtio_blk.c b/drivers/block/virtio_blk.c index ec6ed24d1061..e1f4f63ed246 100644 --- a/drivers/block/virtio_blk.c +++ b/drivers/block/virtio_blk.c @@ -1041,6 +1041,7 @@ static struct virtio_driver virtio_blk = { .feature_table_size = ARRAY_SIZE(features), .feature_table_legacy = features_legacy, .feature_table_size_legacy = ARRAY_SIZE(features_legacy), + .suppress_used_validation = true, .driver.name = KBUILD_MODNAME, .driver.owner = THIS_MODULE, .id_table = id_table, -- cgit v1.2.3