From 9864cd5dc54cade89fd4b0954c2e522841aa247c Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 9 Oct 2018 14:24:31 +0900 Subject: dm: fix report zone remapping to account for partition offset If dm-linear or dm-flakey are layered on top of a partition of a zoned block device, remapping of the start sector and write pointer position of the zones reported by a report zones BIO must be modified to account for the target table entry mapping (start offset within the device and entry mapping with the dm device). If the target's backing device is a partition of a whole disk, the start sector on the physical device of the partition must also be accounted for when modifying the zone information. However, dm_remap_zone_report() was not considering this last case, resulting in incorrect zone information remapping with targets using disk partitions. Fix this by calculating the target backing device start sector using the position of the completed report zones BIO and the unchanged position and size of the original report zone BIO. With this value calculated, the start sector and write pointer position of the target zones can be correctly remapped. Fixes: 10999307c14e ("dm: introduce dm_remap_zone_report()") Cc: stable@vger.kernel.org Signed-off-by: Damien Le Moal Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 27 ++++++++++++++++++++------- 1 file changed, 20 insertions(+), 7 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 20f7e4ef5342..45abb54037fc 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1155,12 +1155,14 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) EXPORT_SYMBOL_GPL(dm_accept_partial_bio); /* - * The zone descriptors obtained with a zone report indicate - * zone positions within the target device. The zone descriptors - * must be remapped to match their position within the dm device. 
- * A target may call dm_remap_zone_report after completion of a - * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained - * from the target device mapping to the dm device. + * The zone descriptors obtained with a zone report indicate zone positions + * within the target backing device, regardless of whether that device is a + * partition and regardless of the target mapping start sector on the device + * or partition. + * The zone descriptors start sector and write pointer position must be adjusted + * to match their relative position within the dm device. + * A target may call dm_remap_zone_report() after completion of a + * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained from the + * backing device. */ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) { @@ -1171,6 +1173,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) struct blk_zone *zone; unsigned int nr_rep = 0; unsigned int ofst; + sector_t part_offset; struct bio_vec bvec; struct bvec_iter iter; void *addr; @@ -1178,6 +1181,15 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) if (bio->bi_status) return; + /* + * bio sector was incremented by the request size on completion. Taking + * into account the original request sector, the target start offset on + * the backing device and the target mapping offset (ti->begin), compute + * the start sector of the backing device. The partition offset is always 0 + * if the target uses a whole device. + */ + part_offset = bio->bi_iter.bi_sector + ti->begin - (start + bio_end_sector(report_bio)); + /* * Remap the start sector of the reported zones. For sequential zones, * also remap the write pointer position. 
@@ -1195,6 +1207,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) /* Set zones start sector */ while (hdr->nr_zones && ofst < bvec.bv_len) { zone = addr + ofst; + zone->start -= part_offset; if (zone->start >= start + ti->len) { hdr->nr_zones = 0; break; @@ -1206,7 +1219,7 @@ void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) else if (zone->cond == BLK_ZONE_COND_EMPTY) zone->wp = zone->start; else - zone->wp = zone->wp + ti->begin - start; + zone->wp = zone->wp + ti->begin - start - part_offset; } ofst += sizeof(struct blk_zone); hdr->nr_zones--; -- cgit v1.2.3 From 6a23e05c2fe3c64ec012fd81e51e3ab51e4f2f9f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 10 Oct 2018 20:49:26 -0600 Subject: dm: remove legacy request-based IO path dm supports both, and since we're killing off the legacy path in general, get rid of it in dm. Signed-off-by: Jens Axboe Signed-off-by: Mike Snitzer --- drivers/md/Kconfig | 11 -- drivers/md/dm-core.h | 10 -- drivers/md/dm-mpath.c | 14 +-- drivers/md/dm-rq.c | 316 +++++--------------------------------------------- drivers/md/dm-rq.h | 4 - drivers/md/dm-sysfs.c | 3 +- drivers/md/dm-table.c | 49 +------- drivers/md/dm.c | 21 +--- drivers/md/dm.h | 1 - 9 files changed, 36 insertions(+), 393 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig index 8b8c123cae66..3db222509e44 100644 --- a/drivers/md/Kconfig +++ b/drivers/md/Kconfig @@ -215,17 +215,6 @@ config BLK_DEV_DM If unsure, say N. -config DM_MQ_DEFAULT - bool "request-based DM: use blk-mq I/O path by default" - depends on BLK_DEV_DM - ---help--- - This option enables the blk-mq based I/O path for request-based - DM devices by default. With the option the dm_mod.use_blk_mq - module/boot option defaults to Y, without it to N, but it can - still be overriden either way. - - If unsure say N. 
- config DM_DEBUG bool "Device mapper debugging support" depends on BLK_DEV_DM diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h index 7d480c930eaf..224d44503a06 100644 --- a/drivers/md/dm-core.h +++ b/drivers/md/dm-core.h @@ -112,18 +112,8 @@ struct mapped_device { struct dm_stats stats; - struct kthread_worker kworker; - struct task_struct *kworker_task; - - /* for request-based merge heuristic in dm_request_fn() */ - unsigned seq_rq_merge_deadline_usecs; - int last_rq_rw; - sector_t last_rq_pos; - ktime_t last_rq_start_time; - /* for blk-mq request-based DM support */ struct blk_mq_tag_set *tag_set; - bool use_blk_mq:1; bool init_tio_pdu:1; struct srcu_struct io_barrier; diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 419362c2d8ac..a24ed3973e7c 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -203,14 +203,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) { if (m->queue_mode == DM_TYPE_NONE) { - /* - * Default to request-based. - */ - if (dm_use_blk_mq(dm_table_get_md(ti->table))) - m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; - else - m->queue_mode = DM_TYPE_REQUEST_BASED; - + m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; } else if (m->queue_mode == DM_TYPE_BIO_BASED) { INIT_WORK(&m->process_queued_bios, process_queued_bios); /* @@ -537,10 +530,7 @@ static int multipath_clone_and_map(struct dm_target *ti, struct request *rq, * get the queue busy feedback (via BLK_STS_RESOURCE), * otherwise I/O merging can suffer. 
*/ - if (q->mq_ops) - return DM_MAPIO_REQUEUE; - else - return DM_MAPIO_DELAY_REQUEUE; + return DM_MAPIO_REQUEUE; } clone->bio = clone->biotail = NULL; clone->rq_disk = bdev->bd_disk; diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 6e547b8dd298..7cd36e4d1310 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -23,19 +23,6 @@ static unsigned dm_mq_queue_depth = DM_MQ_QUEUE_DEPTH; #define RESERVED_REQUEST_BASED_IOS 256 static unsigned reserved_rq_based_ios = RESERVED_REQUEST_BASED_IOS; -static bool use_blk_mq = IS_ENABLED(CONFIG_DM_MQ_DEFAULT); - -bool dm_use_blk_mq_default(void) -{ - return use_blk_mq; -} - -bool dm_use_blk_mq(struct mapped_device *md) -{ - return md->use_blk_mq; -} -EXPORT_SYMBOL_GPL(dm_use_blk_mq); - unsigned dm_get_reserved_rq_based_ios(void) { return __dm_get_module_param(&reserved_rq_based_ios, @@ -59,41 +46,13 @@ int dm_request_based(struct mapped_device *md) return queue_is_rq_based(md->queue); } -static void dm_old_start_queue(struct request_queue *q) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - if (blk_queue_stopped(q)) - blk_start_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static void dm_mq_start_queue(struct request_queue *q) +void dm_start_queue(struct request_queue *q) { blk_mq_unquiesce_queue(q); blk_mq_kick_requeue_list(q); } -void dm_start_queue(struct request_queue *q) -{ - if (!q->mq_ops) - dm_old_start_queue(q); - else - dm_mq_start_queue(q); -} - -static void dm_old_stop_queue(struct request_queue *q) -{ - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - if (!blk_queue_stopped(q)) - blk_stop_queue(q); - spin_unlock_irqrestore(q->queue_lock, flags); -} - -static void dm_mq_stop_queue(struct request_queue *q) +void dm_stop_queue(struct request_queue *q) { if (blk_mq_queue_stopped(q)) return; @@ -101,14 +60,6 @@ static void dm_mq_stop_queue(struct request_queue *q) blk_mq_quiesce_queue(q); } -void dm_stop_queue(struct request_queue *q) -{ - if 
(!q->mq_ops) - dm_old_stop_queue(q); - else - dm_mq_stop_queue(q); -} - /* * Partial completion handling for request-based dm */ @@ -179,27 +130,12 @@ static void rq_end_stats(struct mapped_device *md, struct request *orig) */ static void rq_completed(struct mapped_device *md, int rw, bool run_queue) { - struct request_queue *q = md->queue; - unsigned long flags; - atomic_dec(&md->pending[rw]); /* nudge anyone waiting on suspend queue */ if (!md_in_flight(md)) wake_up(&md->wait); - /* - * Run this off this callpath, as drivers could invoke end_io while - * inside their request_fn (and holding the queue lock). Calling - * back into ->request_fn() could deadlock attempting to grab the - * queue lock again. - */ - if (!q->mq_ops && run_queue) { - spin_lock_irqsave(q->queue_lock, flags); - blk_run_queue_async(q); - spin_unlock_irqrestore(q->queue_lock, flags); - } - /* * dm_put() must be at the end of this function. See the comment above */ @@ -222,27 +158,10 @@ static void dm_end_request(struct request *clone, blk_status_t error) tio->ti->type->release_clone_rq(clone); rq_end_stats(md, rq); - if (!rq->q->mq_ops) - blk_end_request_all(rq, error); - else - blk_mq_end_request(rq, error); + blk_mq_end_request(rq, error); rq_completed(md, rw, true); } -/* - * Requeue the original request of a clone. 
- */ -static void dm_old_requeue_request(struct request *rq, unsigned long delay_ms) -{ - struct request_queue *q = rq->q; - unsigned long flags; - - spin_lock_irqsave(q->queue_lock, flags); - blk_requeue_request(q, rq); - blk_delay_queue(q, delay_ms); - spin_unlock_irqrestore(q->queue_lock, flags); -} - static void __dm_mq_kick_requeue_list(struct request_queue *q, unsigned long msecs) { blk_mq_delay_kick_requeue_list(q, msecs); @@ -273,11 +192,7 @@ static void dm_requeue_original_request(struct dm_rq_target_io *tio, bool delay_ tio->ti->type->release_clone_rq(tio->clone); } - if (!rq->q->mq_ops) - dm_old_requeue_request(rq, delay_ms); - else - dm_mq_delay_requeue_request(rq, delay_ms); - + dm_mq_delay_requeue_request(rq, delay_ms); rq_completed(md, rw, false); } @@ -340,10 +255,7 @@ static void dm_softirq_done(struct request *rq) rq_end_stats(md, rq); rw = rq_data_dir(rq); - if (!rq->q->mq_ops) - blk_end_request_all(rq, tio->error); - else - blk_mq_end_request(rq, tio->error); + blk_mq_end_request(rq, tio->error); rq_completed(md, rw, false); return; } @@ -363,17 +275,14 @@ static void dm_complete_request(struct request *rq, blk_status_t error) struct dm_rq_target_io *tio = tio_from_request(rq); tio->error = error; - if (!rq->q->mq_ops) - blk_complete_request(rq); - else - blk_mq_complete_request(rq); + blk_mq_complete_request(rq); } /* * Complete the not-mapped clone and the original request with the error status * through softirq context. * Target's rq_end_io() function isn't called. - * This may be used when the target's map_rq() or clone_and_map_rq() functions fail. + * This may be used when the target's clone_and_map_rq() function fails. 
*/ static void dm_kill_unmapped_request(struct request *rq, blk_status_t error) { @@ -381,21 +290,10 @@ static void dm_kill_unmapped_request(struct request *rq, blk_status_t error) dm_complete_request(rq, error); } -/* - * Called with the clone's queue lock held (in the case of .request_fn) - */ static void end_clone_request(struct request *clone, blk_status_t error) { struct dm_rq_target_io *tio = clone->end_io_data; - /* - * Actual request completion is done in a softirq context which doesn't - * hold the clone's queue lock. Otherwise, deadlock could occur because: - * - another request may be submitted by the upper level driver - * of the stacking during the completion - * - the submission which requires queue lock may be done - * against this clone's queue - */ dm_complete_request(tio->orig, error); } @@ -446,8 +344,6 @@ static int setup_clone(struct request *clone, struct request *rq, return 0; } -static void map_tio_request(struct kthread_work *work); - static void init_tio(struct dm_rq_target_io *tio, struct request *rq, struct mapped_device *md) { @@ -464,8 +360,6 @@ static void init_tio(struct dm_rq_target_io *tio, struct request *rq, */ if (!md->init_tio_pdu) memset(&tio->info, 0, sizeof(tio->info)); - if (md->kworker_task) - kthread_init_work(&tio->work, map_tio_request); } /* @@ -504,10 +398,7 @@ check_again: blk_rq_unprep_clone(clone); tio->ti->type->release_clone_rq(clone); tio->clone = NULL; - if (!rq->q->mq_ops) - r = DM_MAPIO_DELAY_REQUEUE; - else - r = DM_MAPIO_REQUEUE; + r = DM_MAPIO_REQUEUE; goto check_again; } break; @@ -530,20 +421,23 @@ check_again: return r; } +/* DEPRECATED: previously used for request-based merge heuristic in dm_request_fn() */ +ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf) +{ + return sprintf(buf, "%u\n", 0); +} + +ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, + const char *buf, size_t count) +{ + return count; +} + static void 
dm_start_request(struct mapped_device *md, struct request *orig) { - if (!orig->q->mq_ops) - blk_start_request(orig); - else - blk_mq_start_request(orig); + blk_mq_start_request(orig); atomic_inc(&md->pending[rq_data_dir(orig)]); - if (md->seq_rq_merge_deadline_usecs) { - md->last_rq_pos = rq_end_sector(orig); - md->last_rq_rw = rq_data_dir(orig); - md->last_rq_start_time = ktime_get(); - } - if (unlikely(dm_stats_used(&md->stats))) { struct dm_rq_target_io *tio = tio_from_request(orig); tio->duration_jiffies = jiffies; @@ -563,8 +457,10 @@ static void dm_start_request(struct mapped_device *md, struct request *orig) dm_get(md); } -static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq) +static int dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, + unsigned int hctx_idx, unsigned int numa_node) { + struct mapped_device *md = set->driver_data; struct dm_rq_target_io *tio = blk_mq_rq_to_pdu(rq); /* @@ -581,163 +477,6 @@ static int __dm_rq_init_rq(struct mapped_device *md, struct request *rq) return 0; } -static int dm_rq_init_rq(struct request_queue *q, struct request *rq, gfp_t gfp) -{ - return __dm_rq_init_rq(q->rq_alloc_data, rq); -} - -static void map_tio_request(struct kthread_work *work) -{ - struct dm_rq_target_io *tio = container_of(work, struct dm_rq_target_io, work); - - if (map_request(tio) == DM_MAPIO_REQUEUE) - dm_requeue_original_request(tio, false); -} - -ssize_t dm_attr_rq_based_seq_io_merge_deadline_show(struct mapped_device *md, char *buf) -{ - return sprintf(buf, "%u\n", md->seq_rq_merge_deadline_usecs); -} - -#define MAX_SEQ_RQ_MERGE_DEADLINE_USECS 100000 - -ssize_t dm_attr_rq_based_seq_io_merge_deadline_store(struct mapped_device *md, - const char *buf, size_t count) -{ - unsigned deadline; - - if (dm_get_md_type(md) != DM_TYPE_REQUEST_BASED) - return count; - - if (kstrtouint(buf, 10, &deadline)) - return -EINVAL; - - if (deadline > MAX_SEQ_RQ_MERGE_DEADLINE_USECS) - deadline = MAX_SEQ_RQ_MERGE_DEADLINE_USECS; - 
- md->seq_rq_merge_deadline_usecs = deadline; - - return count; -} - -static bool dm_old_request_peeked_before_merge_deadline(struct mapped_device *md) -{ - ktime_t kt_deadline; - - if (!md->seq_rq_merge_deadline_usecs) - return false; - - kt_deadline = ns_to_ktime((u64)md->seq_rq_merge_deadline_usecs * NSEC_PER_USEC); - kt_deadline = ktime_add_safe(md->last_rq_start_time, kt_deadline); - - return !ktime_after(ktime_get(), kt_deadline); -} - -/* - * q->request_fn for old request-based dm. - * Called with the queue lock held. - */ -static void dm_old_request_fn(struct request_queue *q) -{ - struct mapped_device *md = q->queuedata; - struct dm_target *ti = md->immutable_target; - struct request *rq; - struct dm_rq_target_io *tio; - sector_t pos = 0; - - if (unlikely(!ti)) { - int srcu_idx; - struct dm_table *map = dm_get_live_table(md, &srcu_idx); - - if (unlikely(!map)) { - dm_put_live_table(md, srcu_idx); - return; - } - ti = dm_table_find_target(map, pos); - dm_put_live_table(md, srcu_idx); - } - - /* - * For suspend, check blk_queue_stopped() and increment - * ->pending within a single queue_lock not to increment the - * number of in-flight I/Os after the queue is stopped in - * dm_suspend(). 
- */ - while (!blk_queue_stopped(q)) { - rq = blk_peek_request(q); - if (!rq) - return; - - /* always use block 0 to find the target for flushes for now */ - pos = 0; - if (req_op(rq) != REQ_OP_FLUSH) - pos = blk_rq_pos(rq); - - if ((dm_old_request_peeked_before_merge_deadline(md) && - md_in_flight(md) && rq->bio && !bio_multiple_segments(rq->bio) && - md->last_rq_pos == pos && md->last_rq_rw == rq_data_dir(rq)) || - (ti->type->busy && ti->type->busy(ti))) { - blk_delay_queue(q, 10); - return; - } - - dm_start_request(md, rq); - - tio = tio_from_request(rq); - init_tio(tio, rq, md); - /* Establish tio->ti before queuing work (map_tio_request) */ - tio->ti = ti; - kthread_queue_work(&md->kworker, &tio->work); - BUG_ON(!irqs_disabled()); - } -} - -/* - * Fully initialize a .request_fn request-based queue. - */ -int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t) -{ - struct dm_target *immutable_tgt; - - /* Fully initialize the queue */ - md->queue->cmd_size = sizeof(struct dm_rq_target_io); - md->queue->rq_alloc_data = md; - md->queue->request_fn = dm_old_request_fn; - md->queue->init_rq_fn = dm_rq_init_rq; - - immutable_tgt = dm_table_get_immutable_target(t); - if (immutable_tgt && immutable_tgt->per_io_data_size) { - /* any target-specific per-io data is immediately after the tio */ - md->queue->cmd_size += immutable_tgt->per_io_data_size; - md->init_tio_pdu = true; - } - if (blk_init_allocated_queue(md->queue) < 0) - return -EINVAL; - - /* disable dm_old_request_fn's merge heuristic by default */ - md->seq_rq_merge_deadline_usecs = 0; - - blk_queue_softirq_done(md->queue, dm_softirq_done); - - /* Initialize the request-based DM worker thread */ - kthread_init_worker(&md->kworker); - md->kworker_task = kthread_run(kthread_worker_fn, &md->kworker, - "kdmwork-%s", dm_device_name(md)); - if (IS_ERR(md->kworker_task)) { - int error = PTR_ERR(md->kworker_task); - md->kworker_task = NULL; - return error; - } - - return 0; -} - -static int 
dm_mq_init_request(struct blk_mq_tag_set *set, struct request *rq, - unsigned int hctx_idx, unsigned int numa_node) -{ - return __dm_rq_init_rq(set->driver_data, rq); -} - static blk_status_t dm_mq_queue_rq(struct blk_mq_hw_ctx *hctx, const struct blk_mq_queue_data *bd) { @@ -790,11 +529,6 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) struct dm_target *immutable_tgt; int err; - if (!dm_table_all_blk_mq_devices(t)) { - DMERR("request-based dm-mq may only be stacked on blk-mq device(s)"); - return -EINVAL; - } - md->tag_set = kzalloc_node(sizeof(struct blk_mq_tag_set), GFP_KERNEL, md->numa_node_id); if (!md->tag_set) return -ENOMEM; @@ -845,6 +579,8 @@ void dm_mq_cleanup_mapped_device(struct mapped_device *md) module_param(reserved_rq_based_ios, uint, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(reserved_rq_based_ios, "Reserved IOs in request-based mempools"); +/* Unused, but preserved for userspace compatibility */ +static bool use_blk_mq = true; module_param(use_blk_mq, bool, S_IRUGO | S_IWUSR); MODULE_PARM_DESC(use_blk_mq, "Use block multiqueue for request-based DM devices"); diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h index f43c45460aac..b39245545229 100644 --- a/drivers/md/dm-rq.h +++ b/drivers/md/dm-rq.h @@ -46,10 +46,6 @@ struct dm_rq_clone_bio_info { struct bio clone; }; -bool dm_use_blk_mq_default(void); -bool dm_use_blk_mq(struct mapped_device *md); - -int dm_old_init_request_queue(struct mapped_device *md, struct dm_table *t); int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t); void dm_mq_cleanup_mapped_device(struct mapped_device *md); diff --git a/drivers/md/dm-sysfs.c b/drivers/md/dm-sysfs.c index c209b8a19b84..a05fcd50e1b9 100644 --- a/drivers/md/dm-sysfs.c +++ b/drivers/md/dm-sysfs.c @@ -92,7 +92,8 @@ static ssize_t dm_attr_suspended_show(struct mapped_device *md, char *buf) static ssize_t dm_attr_use_blk_mq_show(struct mapped_device *md, char *buf) { - sprintf(buf, "%d\n", dm_use_blk_mq(md)); + 
/* Purely for userspace compatibility */ + sprintf(buf, "%d\n", true); return strlen(buf); } diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 3d0e2c198f06..96e152c339a6 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -47,7 +47,6 @@ struct dm_table { bool integrity_supported:1; bool singleton:1; - bool all_blk_mq:1; unsigned integrity_added:1; /* @@ -910,21 +909,10 @@ static bool dm_table_supports_dax(struct dm_table *t) static bool dm_table_does_not_support_partial_completion(struct dm_table *t); -struct verify_rq_based_data { - unsigned sq_count; - unsigned mq_count; -}; - static int device_is_rq_based(struct dm_target *ti, struct dm_dev *dev, sector_t start, sector_t len, void *data) { struct request_queue *q = bdev_get_queue(dev->bdev); - struct verify_rq_based_data *v = data; - - if (q->mq_ops) - v->mq_count++; - else - v->sq_count++; return queue_is_rq_based(q); } @@ -933,7 +921,6 @@ static int dm_table_determine_type(struct dm_table *t) { unsigned i; unsigned bio_based = 0, request_based = 0, hybrid = 0; - struct verify_rq_based_data v = {.sq_count = 0, .mq_count = 0}; struct dm_target *tgt; struct list_head *devices = dm_table_get_devices(t); enum dm_queue_mode live_md_type = dm_get_md_type(t->md); @@ -1022,11 +1009,9 @@ verify_rq_based: int srcu_idx; struct dm_table *live_table = dm_get_live_table(t->md, &srcu_idx); - /* inherit live table's type and all_blk_mq */ - if (live_table) { + /* inherit live table's type */ + if (live_table) t->type = live_table->type; - t->all_blk_mq = live_table->all_blk_mq; - } dm_put_live_table(t->md, srcu_idx); return 0; } @@ -1042,21 +1027,10 @@ verify_rq_based: /* Non-request-stackable devices can't be used for request-based dm */ if (!tgt->type->iterate_devices || - !tgt->type->iterate_devices(tgt, device_is_rq_based, &v)) { + !tgt->type->iterate_devices(tgt, device_is_rq_based, NULL)) { DMERR("table load rejected: including non-request-stackable devices"); return -EINVAL; } - if 
(v.sq_count && v.mq_count) { - DMERR("table load rejected: not all devices are blk-mq request-stackable"); - return -EINVAL; - } - t->all_blk_mq = v.mq_count > 0; - - if (!t->all_blk_mq && - (t->type == DM_TYPE_MQ_REQUEST_BASED || t->type == DM_TYPE_NVME_BIO_BASED)) { - DMERR("table load rejected: all devices are not blk-mq request-stackable"); - return -EINVAL; - } return 0; } @@ -1105,11 +1079,6 @@ bool dm_table_request_based(struct dm_table *t) return __table_type_request_based(dm_table_get_type(t)); } -bool dm_table_all_blk_mq_devices(struct dm_table *t) -{ - return t->all_blk_mq; -} - static int dm_table_alloc_md_mempools(struct dm_table *t, struct mapped_device *md) { enum dm_queue_mode type = dm_table_get_type(t); @@ -2083,22 +2052,14 @@ void dm_table_run_md_queue_async(struct dm_table *t) { struct mapped_device *md; struct request_queue *queue; - unsigned long flags; if (!dm_table_request_based(t)) return; md = dm_table_get_md(t); queue = dm_get_md_queue(md); - if (queue) { - if (queue->mq_ops) - blk_mq_run_hw_queues(queue, true); - else { - spin_lock_irqsave(queue->queue_lock, flags); - blk_run_queue_async(queue); - spin_unlock_irqrestore(queue->queue_lock, flags); - } - } + if (queue) + blk_mq_run_hw_queues(queue, true); } EXPORT_SYMBOL(dm_table_run_md_queue_async); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 45abb54037fc..0ce00c6f5f9a 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1808,8 +1808,6 @@ static void dm_wq_work(struct work_struct *work); static void dm_init_normal_md_queue(struct mapped_device *md) { - md->use_blk_mq = false; - /* * Initialize aspects of queue that aren't relevant for blk-mq */ @@ -1820,8 +1818,6 @@ static void cleanup_mapped_device(struct mapped_device *md) { if (md->wq) destroy_workqueue(md->wq); - if (md->kworker_task) - kthread_stop(md->kworker_task); bioset_exit(&md->bs); bioset_exit(&md->io_bs); @@ -1888,7 +1884,6 @@ static struct mapped_device *alloc_dev(int minor) goto bad_io_barrier; md->numa_node_id = 
numa_node_id; - md->use_blk_mq = dm_use_blk_mq_default(); md->init_tio_pdu = false; md->type = DM_TYPE_NONE; mutex_init(&md->suspend_lock); @@ -1919,7 +1914,6 @@ static struct mapped_device *alloc_dev(int minor) INIT_WORK(&md->work, dm_wq_work); init_waitqueue_head(&md->eventq); init_completion(&md->kobj_holder.completion); - md->kworker_task = NULL; md->disk->major = _major; md->disk->first_minor = minor; @@ -2219,13 +2213,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) switch (type) { case DM_TYPE_REQUEST_BASED: - dm_init_normal_md_queue(md); - r = dm_old_init_request_queue(md, t); - if (r) { - DMERR("Cannot initialize queue for request-based mapped device"); - return r; - } - break; case DM_TYPE_MQ_REQUEST_BASED: r = dm_mq_init_request_queue(md, t); if (r) { @@ -2331,9 +2318,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait) blk_set_queue_dying(md->queue); - if (dm_request_based(md) && md->kworker_task) - kthread_flush_worker(&md->kworker); - /* * Take suspend_lock so that presuspend and postsuspend methods * do not race with internal suspend. @@ -2586,11 +2570,8 @@ static int __dm_suspend(struct mapped_device *md, struct dm_table *map, * Stop md->queue before flushing md->wq in case request-based * dm defers requests to md->wq from md->queue. 
*/ - if (dm_request_based(md)) { + if (dm_request_based(md)) dm_stop_queue(md->queue); - if (md->kworker_task) - kthread_flush_worker(&md->kworker); - } flush_workqueue(md->wq); diff --git a/drivers/md/dm.h b/drivers/md/dm.h index 114a81b27c37..2d539b82ec08 100644 --- a/drivers/md/dm.h +++ b/drivers/md/dm.h @@ -70,7 +70,6 @@ struct dm_target *dm_table_get_immutable_target(struct dm_table *t); struct dm_target *dm_table_get_wildcard_target(struct dm_table *t); bool dm_table_bio_based(struct dm_table *t); bool dm_table_request_based(struct dm_table *t); -bool dm_table_all_blk_mq_devices(struct dm_table *t); void dm_table_free_md_mempools(struct dm_table *t); struct dm_md_mempools *dm_table_get_md_mempools(struct dm_table *t); -- cgit v1.2.3 From 953923c09fe83255ae11845db1c9eb576ba73df8 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 11 Oct 2018 11:06:29 -0400 Subject: dm: rename DM_TYPE_MQ_REQUEST_BASED to DM_TYPE_REQUEST_BASED Now that request-based DM is only using blk-mq, there is no need to differentiate between legacy "rq" and new "mq". We're back to a single request-based DM -- and there was much rejoicing! 
Signed-off-by: Mike Snitzer --- drivers/md/dm-mpath.c | 14 +++++--------- drivers/md/dm-table.c | 7 +------ drivers/md/dm.c | 2 -- include/linux/device-mapper.h | 5 ++--- 4 files changed, 8 insertions(+), 20 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index a24ed3973e7c..d6a66921daf4 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -203,7 +203,7 @@ static struct multipath *alloc_multipath(struct dm_target *ti) static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m) { if (m->queue_mode == DM_TYPE_NONE) { - m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; + m->queue_mode = DM_TYPE_REQUEST_BASED; } else if (m->queue_mode == DM_TYPE_BIO_BASED) { INIT_WORK(&m->process_queued_bios, process_queued_bios); /* @@ -658,7 +658,7 @@ static int multipath_map_bio(struct dm_target *ti, struct bio *bio) static void process_queued_io_list(struct multipath *m) { - if (m->queue_mode == DM_TYPE_MQ_REQUEST_BASED) + if (m->queue_mode == DM_TYPE_REQUEST_BASED) dm_mq_kick_requeue_list(dm_table_get_md(m->ti->table)); else if (m->queue_mode == DM_TYPE_BIO_BASED) queue_work(kmultipathd, &m->process_queued_bios); @@ -1079,10 +1079,9 @@ static int parse_features(struct dm_arg_set *as, struct multipath *m) if (!strcasecmp(queue_mode_name, "bio")) m->queue_mode = DM_TYPE_BIO_BASED; - else if (!strcasecmp(queue_mode_name, "rq")) + else if (!strcasecmp(queue_mode_name, "rq") || + !strcasecmp(queue_mode_name, "mq")) m->queue_mode = DM_TYPE_REQUEST_BASED; - else if (!strcasecmp(queue_mode_name, "mq")) - m->queue_mode = DM_TYPE_MQ_REQUEST_BASED; else { ti->error = "Unknown 'queue_mode' requested"; r = -EINVAL; @@ -1716,9 +1715,6 @@ static void multipath_status(struct dm_target *ti, status_type_t type, case DM_TYPE_BIO_BASED: DMEMIT("queue_mode bio "); break; - case DM_TYPE_MQ_REQUEST_BASED: - DMEMIT("queue_mode mq "); - break; default: WARN_ON_ONCE(true); break; @@ -1962,7 +1958,7 @@ static int 
multipath_busy(struct dm_target *ti) /* no paths available, for blk-mq: rely on IO mapping to delay requeue */ if (!atomic_read(&m->nr_valid_paths) && test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) - return (m->queue_mode != DM_TYPE_MQ_REQUEST_BASED); + return (m->queue_mode != DM_TYPE_REQUEST_BASED); /* Guess which priority_group will be used at next mapping time */ pg = READ_ONCE(m->current_pg); diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 96e152c339a6..eeea32bb6a3e 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -871,8 +871,7 @@ static bool __table_type_bio_based(enum dm_queue_mode table_type) static bool __table_type_request_based(enum dm_queue_mode table_type) { - return (table_type == DM_TYPE_REQUEST_BASED || - table_type == DM_TYPE_MQ_REQUEST_BASED); + return table_type == DM_TYPE_REQUEST_BASED; } void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type) @@ -986,10 +985,6 @@ verify_bio_based: BUG_ON(!request_based); /* No targets in this table */ - /* - * The only way to establish DM_TYPE_MQ_REQUEST_BASED is by - * having a compatible target use dm_table_set_type. 
- */ t->type = DM_TYPE_REQUEST_BASED; verify_rq_based: diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 0ce00c6f5f9a..bf36e2635ea7 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -2213,7 +2213,6 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t) switch (type) { case DM_TYPE_REQUEST_BASED: - case DM_TYPE_MQ_REQUEST_BASED: r = dm_mq_init_request_queue(md, t); if (r) { DMERR("Cannot initialize queue for request-based dm-mq mapped device"); @@ -2946,7 +2945,6 @@ struct dm_md_mempools *dm_alloc_md_mempools(struct mapped_device *md, enum dm_qu goto out; break; case DM_TYPE_REQUEST_BASED: - case DM_TYPE_MQ_REQUEST_BASED: pool_size = max(dm_get_reserved_rq_based_ios(), min_pool_size); front_pad = offsetof(struct dm_rq_clone_bio_info, clone); /* per_io_data_size is used for blk-mq pdu at queue allocation */ diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 6fb0808e87c8..8d937754aa0c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -26,9 +26,8 @@ enum dm_queue_mode { DM_TYPE_NONE = 0, DM_TYPE_BIO_BASED = 1, DM_TYPE_REQUEST_BASED = 2, - DM_TYPE_MQ_REQUEST_BASED = 3, - DM_TYPE_DAX_BIO_BASED = 4, - DM_TYPE_NVME_BIO_BASED = 5, + DM_TYPE_DAX_BIO_BASED = 3, + DM_TYPE_NVME_BIO_BASED = 4, }; typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; -- cgit v1.2.3 From bab5d988841e58fec6ae22f486905ddde2d715f4 Mon Sep 17 00:00:00 2001 From: Igor Stoppa Date: Fri, 7 Sep 2018 20:03:37 +0300 Subject: dm: remove unnecessary unlikely() around WARN_ON_ONCE() WARN_ON() already contains an unlikely(), so it's not necessary to wrap it into another. 
Signed-off-by: Igor Stoppa Signed-off-by: Mike Snitzer --- drivers/md/dm-cache-policy-smq.c | 2 +- drivers/md/dm.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm-cache-policy-smq.c b/drivers/md/dm-cache-policy-smq.c index 1b5b9ad9e492..b61aac00ff40 100644 --- a/drivers/md/dm-cache-policy-smq.c +++ b/drivers/md/dm-cache-policy-smq.c @@ -1200,7 +1200,7 @@ static void queue_demotion(struct smq_policy *mq) struct policy_work work; struct entry *e; - if (unlikely(WARN_ON_ONCE(!mq->migrations_allowed))) + if (WARN_ON_ONCE(!mq->migrations_allowed)) return; e = q_peek(&mq->clean, mq->clean.nr_levels / 2, true); diff --git a/drivers/md/dm.c b/drivers/md/dm.c index bf36e2635ea7..1fbc28ab157c 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1666,7 +1666,7 @@ static blk_qc_t __process_bio(struct mapped_device *md, * Defend against IO still getting in during teardown * - as was seen for a time with nvme-fcloop */ - if (unlikely(WARN_ON_ONCE(!ti || !dm_target_is_valid(ti)))) { + if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) { error = -EIO; goto out; } -- cgit v1.2.3 From e76239a3748c90a8b0e197f8f4544a8ce52f126e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 12 Oct 2018 19:08:49 +0900 Subject: block: add a report_zones method Dispatching a report zones command through the request queue is a major pain due to the command reply payload rewriting necessary. Given that blkdev_report_zones() is executing everything synchronously, implement report zones as a block device file operation instead, allowing major simplification of the code in many places. sd, null-blk, dm-linear and dm-flakey being the only block device drivers supporting exposing zoned block devices, these drivers are modified to provide the device side implementation of the report_zones() block device file operation. 
For device mappers, a new report_zones() target type operation is defined so that the upper block layer calls to blkdev_report_zones() can be propagated down to the underlying devices of the dm targets. Implementation for this new operation is added to the dm-linear and dm-flakey targets. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig [Damien] * Changed method block_device argument to gendisk * Various bug fixes and improvements * Added support for null_blk, dm-linear and dm-flakey. Reviewed-by: Martin K. Petersen Reviewed-by: Mike Snitzer Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- block/blk-core.c | 1 - block/blk-mq-debugfs.c | 1 - block/blk-zoned.c | 164 +++++++++++++-------------------------- drivers/block/null_blk.h | 11 ++- drivers/block/null_blk_main.c | 23 +----- drivers/block/null_blk_zoned.c | 57 ++++---------- drivers/md/dm-flakey.c | 30 +++++--- drivers/md/dm-linear.c | 35 +++++---- drivers/md/dm.c | 169 ++++++++++++++++++++--------------------- drivers/scsi/sd.c | 13 +--- drivers/scsi/sd.h | 11 ++- drivers/scsi/sd_zbc.c | 153 ++++++++++++------------------------- include/linux/blk_types.h | 2 - include/linux/blkdev.h | 8 +- include/linux/device-mapper.h | 12 ++- include/trace/events/f2fs.h | 1 - 16 files changed, 266 insertions(+), 425 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/block/blk-core.c b/block/blk-core.c index 3ed60723e242..bc6ea87d10e0 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2300,7 +2300,6 @@ generic_make_request_checks(struct bio *bio) if (!q->limits.max_write_same_sectors) goto not_supported; break; - case REQ_OP_ZONE_REPORT: case REQ_OP_ZONE_RESET: if (!blk_queue_is_zoned(q)) goto not_supported; diff --git a/block/blk-mq-debugfs.c b/block/blk-mq-debugfs.c index 41b86f50d126..10b284a1f18d 100644 --- a/block/blk-mq-debugfs.c +++ b/block/blk-mq-debugfs.c @@ -283,7 +283,6 @@ static const char *const op_name[] = { REQ_OP_NAME(WRITE), REQ_OP_NAME(FLUSH), REQ_OP_NAME(DISCARD), - 
REQ_OP_NAME(ZONE_REPORT), REQ_OP_NAME(SECURE_ERASE), REQ_OP_NAME(ZONE_RESET), REQ_OP_NAME(WRITE_SAME), diff --git a/block/blk-zoned.c b/block/blk-zoned.c index 5d967fd39fbd..90cf503091d5 100644 --- a/block/blk-zoned.c +++ b/block/blk-zoned.c @@ -93,13 +93,10 @@ unsigned int blkdev_nr_zones(struct block_device *bdev) EXPORT_SYMBOL_GPL(blkdev_nr_zones); /* - * Check that a zone report belongs to the partition. - * If yes, fix its start sector and write pointer, copy it in the - * zone information array and return true. Return false otherwise. + * Check that a zone report belongs to this partition, and if yes, fix its start + * sector and write pointer and return true. Return false otherwise. */ -static bool blkdev_report_zone(struct block_device *bdev, - struct blk_zone *rep, - struct blk_zone *zone) +static bool blkdev_report_zone(struct block_device *bdev, struct blk_zone *rep) { sector_t offset = get_start_sect(bdev); @@ -114,11 +111,36 @@ static bool blkdev_report_zone(struct block_device *bdev, rep->wp = rep->start + rep->len; else rep->wp -= offset; - memcpy(zone, rep, sizeof(struct blk_zone)); - return true; } +static int blk_report_zones(struct gendisk *disk, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, + gfp_t gfp_mask) +{ + struct request_queue *q = disk->queue; + unsigned int z = 0, n, nrz = *nr_zones; + sector_t capacity = get_capacity(disk); + int ret; + + while (z < nrz && sector < capacity) { + n = nrz - z; + ret = disk->fops->report_zones(disk, sector, &zones[z], &n, + gfp_mask); + if (ret) + return ret; + if (!n) + break; + sector += blk_queue_zone_sectors(q) * n; + z += n; + } + + WARN_ON(z > *nr_zones); + *nr_zones = z; + + return 0; +} + /** * blkdev_report_zones - Get zones information * @bdev: Target block device @@ -133,130 +155,46 @@ static bool blkdev_report_zone(struct block_device *bdev, * requested by @nr_zones. The number of zones actually reported is * returned in @nr_zones. 
*/ -int blkdev_report_zones(struct block_device *bdev, - sector_t sector, - struct blk_zone *zones, - unsigned int *nr_zones, +int blkdev_report_zones(struct block_device *bdev, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, gfp_t gfp_mask) { struct request_queue *q = bdev_get_queue(bdev); - struct blk_zone_report_hdr *hdr; - unsigned int nrz = *nr_zones; - struct page *page; - unsigned int nr_rep; - size_t rep_bytes; - unsigned int nr_pages; - struct bio *bio; - struct bio_vec *bv; - unsigned int i, n, nz; - unsigned int ofst; - void *addr; + unsigned int i, nrz; int ret; - if (!q) - return -ENXIO; - if (!blk_queue_is_zoned(q)) return -EOPNOTSUPP; - if (!nrz) - return 0; - - if (sector > bdev->bd_part->nr_sects) { - *nr_zones = 0; - return 0; - } - /* - * The zone report has a header. So make room for it in the - * payload. Also make sure that the report fits in a single BIO - * that will not be split down the stack. + * A block device that advertized itself as zoned must have a + * report_zones method. If it does not have one defined, the device + * driver has a bug. So warn about that. 
*/ - rep_bytes = sizeof(struct blk_zone_report_hdr) + - sizeof(struct blk_zone) * nrz; - rep_bytes = (rep_bytes + PAGE_SIZE - 1) & PAGE_MASK; - if (rep_bytes > (queue_max_sectors(q) << 9)) - rep_bytes = queue_max_sectors(q) << 9; - - nr_pages = min_t(unsigned int, BIO_MAX_PAGES, - rep_bytes >> PAGE_SHIFT); - nr_pages = min_t(unsigned int, nr_pages, - queue_max_segments(q)); - - bio = bio_alloc(gfp_mask, nr_pages); - if (!bio) - return -ENOMEM; + if (WARN_ON_ONCE(!bdev->bd_disk->fops->report_zones)) + return -EOPNOTSUPP; - bio_set_dev(bio, bdev); - bio->bi_iter.bi_sector = blk_zone_start(q, sector); - bio_set_op_attrs(bio, REQ_OP_ZONE_REPORT, 0); - - for (i = 0; i < nr_pages; i++) { - page = alloc_page(gfp_mask); - if (!page) { - ret = -ENOMEM; - goto out; - } - if (!bio_add_page(bio, page, PAGE_SIZE, 0)) { - __free_page(page); - break; - } + if (!*nr_zones || sector >= bdev->bd_part->nr_sects) { + *nr_zones = 0; + return 0; } - if (i == 0) - ret = -ENOMEM; - else - ret = submit_bio_wait(bio); + nrz = min(*nr_zones, + __blkdev_nr_zones(q, bdev->bd_part->nr_sects - sector)); + ret = blk_report_zones(bdev->bd_disk, get_start_sect(bdev) + sector, + zones, &nrz, gfp_mask); if (ret) - goto out; - - /* - * Process the report result: skip the header and go through the - * reported zones to fixup and fixup the zone information for - * partitions. At the same time, return the zone information into - * the zone array. 
- */ - n = 0; - nz = 0; - nr_rep = 0; - bio_for_each_segment_all(bv, bio, i) { + return ret; - if (!bv->bv_page) + for (i = 0; i < nrz; i++) { + if (!blkdev_report_zone(bdev, zones)) break; - - addr = kmap_atomic(bv->bv_page); - - /* Get header in the first page */ - ofst = 0; - if (!nr_rep) { - hdr = addr; - nr_rep = hdr->nr_zones; - ofst = sizeof(struct blk_zone_report_hdr); - } - - /* Fixup and report zones */ - while (ofst < bv->bv_len && - n < nr_rep && nz < nrz) { - if (blkdev_report_zone(bdev, addr + ofst, &zones[nz])) - nz++; - ofst += sizeof(struct blk_zone); - n++; - } - - kunmap_atomic(addr); - - if (n >= nr_rep || nz >= nrz) - break; - + zones++; } - *nr_zones = nz; -out: - bio_for_each_segment_all(bv, bio, i) - __free_page(bv->bv_page); - bio_put(bio); + *nr_zones = i; - return ret; + return 0; } EXPORT_SYMBOL_GPL(blkdev_report_zones); diff --git a/drivers/block/null_blk.h b/drivers/block/null_blk.h index 34e0030f0592..7685df43f1ef 100644 --- a/drivers/block/null_blk.h +++ b/drivers/block/null_blk.h @@ -87,7 +87,9 @@ struct nullb { #ifdef CONFIG_BLK_DEV_ZONED int null_zone_init(struct nullb_device *dev); void null_zone_exit(struct nullb_device *dev); -blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio); +int null_zone_report(struct gendisk *disk, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, + gfp_t gfp_mask); void null_zone_write(struct nullb_cmd *cmd, sector_t sector, unsigned int nr_sectors); void null_zone_reset(struct nullb_cmd *cmd, sector_t sector); @@ -97,10 +99,11 @@ static inline int null_zone_init(struct nullb_device *dev) return -EINVAL; } static inline void null_zone_exit(struct nullb_device *dev) {} -static inline blk_status_t null_zone_report(struct nullb *nullb, - struct bio *bio) +static inline int null_zone_report(struct gendisk *disk, sector_t sector, + struct blk_zone *zones, + unsigned int *nr_zones, gfp_t gfp_mask) { - return BLK_STS_NOTSUPP; + return -EOPNOTSUPP; } static inline void 
null_zone_write(struct nullb_cmd *cmd, sector_t sector, unsigned int nr_sectors) diff --git a/drivers/block/null_blk_main.c b/drivers/block/null_blk_main.c index e94591021682..5ba426dbf377 100644 --- a/drivers/block/null_blk_main.c +++ b/drivers/block/null_blk_main.c @@ -1129,34 +1129,12 @@ static void null_restart_queue_async(struct nullb *nullb) blk_mq_start_stopped_hw_queues(q, true); } -static bool cmd_report_zone(struct nullb *nullb, struct nullb_cmd *cmd) -{ - struct nullb_device *dev = cmd->nq->dev; - - if (dev->queue_mode == NULL_Q_BIO) { - if (bio_op(cmd->bio) == REQ_OP_ZONE_REPORT) { - cmd->error = null_zone_report(nullb, cmd->bio); - return true; - } - } else { - if (req_op(cmd->rq) == REQ_OP_ZONE_REPORT) { - cmd->error = null_zone_report(nullb, cmd->rq->bio); - return true; - } - } - - return false; -} - static blk_status_t null_handle_cmd(struct nullb_cmd *cmd) { struct nullb_device *dev = cmd->nq->dev; struct nullb *nullb = dev->nullb; int err = 0; - if (cmd_report_zone(nullb, cmd)) - goto out; - if (test_bit(NULLB_DEV_FL_THROTTLED, &dev->flags)) { struct request *rq = cmd->rq; @@ -1443,6 +1421,7 @@ static const struct block_device_operations null_fops = { .owner = THIS_MODULE, .open = null_open, .release = null_release, + .report_zones = null_zone_report, }; static void null_init_queue(struct nullb *nullb, struct nullb_queue *nq) diff --git a/drivers/block/null_blk_zoned.c b/drivers/block/null_blk_zoned.c index 7c6b86d98700..c0b0e4a3fa8f 100644 --- a/drivers/block/null_blk_zoned.c +++ b/drivers/block/null_blk_zoned.c @@ -48,54 +48,27 @@ void null_zone_exit(struct nullb_device *dev) kvfree(dev->zones); } -static void null_zone_fill_bio(struct nullb_device *dev, struct bio *bio, - unsigned int zno, unsigned int nr_zones) +int null_zone_report(struct gendisk *disk, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, + gfp_t gfp_mask) { - struct blk_zone_report_hdr *hdr = NULL; - struct bio_vec bvec; - struct bvec_iter iter; - void *addr; 
- unsigned int zones_to_cpy; - - bio_for_each_segment(bvec, bio, iter) { - addr = kmap_atomic(bvec.bv_page); - - zones_to_cpy = bvec.bv_len / sizeof(struct blk_zone); - - if (!hdr) { - hdr = (struct blk_zone_report_hdr *)addr; - hdr->nr_zones = nr_zones; - zones_to_cpy--; - addr += sizeof(struct blk_zone_report_hdr); - } - - zones_to_cpy = min_t(unsigned int, zones_to_cpy, nr_zones); - - memcpy(addr, &dev->zones[zno], - zones_to_cpy * sizeof(struct blk_zone)); - - kunmap_atomic(addr); + struct nullb *nullb = disk->private_data; + struct nullb_device *dev = nullb->dev; + unsigned int zno, nrz = 0; - nr_zones -= zones_to_cpy; - zno += zones_to_cpy; + if (!dev->zoned) + /* Not a zoned null device */ + return -EOPNOTSUPP; - if (!nr_zones) - break; + zno = null_zone_no(dev, sector); + if (zno < dev->nr_zones) { + nrz = min_t(unsigned int, *nr_zones, dev->nr_zones - zno); + memcpy(zones, &dev->zones[zno], nrz * sizeof(struct blk_zone)); } -} -blk_status_t null_zone_report(struct nullb *nullb, struct bio *bio) -{ - struct nullb_device *dev = nullb->dev; - unsigned int zno = null_zone_no(dev, bio->bi_iter.bi_sector); - unsigned int nr_zones = dev->nr_zones - zno; - unsigned int max_zones; + *nr_zones = nrz; - max_zones = (bio->bi_iter.bi_size / sizeof(struct blk_zone)) - 1; - nr_zones = min_t(unsigned int, nr_zones, max_zones); - null_zone_fill_bio(nullb->dev, bio, zno, nr_zones); - - return BLK_STS_OK; + return 0; } void null_zone_write(struct nullb_cmd *cmd, sector_t sector, diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c index 32aabe27b37c..3cb97fa4c11d 100644 --- a/drivers/md/dm-flakey.c +++ b/drivers/md/dm-flakey.c @@ -315,10 +315,6 @@ static int flakey_map(struct dm_target *ti, struct bio *bio) if (bio_op(bio) == REQ_OP_ZONE_RESET) goto map_bio; - /* We need to remap reported zones, so remember the BIO iter */ - if (bio_op(bio) == REQ_OP_ZONE_REPORT) - goto map_bio; - /* Are we alive ? 
*/ elapsed = (jiffies - fc->start_time) / HZ; if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) { @@ -380,11 +376,6 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio, if (bio_op(bio) == REQ_OP_ZONE_RESET) return DM_ENDIO_DONE; - if (bio_op(bio) == REQ_OP_ZONE_REPORT) { - dm_remap_zone_report(ti, bio, fc->start); - return DM_ENDIO_DONE; - } - if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) { if (fc->corrupt_bio_byte && (fc->corrupt_bio_rw == READ) && all_corrupt_bio_flags_match(bio, fc)) { @@ -457,6 +448,26 @@ static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev return 0; } +#ifdef CONFIG_BLK_DEV_ZONED +static int flakey_report_zones(struct dm_target *ti, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, + gfp_t gfp_mask) +{ + struct flakey_c *fc = ti->private; + int ret; + + /* Do report and remap it */ + ret = blkdev_report_zones(fc->dev->bdev, flakey_map_sector(ti, sector), + zones, nr_zones, gfp_mask); + if (ret != 0) + return ret; + + if (*nr_zones) + dm_remap_zone_report(ti, fc->start, zones, nr_zones); + return 0; +} +#endif + static int flakey_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { struct flakey_c *fc = ti->private; @@ -469,6 +480,7 @@ static struct target_type flakey_target = { .version = {1, 5, 0}, #ifdef CONFIG_BLK_DEV_ZONED .features = DM_TARGET_ZONED_HM, + .report_zones = flakey_report_zones, #endif .module = THIS_MODULE, .ctr = flakey_ctr, diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c index 2f7c44a006c4..8d7ddee6ac4d 100644 --- a/drivers/md/dm-linear.c +++ b/drivers/md/dm-linear.c @@ -102,19 +102,6 @@ static int linear_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } -#ifdef CONFIG_BLK_DEV_ZONED -static int linear_end_io(struct dm_target *ti, struct bio *bio, - blk_status_t *error) -{ - struct linear_c *lc = ti->private; - - if (!*error && bio_op(bio) == REQ_OP_ZONE_REPORT) 
- dm_remap_zone_report(ti, bio, lc->start); - - return DM_ENDIO_DONE; -} -#endif - static void linear_status(struct dm_target *ti, status_type_t type, unsigned status_flags, char *result, unsigned maxlen) { @@ -148,6 +135,26 @@ static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev return 0; } +#ifdef CONFIG_BLK_DEV_ZONED +static int linear_report_zones(struct dm_target *ti, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, + gfp_t gfp_mask) +{ + struct linear_c *lc = (struct linear_c *) ti->private; + int ret; + + /* Do report and remap it */ + ret = blkdev_report_zones(lc->dev->bdev, linear_map_sector(ti, sector), + zones, nr_zones, gfp_mask); + if (ret != 0) + return ret; + + if (*nr_zones) + dm_remap_zone_report(ti, lc->start, zones, nr_zones); + return 0; +} +#endif + static int linear_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { @@ -211,8 +218,8 @@ static struct target_type linear_target = { .name = "linear", .version = {1, 4, 0}, #ifdef CONFIG_BLK_DEV_ZONED - .end_io = linear_end_io, .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM, + .report_zones = linear_report_zones, #else .features = DM_TARGET_PASSES_INTEGRITY, #endif diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 45abb54037fc..6be21dc210a1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -458,6 +458,57 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo) return dm_get_geometry(md, geo); } +static int dm_blk_report_zones(struct gendisk *disk, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, + gfp_t gfp_mask) +{ +#ifdef CONFIG_BLK_DEV_ZONED + struct mapped_device *md = disk->private_data; + struct dm_target *tgt; + struct dm_table *map; + int srcu_idx, ret; + + if (dm_suspended_md(md)) + return -EAGAIN; + + map = dm_get_live_table(md, &srcu_idx); + if (!map) + return -EIO; + + tgt = dm_table_find_target(map, sector); + if (!dm_target_is_valid(tgt)) { + ret = 
-EIO; + goto out; + } + + /* + * If we are executing this, we already know that the block device + * is a zoned device and so each target should have support for that + * type of drive. A missing report_zones method means that the target + * driver has a problem. + */ + if (WARN_ON(!tgt->type->report_zones)) { + ret = -EIO; + goto out; + } + + /* + * blkdev_report_zones() will loop and call this again to cover all the + * zones of the target, eventually moving on to the next target. + * So there is no need to loop here trying to fill the entire array + * of zones. + */ + ret = tgt->type->report_zones(tgt, sector, zones, + nr_zones, gfp_mask); + +out: + dm_put_live_table(md, srcu_idx); + return ret; +#else + return -ENOTSUPP; +#endif +} + static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx, struct block_device **bdev) __acquires(md->io_barrier) @@ -1155,93 +1206,49 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) EXPORT_SYMBOL_GPL(dm_accept_partial_bio); /* - * The zone descriptors obtained with a zone report indicate zone positions - * within the target backing device, regardless of that device is a partition - * and regardless of the target mapping start sector on the device or partition. - * The zone descriptors start sector and write pointer position must be adjusted - * to match their relative position within the dm device. - * A target may call dm_remap_zone_report() after completion of a - * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained from the - * backing device. + * The zone descriptors obtained with a zone report indicate + * zone positions within the underlying device of the target. The zone + * descriptors must be remapped to match their position within the dm device. + * The caller target should obtain the zones information using + * blkdev_report_zones() to ensure that remapping for partition offset is + * already handled. 
*/ -void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) +void dm_remap_zone_report(struct dm_target *ti, sector_t start, + struct blk_zone *zones, unsigned int *nr_zones) { #ifdef CONFIG_BLK_DEV_ZONED - struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); - struct bio *report_bio = tio->io->orig_bio; - struct blk_zone_report_hdr *hdr = NULL; struct blk_zone *zone; - unsigned int nr_rep = 0; - unsigned int ofst; - sector_t part_offset; - struct bio_vec bvec; - struct bvec_iter iter; - void *addr; - - if (bio->bi_status) - return; - - /* - * bio sector was incremented by the request size on completion. Taking - * into account the original request sector, the target start offset on - * the backing device and the target mapping offset (ti->begin), the - * start sector of the backing device. The partition offset is always 0 - * if the target uses a whole device. - */ - part_offset = bio->bi_iter.bi_sector + ti->begin - (start + bio_end_sector(report_bio)); + unsigned int nrz = *nr_zones; + int i; /* - * Remap the start sector of the reported zones. For sequential zones, - * also remap the write pointer position. + * Remap the start sector and write pointer position of the zones in + * the array. Since we may have obtained from the target underlying + * device more zones than the target size, also adjust the number + * of zones. 
*/ - bio_for_each_segment(bvec, report_bio, iter) { - addr = kmap_atomic(bvec.bv_page); - - /* Remember the report header in the first page */ - if (!hdr) { - hdr = addr; - ofst = sizeof(struct blk_zone_report_hdr); - } else - ofst = 0; - - /* Set zones start sector */ - while (hdr->nr_zones && ofst < bvec.bv_len) { - zone = addr + ofst; - zone->start -= part_offset; - if (zone->start >= start + ti->len) { - hdr->nr_zones = 0; - break; - } - zone->start = zone->start + ti->begin - start; - if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) { - if (zone->cond == BLK_ZONE_COND_FULL) - zone->wp = zone->start + zone->len; - else if (zone->cond == BLK_ZONE_COND_EMPTY) - zone->wp = zone->start; - else - zone->wp = zone->wp + ti->begin - start - part_offset; - } - ofst += sizeof(struct blk_zone); - hdr->nr_zones--; - nr_rep++; + for (i = 0; i < nrz; i++) { + zone = zones + i; + if (zone->start >= start + ti->len) { + memset(zone, 0, sizeof(struct blk_zone) * (nrz - i)); + break; } - if (addr != hdr) - kunmap_atomic(addr); + zone->start = zone->start + ti->begin - start; + if (zone->type == BLK_ZONE_TYPE_CONVENTIONAL) + continue; - if (!hdr->nr_zones) - break; - } - - if (hdr) { - hdr->nr_zones = nr_rep; - kunmap_atomic(hdr); + if (zone->cond == BLK_ZONE_COND_FULL) + zone->wp = zone->start + zone->len; + else if (zone->cond == BLK_ZONE_COND_EMPTY) + zone->wp = zone->start; + else + zone->wp = zone->wp + ti->begin - start; } - bio_advance(report_bio, report_bio->bi_iter.bi_size); - + *nr_zones = i; #else /* !CONFIG_BLK_DEV_ZONED */ - bio->bi_status = BLK_STS_NOTSUPP; + *nr_zones = 0; #endif } EXPORT_SYMBOL_GPL(dm_remap_zone_report); @@ -1327,8 +1334,7 @@ static int clone_bio(struct dm_target_io *tio, struct bio *bio, return r; } - if (bio_op(bio) != REQ_OP_ZONE_REPORT) - bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); + bio_advance(clone, to_bytes(sector - clone->bi_iter.bi_sector)); clone->bi_iter.bi_size = to_bytes(len); if (unlikely(bio_integrity(bio) != 
NULL)) @@ -1541,7 +1547,6 @@ static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti, */ static int __split_and_process_non_flush(struct clone_info *ci) { - struct bio *bio = ci->bio; struct dm_target *ti; unsigned len; int r; @@ -1553,11 +1558,7 @@ static int __split_and_process_non_flush(struct clone_info *ci) if (unlikely(__process_abnormal_io(ci, ti, &r))) return r; - if (bio_op(bio) == REQ_OP_ZONE_REPORT) - len = ci->sector_count; - else - len = min_t(sector_t, max_io_len(ci->sector, ti), - ci->sector_count); + len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count); r = __clone_and_map_data_bio(ci, ti, ci->sector, &len); if (r < 0) @@ -1616,9 +1617,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md, * We take a clone of the original to store in * ci.io->orig_bio to be used by end_io_acct() and * for dec_pending to use for completion handling. - * As this path is not used for REQ_OP_ZONE_REPORT, - * the usage of io->orig_bio in dm_remap_zone_report() - * won't be affected by this reassignment. 
*/ struct bio *b = bio_split(bio, bio_sectors(bio) - ci.sector_count, GFP_NOIO, &md->queue->bio_split); @@ -3167,6 +3165,7 @@ static const struct block_device_operations dm_blk_dops = { .release = dm_blk_close, .ioctl = dm_blk_ioctl, .getgeo = dm_blk_getgeo, + .report_zones = dm_blk_report_zones, .pr_ops = &dm_pr_ops, .owner = THIS_MODULE }; diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c index b762d0fd773c..42c0f299021d 100644 --- a/drivers/scsi/sd.c +++ b/drivers/scsi/sd.c @@ -1272,8 +1272,6 @@ static int sd_init_command(struct scsi_cmnd *cmd) case REQ_OP_READ: case REQ_OP_WRITE: return sd_setup_read_write_cmnd(cmd); - case REQ_OP_ZONE_REPORT: - return sd_zbc_setup_report_cmnd(cmd); case REQ_OP_ZONE_RESET: return sd_zbc_setup_reset_cmnd(cmd); default: @@ -1802,6 +1800,7 @@ static const struct block_device_operations sd_fops = { .check_events = sd_check_events, .revalidate_disk = sd_revalidate_disk, .unlock_native_capacity = sd_unlock_native_capacity, + .report_zones = sd_zbc_report_zones, .pr_ops = &sd_pr_ops, }; @@ -1953,16 +1952,6 @@ static int sd_done(struct scsi_cmnd *SCpnt) scsi_set_resid(SCpnt, blk_rq_bytes(req)); } break; - case REQ_OP_ZONE_REPORT: - if (!result) { - good_bytes = scsi_bufflen(SCpnt) - - scsi_get_resid(SCpnt); - scsi_set_resid(SCpnt, 0); - } else { - good_bytes = 0; - scsi_set_resid(SCpnt, blk_rq_bytes(req)); - } - break; default: /* * In case of bogus fw or device, we could end up having diff --git a/drivers/scsi/sd.h b/drivers/scsi/sd.h index a7d4f50b67d4..f72f20fd0d8b 100644 --- a/drivers/scsi/sd.h +++ b/drivers/scsi/sd.h @@ -273,10 +273,12 @@ static inline int sd_is_zoned(struct scsi_disk *sdkp) extern int sd_zbc_read_zones(struct scsi_disk *sdkp, unsigned char *buffer); extern void sd_zbc_remove(struct scsi_disk *sdkp); extern void sd_zbc_print_zones(struct scsi_disk *sdkp); -extern int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd); extern int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd); extern void sd_zbc_complete(struct 
scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr); +extern int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, + gfp_t gfp_mask); #else /* CONFIG_BLK_DEV_ZONED */ @@ -290,11 +292,6 @@ static inline void sd_zbc_remove(struct scsi_disk *sdkp) {} static inline void sd_zbc_print_zones(struct scsi_disk *sdkp) {} -static inline int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd) -{ - return BLKPREP_INVALID; -} - static inline int sd_zbc_setup_reset_cmnd(struct scsi_cmnd *cmd) { return BLKPREP_INVALID; @@ -304,6 +301,8 @@ static inline void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, struct scsi_sense_hdr *sshdr) {} +#define sd_zbc_report_zones NULL + #endif /* CONFIG_BLK_DEV_ZONED */ #endif /* _SCSI_DISK_H */ diff --git a/drivers/scsi/sd_zbc.c b/drivers/scsi/sd_zbc.c index 0678e1e108b0..0f2cfc81fce3 100644 --- a/drivers/scsi/sd_zbc.c +++ b/drivers/scsi/sd_zbc.c @@ -62,7 +62,7 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf, } /** - * sd_zbc_report_zones - Issue a REPORT ZONES scsi command. + * sd_zbc_do_report_zones - Issue a REPORT ZONES scsi command. * @sdkp: The target disk * @buf: Buffer to use for the reply * @buflen: the buffer size @@ -75,9 +75,9 @@ static void sd_zbc_parse_report(struct scsi_disk *sdkp, u8 *buf, * zones and will only report the count of zones fitting in the command reply * buffer. 
*/ -static int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buf, - unsigned int buflen, sector_t lba, - bool partial) +static int sd_zbc_do_report_zones(struct scsi_disk *sdkp, unsigned char *buf, + unsigned int buflen, sector_t lba, + bool partial) { struct scsi_device *sdp = sdkp->device; const int timeout = sdp->request_queue->rq_timeout; @@ -118,108 +118,56 @@ static int sd_zbc_report_zones(struct scsi_disk *sdkp, unsigned char *buf, } /** - * sd_zbc_setup_report_cmnd - Prepare a REPORT ZONES scsi command - * @cmd: The command to setup + * sd_zbc_report_zones - Disk report zones operation. + * @disk: The target disk + * @sector: Start 512B sector of the report + * @zones: Array of zone descriptors + * @nr_zones: Number of descriptors in the array + * @gfp_mask: Memory allocation mask * - * Call in sd_init_command() for a REQ_OP_ZONE_REPORT request. + * Execute a report zones command on the target disk. */ -int sd_zbc_setup_report_cmnd(struct scsi_cmnd *cmd) +int sd_zbc_report_zones(struct gendisk *disk, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, + gfp_t gfp_mask) { - struct request *rq = cmd->request; - struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); - sector_t lba, sector = blk_rq_pos(rq); - unsigned int nr_bytes = blk_rq_bytes(rq); - int ret; - - WARN_ON(nr_bytes == 0); + struct scsi_disk *sdkp = scsi_disk(disk); + unsigned int i, buflen, nrz = *nr_zones; + unsigned char *buf; + size_t offset = 0; + int ret = 0; if (!sd_is_zoned(sdkp)) /* Not a zoned device */ - return BLKPREP_KILL; - - ret = scsi_init_io(cmd); - if (ret != BLKPREP_OK) - return ret; - - cmd->cmd_len = 16; - memset(cmd->cmnd, 0, cmd->cmd_len); - cmd->cmnd[0] = ZBC_IN; - cmd->cmnd[1] = ZI_REPORT_ZONES; - lba = sectors_to_logical(sdkp->device, sector); - put_unaligned_be64(lba, &cmd->cmnd[2]); - put_unaligned_be32(nr_bytes, &cmd->cmnd[10]); - /* Do partial report for speeding things up */ - cmd->cmnd[14] = ZBC_REPORT_ZONE_PARTIAL; - - cmd->sc_data_direction 
= DMA_FROM_DEVICE; - cmd->sdb.length = nr_bytes; - cmd->transfersize = sdkp->device->sector_size; - cmd->allowed = 0; + return -EOPNOTSUPP; - return BLKPREP_OK; -} - -/** - * sd_zbc_report_zones_complete - Process a REPORT ZONES scsi command reply. - * @scmd: The completed report zones command - * @good_bytes: reply size in bytes - * - * Convert all reported zone descriptors to struct blk_zone. The conversion - * is done in-place, directly in the request specified sg buffer. - */ -static void sd_zbc_report_zones_complete(struct scsi_cmnd *scmd, - unsigned int good_bytes) -{ - struct request *rq = scmd->request; - struct scsi_disk *sdkp = scsi_disk(rq->rq_disk); - struct sg_mapping_iter miter; - struct blk_zone_report_hdr hdr; - struct blk_zone zone; - unsigned int offset, bytes = 0; - unsigned long flags; - u8 *buf; - - if (good_bytes < 64) - return; - - memset(&hdr, 0, sizeof(struct blk_zone_report_hdr)); - - sg_miter_start(&miter, scsi_sglist(scmd), scsi_sg_count(scmd), - SG_MITER_TO_SG | SG_MITER_ATOMIC); + /* + * Get a reply buffer for the number of requested zones plus a header. + * For ATA, buffers must be aligned to 512B. 
+ */ + buflen = roundup((nrz + 1) * 64, 512); + buf = kmalloc(buflen, gfp_mask); + if (!buf) + return -ENOMEM; - local_irq_save(flags); - while (sg_miter_next(&miter) && bytes < good_bytes) { + ret = sd_zbc_do_report_zones(sdkp, buf, buflen, + sectors_to_logical(sdkp->device, sector), true); + if (ret) + goto out_free_buf; - buf = miter.addr; - offset = 0; + nrz = min(nrz, get_unaligned_be32(&buf[0]) / 64); + for (i = 0; i < nrz; i++) { + offset += 64; + sd_zbc_parse_report(sdkp, buf + offset, zones); + zones++; + } - if (bytes == 0) { - /* Set the report header */ - hdr.nr_zones = min_t(unsigned int, - (good_bytes - 64) / 64, - get_unaligned_be32(&buf[0]) / 64); - memcpy(buf, &hdr, sizeof(struct blk_zone_report_hdr)); - offset += 64; - bytes += 64; - } + *nr_zones = nrz; - /* Parse zone descriptors */ - while (offset < miter.length && hdr.nr_zones) { - WARN_ON(offset > miter.length); - buf = miter.addr + offset; - sd_zbc_parse_report(sdkp, buf, &zone); - memcpy(buf, &zone, sizeof(struct blk_zone)); - offset += 64; - bytes += 64; - hdr.nr_zones--; - } - - if (!hdr.nr_zones) - break; +out_free_buf: + kfree(buf); - } - sg_miter_stop(&miter); - local_irq_restore(flags); + return ret; } /** @@ -302,13 +250,6 @@ void sd_zbc_complete(struct scsi_cmnd *cmd, unsigned int good_bytes, case REQ_OP_WRITE_ZEROES: case REQ_OP_WRITE_SAME: break; - - case REQ_OP_ZONE_REPORT: - - if (!result) - sd_zbc_report_zones_complete(cmd, good_bytes); - break; - } } @@ -390,7 +331,7 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) return -ENOMEM; /* Do a report zone to get max_lba and the same field */ - ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false); + ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, 0, false); if (ret) goto out_free; @@ -447,8 +388,8 @@ static int sd_zbc_check_zones(struct scsi_disk *sdkp, u32 *zblocks) } if (block < sdkp->capacity) { - ret = sd_zbc_report_zones(sdkp, buf, - SD_ZBC_BUF_SIZE, block, true); + ret = 
sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, + block, true); if (ret) goto out_free; } @@ -565,8 +506,8 @@ sd_zbc_setup_seq_zones_bitmap(struct scsi_disk *sdkp, u32 zone_shift, goto out; while (lba < sdkp->capacity) { - ret = sd_zbc_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, - lba, true); + ret = sd_zbc_do_report_zones(sdkp, buf, SD_ZBC_BUF_SIZE, lba, + true); if (ret) goto out; lba = sd_zbc_get_seq_zones(sdkp, buf, SD_ZBC_BUF_SIZE, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 9578c7ab1eb6..093a818c5b68 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -283,8 +283,6 @@ enum req_opf { REQ_OP_FLUSH = 2, /* discard sectors */ REQ_OP_DISCARD = 3, - /* get zone information */ - REQ_OP_ZONE_REPORT = 4, /* securely erase sectors */ REQ_OP_SECURE_ERASE = 5, /* seset a zone write pointer */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6bb845f9601a..51fe6472ce02 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -396,11 +396,6 @@ struct queue_limits { #ifdef CONFIG_BLK_DEV_ZONED -struct blk_zone_report_hdr { - unsigned int nr_zones; - u8 padding[60]; -}; - extern unsigned int blkdev_nr_zones(struct block_device *bdev); extern int blkdev_report_zones(struct block_device *bdev, sector_t sector, struct blk_zone *zones, @@ -1867,6 +1862,9 @@ struct block_device_operations { int (*getgeo)(struct block_device *, struct hd_geometry *); /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); + int (*report_zones)(struct gendisk *, sector_t sector, + struct blk_zone *zones, unsigned int *nr_zones, + gfp_t gfp_mask); struct module *owner; const struct pr_ops *pr_ops; }; diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 6fb0808e87c8..a23b396a8edc 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -92,6 +92,11 @@ typedef int (*dm_message_fn) 
(struct dm_target *ti, unsigned argc, char **argv, typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, struct block_device **bdev); +typedef int (*dm_report_zones_fn) (struct dm_target *ti, sector_t sector, + struct blk_zone *zones, + unsigned int *nr_zones, + gfp_t gfp_mask); + /* * These iteration functions are typically used to check (and combine) * properties of underlying devices. @@ -180,6 +185,9 @@ struct target_type { dm_status_fn status; dm_message_fn message; dm_prepare_ioctl_fn prepare_ioctl; +#ifdef CONFIG_BLK_DEV_ZONED + dm_report_zones_fn report_zones; +#endif dm_busy_fn busy; dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; @@ -420,8 +428,8 @@ struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); -void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, - sector_t start); +void dm_remap_zone_report(struct dm_target *ti, sector_t start, + struct blk_zone *zones, unsigned int *nr_zones); union map_info *dm_get_rq_mapinfo(struct request *rq); struct queue_limits *dm_get_queue_limits(struct mapped_device *md); diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 795698925d20..3ec73f17ee2a 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -82,7 +82,6 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); { REQ_OP_WRITE, "WRITE" }, \ { REQ_OP_FLUSH, "FLUSH" }, \ { REQ_OP_DISCARD, "DISCARD" }, \ - { REQ_OP_ZONE_REPORT, "ZONE_REPORT" }, \ { REQ_OP_SECURE_ERASE, "SECURE_ERASE" }, \ { REQ_OP_ZONE_RESET, "ZONE_RESET" }, \ { REQ_OP_WRITE_SAME, "WRITE_SAME" }, \ -- cgit v1.2.3