From d2c3c8dcb5987b8352e82089c79a41b6e17e28d2 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Thu, 20 Apr 2017 10:46:07 -0700 Subject: dm: convert DM printk macros to pr_ macros Using pr_ is the more common logging style. Standardize style and use new macro DM_FMT. Use no_printk in DMDEBUG macros when CONFIG_DM_DEBUG is not #defined. Signed-off-by: Joe Perches Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 71 +++++++++++++++++++------------------------ 1 file changed, 32 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 456da5017b32..19bc7fdfd6da 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -543,48 +543,41 @@ extern struct ratelimit_state dm_ratelimit_state; #define dm_ratelimit() 0 #endif -#define DMCRIT(f, arg...) \ - printk(KERN_CRIT DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) - -#define DMERR(f, arg...) \ - printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMERR_LIMIT(f, arg...) \ - do { \ - if (dm_ratelimit()) \ - printk(KERN_ERR DM_NAME ": " DM_MSG_PREFIX ": " \ - f "\n", ## arg); \ - } while (0) - -#define DMWARN(f, arg...) \ - printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMWARN_LIMIT(f, arg...) \ - do { \ - if (dm_ratelimit()) \ - printk(KERN_WARNING DM_NAME ": " DM_MSG_PREFIX ": " \ - f "\n", ## arg); \ - } while (0) - -#define DMINFO(f, arg...) \ - printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f "\n", ## arg) -#define DMINFO_LIMIT(f, arg...) \ - do { \ - if (dm_ratelimit()) \ - printk(KERN_INFO DM_NAME ": " DM_MSG_PREFIX ": " f \ - "\n", ## arg); \ - } while (0) +#define DM_FMT(fmt) DM_NAME ": " DM_MSG_PREFIX ": " fmt "\n" + +#define DMCRIT(fmt, ...) pr_crit(DM_FMT(fmt), ##__VA_ARGS__) + +#define DMERR(fmt, ...) pr_err(DM_FMT(fmt), ##__VA_ARGS__) +#define DMERR_LIMIT(fmt, ...) \ +do { \ + if (dm_ratelimit()) \ + DMERR(fmt, ##__VA_ARGS__); \ +} while (0) + +#define DMWARN(fmt, ...) pr_warn(DM_FMT(fmt), ##__VA_ARGS__) +#define DMWARN_LIMIT(fmt, ...) \ +do { \ + if (dm_ratelimit()) \ + DMWARN(fmt, ##__VA_ARGS__); \ +} while (0) + +#define DMINFO(fmt, ...) pr_info(DM_FMT(fmt), ##__VA_ARGS__) +#define DMINFO_LIMIT(fmt, ...) \ +do { \ + if (dm_ratelimit()) \ + DMINFO(fmt, ##__VA_ARGS__); \ +} while (0) #ifdef CONFIG_DM_DEBUG -# define DMDEBUG(f, arg...) \ - printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX " DEBUG: " f "\n", ## arg) -# define DMDEBUG_LIMIT(f, arg...) \ - do { \ - if (dm_ratelimit()) \ - printk(KERN_DEBUG DM_NAME ": " DM_MSG_PREFIX ": " f \ - "\n", ## arg); \ - } while (0) +#define DMDEBUG(fmt, ...) printk(KERN_DEBUG DM_FMT(fmt), ##__VA_ARGS__) +#define DMDEBUG_LIMIT(fmt, ...) \ +do { \ + if (dm_ratelimit()) \ + DMDEBUG(fmt, ##__VA_ARGS__); \ +} while (0) #else -# define DMDEBUG(f, arg...) do {} while (0) -# define DMDEBUG_LIMIT(f, arg...) do {} while (0) +#define DMDEBUG(fmt, ...) no_printk(fmt, ##__VA_ARGS__) +#define DMDEBUG_LIMIT(fmt, ...) no_printk(fmt, ##__VA_ARGS__) #endif #define DMEMIT(x...) sz += ((sz >= maxlen) ? \ -- cgit v1.2.3 From dd88d313bef0277e27597aa394607ed26c658724 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 16:40:43 -0700 Subject: dm table: add zoned block devices validation 1) Introduce DM_TARGET_ZONED_HM feature flag: The target drivers currently available will not operate correctly if a table target maps onto a host-managed zoned block device. To avoid problems, introduce the new feature flag DM_TARGET_ZONED_HM to allow a target to explicitly state that it supports host-managed zoned block devices. This feature is checked for all targets in a table if any of the table's block devices are host-managed. Note that as host-aware zoned block devices are backward compatible with regular block devices, they can be used by any of the current target types. This new feature is thus restricted to host-managed zoned block devices. 2) Check device area zone alignment: If a target maps to a zoned block device, check that the device area is aligned on zone boundaries to avoid problems with REQ_OP_ZONE_RESET operations (resetting a partially mapped sequential zone would not be possible). This also facilitates the processing of zone report with REQ_OP_ZONE_REPORT bios. 3) Check block devices zone model compatibility When setting the DM device's queue limits, several possibilities exists for zoned block devices: 1) The DM target driver may want to expose a different zone model (e.g. host-managed device emulation or regular block device on top of host-managed zoned block devices) 2) Expose the underlying zone model of the devices as-is To allow both cases, the underlying block device zone model must be set in the target limits in dm_set_device_limits() and the compatibility of all devices checked similarly to the logical block size alignment. For this last check, introduce validate_hardware_zoned_model() to check that all targets of a table have the same zone model and that the zone size of the target devices are equal. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche [Mike Snitzer refactored Damien's original work to simplify the code] Signed-off-by: Mike Snitzer --- drivers/md/dm-table.c | 162 ++++++++++++++++++++++++++++++++++++++++++ include/linux/device-mapper.h | 6 ++ 2 files changed, 168 insertions(+) (limited to 'include/linux') diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c index 5f5eae41f804..a39bcd9b982a 100644 --- a/drivers/md/dm-table.c +++ b/drivers/md/dm-table.c @@ -319,6 +319,39 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev, return 1; } + /* + * If the target is mapped to zoned block device(s), check + * that the zones are not partially mapped. + */ + if (bdev_zoned_model(bdev) != BLK_ZONED_NONE) { + unsigned int zone_sectors = bdev_zone_sectors(bdev); + + if (start & (zone_sectors - 1)) { + DMWARN("%s: start=%llu not aligned to h/w zone size %u of %s", + dm_device_name(ti->table->md), + (unsigned long long)start, + zone_sectors, bdevname(bdev, b)); + return 1; + } + + /* + * Note: The last zone of a zoned block device may be smaller + * than other zones. So for a target mapping the end of a + * zoned block device with such a zone, len would not be zone + * aligned. We do not allow such last smaller zone to be part + * of the mapping here to ensure that mappings with multiple + * devices do not end up with a smaller zone in the middle of + * the sector range. + */ + if (len & (zone_sectors - 1)) { + DMWARN("%s: len=%llu not aligned to h/w zone size %u of %s", + dm_device_name(ti->table->md), + (unsigned long long)len, + zone_sectors, bdevname(bdev, b)); + return 1; + } + } + if (logical_block_size_sectors <= 1) return 0; @@ -456,6 +489,8 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev, q->limits.alignment_offset, (unsigned long long) start << SECTOR_SHIFT); + limits->zoned = blk_queue_zoned_model(q); + return 0; } @@ -1346,6 +1381,88 @@ bool dm_table_has_no_data_devices(struct dm_table *table) return true; } +static int device_is_zoned_model(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + enum blk_zoned_model *zoned_model = data; + + return q && blk_queue_zoned_model(q) == *zoned_model; +} + +static bool dm_table_supports_zoned_model(struct dm_table *t, + enum blk_zoned_model zoned_model) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (zoned_model == BLK_ZONED_HM && + !dm_target_supports_zoned_hm(ti->type)) + return false; + + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_is_zoned_model, &zoned_model)) + return false; + } + + return true; +} + +static int device_matches_zone_sectors(struct dm_target *ti, struct dm_dev *dev, + sector_t start, sector_t len, void *data) +{ + struct request_queue *q = bdev_get_queue(dev->bdev); + unsigned int *zone_sectors = data; + + return q && blk_queue_zone_sectors(q) == *zone_sectors; +} + +static bool dm_table_matches_zone_sectors(struct dm_table *t, + unsigned int zone_sectors) +{ + struct dm_target *ti; + unsigned i; + + for (i = 0; i < dm_table_get_num_targets(t); i++) { + ti = dm_table_get_target(t, i); + + if (!ti->type->iterate_devices || + !ti->type->iterate_devices(ti, device_matches_zone_sectors, &zone_sectors)) + return false; + } + + return true; +} + +static int validate_hardware_zoned_model(struct dm_table *table, + enum blk_zoned_model zoned_model, + unsigned int zone_sectors) +{ + if (zoned_model == BLK_ZONED_NONE) + return 0; + + if (!dm_table_supports_zoned_model(table, zoned_model)) { + DMERR("%s: zoned model is not consistent across all devices", + dm_device_name(table->md)); + return -EINVAL; + } + + /* Check zone size validity and compatibility */ + if (!zone_sectors || !is_power_of_2(zone_sectors)) + return -EINVAL; + + if (!dm_table_matches_zone_sectors(table, zone_sectors)) { + DMERR("%s: zone sectors is not consistent across all devices", + dm_device_name(table->md)); + return -EINVAL; + } + + return 0; +} + /* * Establish the new table's queue_limits and validate them. */ @@ -1355,6 +1472,8 @@ int dm_calculate_queue_limits(struct dm_table *table, struct dm_target *ti; struct queue_limits ti_limits; unsigned i; + enum blk_zoned_model zoned_model = BLK_ZONED_NONE; + unsigned int zone_sectors = 0; blk_set_stacking_limits(limits); @@ -1372,6 +1491,15 @@ int dm_calculate_queue_limits(struct dm_table *table, ti->type->iterate_devices(ti, dm_set_device_limits, &ti_limits); + if (zoned_model == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) { + /* + * After stacking all limits, validate all devices + * in table support this zoned model and zone sectors. + */ + zoned_model = ti_limits.zoned; + zone_sectors = ti_limits.chunk_sectors; + } + /* Set I/O hints portion of queue limits */ if (ti->type->io_hints) ti->type->io_hints(ti, &ti_limits); @@ -1396,8 +1524,42 @@ combine_limits: dm_device_name(table->md), (unsigned long long) ti->begin, (unsigned long long) ti->len); + + /* + * FIXME: this should likely be moved to blk_stack_limits(), would + * also eliminate limits->zoned stacking hack in dm_set_device_limits() + */ + if (limits->zoned == BLK_ZONED_NONE && ti_limits.zoned != BLK_ZONED_NONE) { + /* + * By default, the stacked limits zoned model is set to + * BLK_ZONED_NONE in blk_set_stacking_limits(). Update + * this model using the first target model reported + * that is not BLK_ZONED_NONE. This will be either the + * first target device zoned model or the model reported + * by the target .io_hints. + */ + limits->zoned = ti_limits.zoned; + } } + /* + * Verify that the zoned model and zone sectors, as determined before + * any .io_hints override, are the same across all devices in the table. + * - this is especially relevant if .io_hints is emulating a disk-managed + * zoned model (aka BLK_ZONED_NONE) on host-managed zoned block devices. + * BUT... + */ + if (limits->zoned != BLK_ZONED_NONE) { + /* + * ...IF the above limits stacking determined a zoned model + * validate that all of the table's devices conform to it. + */ + zoned_model = limits->zoned; + zone_sectors = limits->chunk_sectors; + } + if (validate_hardware_zoned_model(table, zoned_model, zone_sectors)) + return -EINVAL; + return validate_hardware_logical_block_alignment(table, limits); } diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 19bc7fdfd6da..186ef74009cb 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -237,6 +237,12 @@ typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio) #define DM_TARGET_PASSES_INTEGRITY 0x00000020 #define dm_target_passes_integrity(type) ((type)->features & DM_TARGET_PASSES_INTEGRITY) +/* + * Indicates that a target supports host-managed zoned block devices. + */ +#define DM_TARGET_ZONED_HM 0x00000040 +#define dm_target_supports_zoned_hm(type) ((type)->features & DM_TARGET_ZONED_HM) + struct dm_target { struct dm_table *table; struct target_type *type; -- cgit v1.2.3 From 10999307c14eac281fbec3ada73bee7a05bd41dc Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 16:40:48 -0700 Subject: dm: introduce dm_remap_zone_report() A target driver support zoned block devices and exposing it as such may receive REQ_OP_ZONE_REPORT request for the user to determine the mapped device zone configuration. To process properly such request, the target driver may need to remap the zone descriptors provided in the report reply. The helper function dm_remap_zone_report() does this generically using only the target start offset and length and the start offset within the target device. dm_remap_zone_report() will remap the start sector of all zones reported. If the report includes sequential zones, the write pointer position of these zones will also be remapped. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm.c | 79 +++++++++++++++++++++++++++++++++++++++++++ include/linux/device-mapper.h | 2 ++ 2 files changed, 81 insertions(+) (limited to 'include/linux') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index e38d1d7d17d6..96bd13e581cd 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1012,6 +1012,85 @@ void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors) } EXPORT_SYMBOL_GPL(dm_accept_partial_bio); +/* + * The zone descriptors obtained with a zone report indicate + * zone positions within the target device. The zone descriptors + * must be remapped to match their position within the dm device. + * A target may call dm_remap_zone_report after completion of a + * REQ_OP_ZONE_REPORT bio to remap the zone descriptors obtained + * from the target device mapping to the dm device. + */ +void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, sector_t start) +{ +#ifdef CONFIG_BLK_DEV_ZONED + struct dm_target_io *tio = container_of(bio, struct dm_target_io, clone); + struct bio *report_bio = tio->io->bio; + struct blk_zone_report_hdr *hdr = NULL; + struct blk_zone *zone; + unsigned int nr_rep = 0; + unsigned int ofst; + struct bio_vec bvec; + struct bvec_iter iter; + void *addr; + + if (bio->bi_status) + return; + + /* + * Remap the start sector of the reported zones. For sequential zones, + * also remap the write pointer position. + */ + bio_for_each_segment(bvec, report_bio, iter) { + addr = kmap_atomic(bvec.bv_page); + + /* Remember the report header in the first page */ + if (!hdr) { + hdr = addr; + ofst = sizeof(struct blk_zone_report_hdr); + } else + ofst = 0; + + /* Set zones start sector */ + while (hdr->nr_zones && ofst < bvec.bv_len) { + zone = addr + ofst; + if (zone->start >= start + ti->len) { + hdr->nr_zones = 0; + break; + } + zone->start = zone->start + ti->begin - start; + if (zone->type != BLK_ZONE_TYPE_CONVENTIONAL) { + if (zone->cond == BLK_ZONE_COND_FULL) + zone->wp = zone->start + zone->len; + else if (zone->cond == BLK_ZONE_COND_EMPTY) + zone->wp = zone->start; + else + zone->wp = zone->wp + ti->begin - start; + } + ofst += sizeof(struct blk_zone); + hdr->nr_zones--; + nr_rep++; + } + + if (addr != hdr) + kunmap_atomic(addr); + + if (!hdr->nr_zones) + break; + } + + if (hdr) { + hdr->nr_zones = nr_rep; + kunmap_atomic(hdr); + } + + bio_advance(report_bio, report_bio->bi_iter.bi_size); + +#else /* !CONFIG_BLK_DEV_ZONED */ + bio->bi_status = BLK_STS_NOTSUPP; +#endif +} +EXPORT_SYMBOL_GPL(dm_remap_zone_report); + /* * Flush current->bio_list when the target map method blocks. * This fixes deadlocks in snapshot and possibly in other targets. diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 186ef74009cb..0c1b50ad23b0 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -450,6 +450,8 @@ struct gendisk *dm_disk(struct mapped_device *md); int dm_suspended(struct dm_target *ti); int dm_noflush_suspending(struct dm_target *ti); void dm_accept_partial_bio(struct bio *bio, unsigned n_sectors); +void dm_remap_zone_report(struct dm_target *ti, struct bio *bio, + sector_t start); union map_info *dm_get_rq_mapinfo(struct request *rq); struct queue_limits *dm_get_queue_limits(struct mapped_device *md); -- cgit v1.2.3 From b73c67c2cbb0004e6da9720a167fe42e31f7a6e8 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Mon, 8 May 2017 16:40:51 -0700 Subject: dm kcopyd: add sequential write feature When copyying blocks to host-managed zoned block devices, writes must be sequential. However, dm_kcopyd_copy() does not guarantee this as writes are issued in the completion order of reads, and reads may complete out of order despite being issued sequentially. Fix this by introducing the DM_KCOPYD_WRITE_SEQ feature flag. This can be specified when calling dm_kcopyd_copy() and should be set automatically if one of the destinations is a host-managed zoned block device. For a split job, the master job maintains the write position at which writes must be issued. This is checked with the pop() function which is modified to not return any write I/O sub job that is not at the correct write position. When DM_KCOPYD_WRITE_SEQ is specified for a job, errors cannot be ignored and the flag DM_KCOPYD_IGNORE_ERROR is ignored, even if specified by the user. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Signed-off-by: Mike Snitzer --- drivers/md/dm-kcopyd.c | 65 +++++++++++++++++++++++++++++++++++++++++++++-- include/linux/dm-kcopyd.h | 1 + 2 files changed, 64 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/drivers/md/dm-kcopyd.c b/drivers/md/dm-kcopyd.c index f85846741d50..cf2c67e35eaf 100644 --- a/drivers/md/dm-kcopyd.c +++ b/drivers/md/dm-kcopyd.c @@ -356,6 +356,7 @@ struct kcopyd_job { struct mutex lock; atomic_t sub_jobs; sector_t progress; + sector_t write_offset; struct kcopyd_job *master_job; }; @@ -386,6 +387,31 @@ void dm_kcopyd_exit(void) * Functions to push and pop a job onto the head of a given job * list. */ +static struct kcopyd_job *pop_io_job(struct list_head *jobs, + struct dm_kcopyd_client *kc) +{ + struct kcopyd_job *job; + + /* + * For I/O jobs, pop any read, any write without sequential write + * constraint and sequential writes that are at the right position. + */ + list_for_each_entry(job, jobs, list) { + if (job->rw == READ || !test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) { + list_del(&job->list); + return job; + } + + if (job->write_offset == job->master_job->write_offset) { + job->master_job->write_offset += job->source.count; + list_del(&job->list); + return job; + } + } + + return NULL; +} + static struct kcopyd_job *pop(struct list_head *jobs, struct dm_kcopyd_client *kc) { @@ -395,8 +421,12 @@ static struct kcopyd_job *pop(struct list_head *jobs, spin_lock_irqsave(&kc->job_lock, flags); if (!list_empty(jobs)) { - job = list_entry(jobs->next, struct kcopyd_job, list); - list_del(&job->list); + if (jobs == &kc->io_jobs) + job = pop_io_job(jobs, kc); + else { + job = list_entry(jobs->next, struct kcopyd_job, list); + list_del(&job->list); + } } spin_unlock_irqrestore(&kc->job_lock, flags); @@ -506,6 +536,14 @@ static int run_io_job(struct kcopyd_job *job) .client = job->kc->io_client, }; + /* + * If we need to write sequentially and some reads or writes failed, + * no point in continuing. + */ + if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) && + job->master_job->write_err) + return -EIO; + io_job_start(job->kc->throttle); if (job->rw == READ) @@ -655,6 +693,7 @@ static void segment_complete(int read_err, unsigned long write_err, int i; *sub_job = *job; + sub_job->write_offset = progress; sub_job->source.sector += progress; sub_job->source.count = count; @@ -723,6 +762,27 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->num_dests = num_dests; memcpy(&job->dests, dests, sizeof(*dests) * num_dests); + /* + * If one of the destination is a host-managed zoned block device, + * we need to write sequentially. If one of the destination is a + * host-aware device, then leave it to the caller to choose what to do. + */ + if (!test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags)) { + for (i = 0; i < job->num_dests; i++) { + if (bdev_zoned_model(dests[i].bdev) == BLK_ZONED_HM) { + set_bit(DM_KCOPYD_WRITE_SEQ, &job->flags); + break; + } + } + } + + /* + * If we need to write sequentially, errors cannot be ignored. + */ + if (test_bit(DM_KCOPYD_WRITE_SEQ, &job->flags) && + test_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags)) + clear_bit(DM_KCOPYD_IGNORE_ERROR, &job->flags); + if (from) { job->source = *from; job->pages = NULL; @@ -746,6 +806,7 @@ int dm_kcopyd_copy(struct dm_kcopyd_client *kc, struct dm_io_region *from, job->fn = fn; job->context = context; job->master_job = job; + job->write_offset = 0; if (job->source.count <= SUB_JOB_SIZE) dispatch_job(job); diff --git a/include/linux/dm-kcopyd.h b/include/linux/dm-kcopyd.h index f486d636b82e..cfac8588ed56 100644 --- a/include/linux/dm-kcopyd.h +++ b/include/linux/dm-kcopyd.h @@ -20,6 +20,7 @@ #define DM_KCOPYD_MAX_REGIONS 8 #define DM_KCOPYD_IGNORE_ERROR 1 +#define DM_KCOPYD_WRITE_SEQ 2 struct dm_kcopyd_throttle { unsigned throttle; -- cgit v1.2.3