diff options
-rw-r--r-- | Documentation/admin-guide/device-mapper/dm-zoned.rst | 34 | ||||
-rw-r--r-- | drivers/md/dm-zoned-metadata.c | 310 | ||||
-rw-r--r-- | drivers/md/dm-zoned-target.c | 185 | ||||
-rw-r--r-- | drivers/md/dm-zoned.h | 7 |
4 files changed, 427 insertions, 109 deletions
diff --git a/Documentation/admin-guide/device-mapper/dm-zoned.rst b/Documentation/admin-guide/device-mapper/dm-zoned.rst index 7547ce635161..553752ea2521 100644 --- a/Documentation/admin-guide/device-mapper/dm-zoned.rst +++ b/Documentation/admin-guide/device-mapper/dm-zoned.rst @@ -37,9 +37,13 @@ Algorithm dm-zoned implements an on-disk buffering scheme to handle non-sequential write accesses to the sequential zones of a zoned block device. Conventional zones are used for caching as well as for storing internal -metadata. +metadata. It can also use a regular block device together with the zoned +block device; in that case the regular block device will be split logically +in zones with the same size as the zoned block device. These zones will be +placed in front of the zones from the zoned block device and will be handled +just like conventional zones. -The zones of the device are separated into 2 types: +The zones of the device(s) are separated into 2 types: 1) Metadata zones: these are conventional zones used to store metadata. Metadata zones are not reported as useable capacity to the user. @@ -127,6 +131,13 @@ resumed. Flushing metadata thus only temporarily delays write and discard requests. Read requests can be processed concurrently while metadata flush is being executed. +If a regular device is used in conjunction with the zoned block device, +a third set of metadata (without the zone bitmaps) is written to the +start of the zoned block device. This metadata has a generation counter of +'0' and will never be updated during normal operation; it just serves for +identification purposes. The first and second copy of the metadata +are located at the start of the regular block device. + Usage ===== @@ -138,12 +149,21 @@ Ex:: dmzadm --format /dev/sdxx -For a formatted device, the target can be created normally with the -dmsetup utility. The only parameter that dm-zoned requires is the -underlying zoned block device name. Ex:: - echo "0 `blockdev --getsize ${dev}` zoned ${dev}" | \ - dmsetup create dmz-`basename ${dev}` +If two drives are to be used, both devices must be specified, with the +regular block device as the first device. + +Ex:: + + dmzadm --format /dev/sdxx /dev/sdyy + + +Fomatted device(s) can be started with the dmzadm utility, too.: + +Ex:: + + dmzadm --start /dev/sdxx /dev/sdyy + Information about the internal layout and current usage of the zones can be obtained with the 'status' callback from dmsetup: diff --git a/drivers/md/dm-zoned-metadata.c b/drivers/md/dm-zoned-metadata.c index 939deed1606a..fba690dd37f5 100644 --- a/drivers/md/dm-zoned-metadata.c +++ b/drivers/md/dm-zoned-metadata.c @@ -16,7 +16,7 @@ /* * Metadata version. */ -#define DMZ_META_VER 1 +#define DMZ_META_VER 2 /* * On-disk super block magic. @@ -69,8 +69,17 @@ struct dmz_super { /* Checksum */ __le32 crc; /* 48 */ + /* DM-Zoned label */ + u8 dmz_label[32]; /* 80 */ + + /* DM-Zoned UUID */ + u8 dmz_uuid[16]; /* 96 */ + + /* Device UUID */ + u8 dev_uuid[16]; /* 112 */ + /* Padding to full 512B sector */ - u8 reserved[464]; /* 512 */ + u8 reserved[400]; /* 512 */ }; /* @@ -133,8 +142,11 @@ struct dmz_sb { */ struct dmz_metadata { struct dmz_dev *dev; + unsigned int nr_devs; char devname[BDEVNAME_SIZE]; + char label[BDEVNAME_SIZE]; + uuid_t uuid; sector_t zone_bitmap_size; unsigned int zone_nr_bitmap_blocks; @@ -161,8 +173,9 @@ struct dmz_metadata { /* Zone information array */ struct dm_zone *zones; - struct dmz_sb sb[2]; + struct dmz_sb sb[3]; unsigned int mblk_primary; + unsigned int sb_version; u64 sb_gen; unsigned int min_nr_mblks; unsigned int max_nr_mblks; @@ -195,31 +208,56 @@ struct dmz_metadata { }; #define dmz_zmd_info(zmd, format, args...) \ - DMINFO("(%s): " format, (zmd)->devname, ## args) + DMINFO("(%s): " format, (zmd)->label, ## args) #define dmz_zmd_err(zmd, format, args...) \ - DMERR("(%s): " format, (zmd)->devname, ## args) + DMERR("(%s): " format, (zmd)->label, ## args) #define dmz_zmd_warn(zmd, format, args...) \ - DMWARN("(%s): " format, (zmd)->devname, ## args) + DMWARN("(%s): " format, (zmd)->label, ## args) #define dmz_zmd_debug(zmd, format, args...) \ - DMDEBUG("(%s): " format, (zmd)->devname, ## args) + DMDEBUG("(%s): " format, (zmd)->label, ## args) /* * Various accessors */ +static unsigned int dmz_dev_zone_id(struct dmz_metadata *zmd, struct dm_zone *zone) +{ + unsigned int zone_id; + + if (WARN_ON(!zone)) + return 0; + + zone_id = zone->id; + if (zmd->nr_devs > 1 && + (zone_id >= zmd->dev[1].zone_offset)) + zone_id -= zmd->dev[1].zone_offset; + return zone_id; +} + sector_t dmz_start_sect(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)zone->id << zmd->zone_nr_sectors_shift; + unsigned int zone_id = dmz_dev_zone_id(zmd, zone); + + return (sector_t)zone_id << zmd->zone_nr_sectors_shift; } sector_t dmz_start_block(struct dmz_metadata *zmd, struct dm_zone *zone) { - return (sector_t)zone->id << zmd->zone_nr_blocks_shift; + unsigned int zone_id = dmz_dev_zone_id(zmd, zone); + + return (sector_t)zone_id << zmd->zone_nr_blocks_shift; } struct dmz_dev *dmz_zone_to_dev(struct dmz_metadata *zmd, struct dm_zone *zone) { + if (WARN_ON(!zone)) + return &zmd->dev[0]; + + if (zmd->nr_devs > 1 && + zone->id >= zmd->dev[1].zone_offset) + return &zmd->dev[1]; + return &zmd->dev[0]; } @@ -275,17 +313,29 @@ unsigned int dmz_nr_unmap_seq_zones(struct dmz_metadata *zmd) const char *dmz_metadata_label(struct dmz_metadata *zmd) { - return (const char *)zmd->devname; + return (const char *)zmd->label; } bool dmz_check_dev(struct dmz_metadata *zmd) { - return dmz_check_bdev(&zmd->dev[0]); + unsigned int i; + + for (i = 0; i < zmd->nr_devs; i++) { + if (!dmz_check_bdev(&zmd->dev[i])) + return false; + } + return true; } bool dmz_dev_is_dying(struct dmz_metadata *zmd) { - return dmz_bdev_is_dying(&zmd->dev[0]); + unsigned int i; + + for (i = 0; i < zmd->nr_devs; i++) { + if (dmz_bdev_is_dying(&zmd->dev[i])) + return true; + } + return false; } /* @@ -687,6 +737,9 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op, struct bio *bio; int ret; + if (WARN_ON(!dev)) + return -EIO; + if (dmz_bdev_is_dying(dev)) return -EIO; @@ -711,19 +764,32 @@ static int dmz_rdwr_block(struct dmz_dev *dev, int op, */ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set) { - sector_t block = zmd->sb[set].block; struct dmz_mblock *mblk = zmd->sb[set].mblk; struct dmz_super *sb = zmd->sb[set].sb; struct dmz_dev *dev = zmd->sb[set].dev; + sector_t sb_block; u64 sb_gen = zmd->sb_gen + 1; int ret; sb->magic = cpu_to_le32(DMZ_MAGIC); - sb->version = cpu_to_le32(DMZ_META_VER); + + sb->version = cpu_to_le32(zmd->sb_version); + if (zmd->sb_version > 1) { + BUILD_BUG_ON(UUID_SIZE != 16); + export_uuid(sb->dmz_uuid, &zmd->uuid); + memcpy(sb->dmz_label, zmd->label, BDEVNAME_SIZE); + export_uuid(sb->dev_uuid, &dev->uuid); + } sb->gen = cpu_to_le64(sb_gen); - sb->sb_block = cpu_to_le64(block); + /* + * The metadata always references the absolute block address, + * ie relative to the entire block range, not the per-device + * block address. + */ + sb_block = zmd->sb[set].zone->id << zmd->zone_nr_blocks_shift; + sb->sb_block = cpu_to_le64(sb_block); sb->nr_meta_blocks = cpu_to_le32(zmd->nr_meta_blocks); sb->nr_reserved_seq = cpu_to_le32(zmd->nr_reserved_seq); sb->nr_chunks = cpu_to_le32(zmd->nr_chunks); @@ -734,7 +800,8 @@ static int dmz_write_sb(struct dmz_metadata *zmd, unsigned int set) sb->crc = 0; sb->crc = cpu_to_le32(crc32_le(sb_gen, (unsigned char *)sb, DMZ_BLOCK_SIZE)); - ret = dmz_rdwr_block(dev, REQ_OP_WRITE, block, mblk->page); + ret = dmz_rdwr_block(dev, REQ_OP_WRITE, zmd->sb[set].block, + mblk->page); if (ret == 0) ret = blkdev_issue_flush(dev->bdev, GFP_NOIO, NULL); @@ -915,6 +982,23 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) u32 crc, stored_crc; u64 gen; + if (le32_to_cpu(sb->magic) != DMZ_MAGIC) { + dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)", + DMZ_MAGIC, le32_to_cpu(sb->magic)); + return -ENXIO; + } + + zmd->sb_version = le32_to_cpu(sb->version); + if (zmd->sb_version > DMZ_META_VER) { + dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)", + DMZ_META_VER, zmd->sb_version); + return -EINVAL; + } + if ((zmd->sb_version < 1) && (set == 2)) { + dmz_dev_err(dev, "Tertiary superblocks are not supported"); + return -EINVAL; + } + gen = le64_to_cpu(sb->gen); stored_crc = le32_to_cpu(sb->crc); sb->crc = 0; @@ -925,16 +1009,45 @@ static int dmz_check_sb(struct dmz_metadata *zmd, unsigned int set) return -ENXIO; } - if (le32_to_cpu(sb->magic) != DMZ_MAGIC) { - dmz_dev_err(dev, "Invalid meta magic (needed 0x%08x, got 0x%08x)", - DMZ_MAGIC, le32_to_cpu(sb->magic)); - return -ENXIO; - } + if (zmd->sb_version > 1) { + uuid_t sb_uuid; + + import_uuid(&sb_uuid, sb->dmz_uuid); + if (uuid_is_null(&sb_uuid)) { + dmz_dev_err(dev, "NULL DM-Zoned uuid"); + return -ENXIO; + } else if (uuid_is_null(&zmd->uuid)) { + uuid_copy(&zmd->uuid, &sb_uuid); + } else if (!uuid_equal(&zmd->uuid, &sb_uuid)) { + dmz_dev_err(dev, "mismatching DM-Zoned uuid, " + "is %pUl expected %pUl", + &sb_uuid, &zmd->uuid); + return -ENXIO; + } + if (!strlen(zmd->label)) + memcpy(zmd->label, sb->dmz_label, BDEVNAME_SIZE); + else if (memcmp(zmd->label, sb->dmz_label, BDEVNAME_SIZE)) { + dmz_dev_err(dev, "mismatching DM-Zoned label, " + "is %s expected %s", + sb->dmz_label, zmd->label); + return -ENXIO; + } + import_uuid(&dev->uuid, sb->dev_uuid); + if (uuid_is_null(&dev->uuid)) { + dmz_dev_err(dev, "NULL device uuid"); + return -ENXIO; + } - if (le32_to_cpu(sb->version) != DMZ_META_VER) { - dmz_dev_err(dev, "Invalid meta version (needed %d, got %d)", - DMZ_META_VER, le32_to_cpu(sb->version)); - return -ENXIO; + if (set == 2) { + /* + * Generation number should be 0, but it doesn't + * really matter if it isn't. + */ + if (gen != 0) + dmz_dev_warn(dev, "Invalid generation %llu", + gen); + return 0; + } } nr_meta_zones = (le32_to_cpu(sb->nr_meta_blocks) + zmd->zone_nr_blocks - 1) @@ -1185,21 +1298,38 @@ static int dmz_load_sb(struct dmz_metadata *zmd) "Using super block %u (gen %llu)", zmd->mblk_primary, zmd->sb_gen); + if ((zmd->sb_version > 1) && zmd->sb[2].zone) { + zmd->sb[2].block = dmz_start_block(zmd, zmd->sb[2].zone); + zmd->sb[2].dev = dmz_zone_to_dev(zmd, zmd->sb[2].zone); + ret = dmz_get_sb(zmd, 2); + if (ret) { + dmz_dev_err(zmd->sb[2].dev, + "Read tertiary super block failed"); + return ret; + } + ret = dmz_check_sb(zmd, 2); + if (ret == -EINVAL) + return ret; + } return 0; } /* * Initialize a zone descriptor. */ -static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) +static int dmz_init_zone(struct blk_zone *blkz, unsigned int num, void *data) { struct dmz_metadata *zmd = data; + struct dmz_dev *dev = zmd->nr_devs > 1 ? &zmd->dev[1] : &zmd->dev[0]; + int idx = num + dev->zone_offset; struct dm_zone *zone = &zmd->zones[idx]; - struct dmz_dev *dev = zmd->dev; - /* Ignore the eventual last runt (smaller) zone */ if (blkz->len != zmd->zone_nr_sectors) { - if (blkz->start + blkz->len == dev->capacity) + if (zmd->sb_version > 1) { + /* Ignore the eventual runt (smaller) zone */ + set_bit(DMZ_OFFLINE, &zone->flags); + return 0; + } else if (blkz->start + blkz->len == dev->capacity) return 0; return -ENXIO; } @@ -1234,16 +1364,45 @@ static int dmz_init_zone(struct blk_zone *blkz, unsigned int idx, void *data) zmd->nr_useable_zones++; if (dmz_is_rnd(zone)) { zmd->nr_rnd_zones++; - if (!zmd->sb[0].zone) { - /* Super block zone */ + if (zmd->nr_devs == 1 && !zmd->sb[0].zone) { + /* Primary super block zone */ zmd->sb[0].zone = zone; } } + if (zmd->nr_devs > 1 && !zmd->sb[2].zone) { + /* Tertiary superblock zone */ + zmd->sb[2].zone = zone; + } } return 0; } +static void dmz_emulate_zones(struct dmz_metadata *zmd, struct dmz_dev *dev) +{ + int idx; + sector_t zone_offset = 0; + + for(idx = 0; idx < dev->nr_zones; idx++) { + struct dm_zone *zone = &zmd->zones[idx]; + + INIT_LIST_HEAD(&zone->link); + atomic_set(&zone->refcount, 0); + zone->id = idx; + zone->chunk = DMZ_MAP_UNMAPPED; + set_bit(DMZ_RND, &zone->flags); + zone->wp_block = 0; + zmd->nr_rnd_zones++; + zmd->nr_useable_zones++; + if (dev->capacity - zone_offset < zmd->zone_nr_sectors) { + /* Disable runt zone */ + set_bit(DMZ_OFFLINE, &zone->flags); + break; + } + zone_offset += zmd->zone_nr_sectors; + } +} + /* * Free zones descriptors. */ @@ -1259,11 +1418,11 @@ static void dmz_drop_zones(struct dmz_metadata *zmd) */ static int dmz_init_zones(struct dmz_metadata *zmd) { - struct dmz_dev *dev = &zmd->dev[0]; - int ret; + int i, ret; + struct dmz_dev *zoned_dev = &zmd->dev[0]; /* Init */ - zmd->zone_nr_sectors = dev->zone_nr_sectors; + zmd->zone_nr_sectors = zmd->dev[0].zone_nr_sectors; zmd->zone_nr_sectors_shift = ilog2(zmd->zone_nr_sectors); zmd->zone_nr_blocks = dmz_sect2blk(zmd->zone_nr_sectors); zmd->zone_nr_blocks_shift = ilog2(zmd->zone_nr_blocks); @@ -1274,7 +1433,14 @@ static int dmz_init_zones(struct dmz_metadata *zmd) DMZ_BLOCK_SIZE_BITS); /* Allocate zone array */ - zmd->nr_zones = dev->nr_zones; + zmd->nr_zones = 0; + for (i = 0; i < zmd->nr_devs; i++) + zmd->nr_zones += zmd->dev[i].nr_zones; + + if (!zmd->nr_zones) { + DMERR("(%s): No zones found", zmd->devname); + return -ENXIO; + } zmd->zones = kcalloc(zmd->nr_zones, sizeof(struct dm_zone), GFP_KERNEL); if (!zmd->zones) return -ENOMEM; @@ -1282,14 +1448,27 @@ static int dmz_init_zones(struct dmz_metadata *zmd) DMDEBUG("(%s): Using %zu B for zone information", zmd->devname, sizeof(struct dm_zone) * zmd->nr_zones); + if (zmd->nr_devs > 1) { + dmz_emulate_zones(zmd, &zmd->dev[0]); + /* + * Primary superblock zone is always at zone 0 when multiple + * drives are present. + */ + zmd->sb[0].zone = &zmd->zones[0]; + + zoned_dev = &zmd->dev[1]; + } + /* * Get zone information and initialize zone descriptors. At the same * time, determine where the super block should be: first block of the * first randomly writable zone. */ - ret = blkdev_report_zones(dev->bdev, 0, BLK_ALL_ZONES, dmz_init_zone, - zmd); + ret = blkdev_report_zones(zoned_dev->bdev, 0, BLK_ALL_ZONES, + dmz_init_zone, zmd); if (ret < 0) { + DMDEBUG("(%s): Failed to report zones, error %d", + zmd->devname, ret); dmz_drop_zones(zmd); return ret; } @@ -1325,6 +1504,9 @@ static int dmz_update_zone(struct dmz_metadata *zmd, struct dm_zone *zone) unsigned int noio_flag; int ret; + if (dev->flags & DMZ_BDEV_REGULAR) + return 0; + /* * Get zone information from disk. Since blkdev_report_zones() uses * GFP_KERNEL by default for memory allocations, set the per-task @@ -2475,19 +2657,34 @@ static void dmz_print_dev(struct dmz_metadata *zmd, int num) { struct dmz_dev *dev = &zmd->dev[num]; - dmz_dev_info(dev, "Host-%s zoned block device", - bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ? - "aware" : "managed"); - dmz_dev_info(dev, " %llu 512-byte logical sectors", - (u64)dev->capacity); - dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors", - dev->nr_zones, (u64)zmd->zone_nr_sectors); + if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) + dmz_dev_info(dev, "Regular block device"); + else + dmz_dev_info(dev, "Host-%s zoned block device", + bdev_zoned_model(dev->bdev) == BLK_ZONED_HA ? + "aware" : "managed"); + if (zmd->sb_version > 1) { + sector_t sector_offset = + dev->zone_offset << zmd->zone_nr_sectors_shift; + + dmz_dev_info(dev, " %llu 512-byte logical sectors (offset %llu)", + (u64)dev->capacity, (u64)sector_offset); + dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors (offset %llu)", + dev->nr_zones, (u64)zmd->zone_nr_sectors, + (u64)dev->zone_offset); + } else { + dmz_dev_info(dev, " %llu 512-byte logical sectors", + (u64)dev->capacity); + dmz_dev_info(dev, " %u zones of %llu 512-byte logical sectors", + dev->nr_zones, (u64)zmd->zone_nr_sectors); + } } /* * Initialize the zoned metadata. */ -int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, +int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, + struct dmz_metadata **metadata, const char *devname) { struct dmz_metadata *zmd; @@ -2501,6 +2698,7 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, strcpy(zmd->devname, devname); zmd->dev = dev; + zmd->nr_devs = num_dev; zmd->mblk_rbtree = RB_ROOT; init_rwsem(&zmd->mblk_sem); mutex_init(&zmd->mblk_flush_lock); @@ -2535,11 +2733,24 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, /* Set metadata zones starting from sb_zone */ for (i = 0; i < zmd->nr_meta_zones << 1; i++) { zone = dmz_get(zmd, zmd->sb[0].zone->id + i); - if (!dmz_is_rnd(zone)) + if (!dmz_is_rnd(zone)) { + dmz_zmd_err(zmd, + "metadata zone %d is not random", i); + ret = -ENXIO; goto err; + } + set_bit(DMZ_META, &zone->flags); + } + if (zmd->sb[2].zone) { + zone = dmz_get(zmd, zmd->sb[2].zone->id); + if (!zone) { + dmz_zmd_err(zmd, + "Tertiary metadata zone not present"); + ret = -ENXIO; + goto err; + } set_bit(DMZ_META, &zone->flags); } - /* Load mapping table */ ret = dmz_load_mapping(zmd); if (ret) @@ -2564,8 +2775,9 @@ int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **metadata, goto err; } - dmz_zmd_info(zmd, "DM-Zoned metadata version %d", DMZ_META_VER); - dmz_print_dev(zmd, 0); + dmz_zmd_info(zmd, "DM-Zoned metadata version %d", zmd->sb_version); + for (i = 0; i < zmd->nr_devs; i++) + dmz_print_dev(zmd, i); dmz_zmd_info(zmd, " %u zones of %llu 512-byte logical sectors", zmd->nr_zones, (u64)zmd->zone_nr_sectors); diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c index a09fb78ffe88..ea43f6892ced 100644 --- a/drivers/md/dm-zoned-target.c +++ b/drivers/md/dm-zoned-target.c @@ -13,6 +13,8 @@ #define DMZ_MIN_BIOS 8192 +#define DMZ_MAX_DEVS 2 + /* * Zone BIO context. */ @@ -38,7 +40,7 @@ struct dm_chunk_work { * Target descriptor. */ struct dmz_target { - struct dm_dev *ddev; + struct dm_dev *ddev[DMZ_MAX_DEVS]; unsigned long flags; @@ -81,7 +83,7 @@ static inline void dmz_bio_endio(struct bio *bio, blk_status_t status) if (status != BLK_STS_OK && bio->bi_status == BLK_STS_OK) bio->bi_status = status; - if (bio->bi_status != BLK_STS_OK) + if (bioctx->dev && bio->bi_status != BLK_STS_OK) bioctx->dev->flags |= DMZ_CHECK_BDEV; if (refcount_dec_and_test(&bioctx->ref)) { @@ -690,60 +692,64 @@ static int dmz_map(struct dm_target *ti, struct bio *bio) /* * Get zoned device information. */ -static int dmz_get_zoned_device(struct dm_target *ti, char *path) +static int dmz_get_zoned_device(struct dm_target *ti, char *path, + int idx, int nr_devs) { struct dmz_target *dmz = ti->private; - struct request_queue *q; + struct dm_dev *ddev; struct dmz_dev *dev; - sector_t aligned_capacity; int ret; + struct block_device *bdev; /* Get the target device */ - ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &dmz->ddev); + ret = dm_get_device(ti, path, dm_table_get_mode(ti->table), &ddev); if (ret) { ti->error = "Get target device failed"; - dmz->ddev = NULL; return ret; } - dev = kzalloc(sizeof(struct dmz_dev), GFP_KERNEL); - if (!dev) { - ret = -ENOMEM; - goto err; + bdev = ddev->bdev; + if (bdev_zoned_model(bdev) == BLK_ZONED_NONE) { + if (nr_devs == 1) { + ti->error = "Invalid regular device"; + goto err; + } + if (idx != 0) { + ti->error = "First device must be a regular device"; + goto err; + } + if (dmz->ddev[0]) { + ti->error = "Too many regular devices"; + goto err; + } + dev = &dmz->dev[idx]; + dev->flags = DMZ_BDEV_REGULAR; + } else { + if (dmz->ddev[idx]) { + ti->error = "Too many zoned devices"; + goto err; + } + if (nr_devs > 1 && idx == 0) { + ti->error = "First device must be a regular device"; + goto err; + } + dev = &dmz->dev[idx]; } - - dev->bdev = dmz->ddev->bdev; + dev->bdev = bdev; (void)bdevname(dev->bdev, dev->name); - if (bdev_zoned_model(dev->bdev) == BLK_ZONED_NONE) { - ti->error = "Not a zoned block device"; - ret = -EINVAL; - goto err; - } - - q = bdev_get_queue(dev->bdev); - dev->capacity = i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT; - aligned_capacity = dev->capacity & - ~((sector_t)blk_queue_zone_sectors(q) - 1); - if (ti->begin || - ((ti->len != dev->capacity) && (ti->len != aligned_capacity))) { - ti->error = "Partial mapping not supported"; - ret = -EINVAL; + dev->capacity = i_size_read(bdev->bd_inode) >> SECTOR_SHIFT; + if (ti->begin) { + ti->error = "Partial mapping is not supported"; goto err; } - dev->zone_nr_sectors = blk_queue_zone_sectors(q); - - dev->nr_zones = blkdev_nr_zones(dev->bdev->bd_disk); - - dmz->dev = dev; + dmz->ddev[idx] = ddev; return 0; err: - dm_put_device(ti, dmz->ddev); - kfree(dev); - - return ret; + dm_put_device(ti, ddev); + return -EINVAL; } /* @@ -752,10 +758,56 @@ err: static void dmz_put_zoned_device(struct dm_target *ti) { struct dmz_target *dmz = ti->private; + int i; - dm_put_device(ti, dmz->ddev); - kfree(dmz->dev); - dmz->dev = NULL; + for (i = 0; i < DMZ_MAX_DEVS; i++) { + if (dmz->ddev[i]) { + dm_put_device(ti, dmz->ddev[i]); + dmz->ddev[i] = NULL; + } + } +} + +static int dmz_fixup_devices(struct dm_target *ti) +{ + struct dmz_target *dmz = ti->private; + struct dmz_dev *reg_dev, *zoned_dev; + struct request_queue *q; + + /* + * When we have two devices, the first one must be a regular block + * device and the second a zoned block device. + */ + if (dmz->ddev[0] && dmz->ddev[1]) { + reg_dev = &dmz->dev[0]; + if (!(reg_dev->flags & DMZ_BDEV_REGULAR)) { + ti->error = "Primary disk is not a regular device"; + return -EINVAL; + } + zoned_dev = &dmz->dev[1]; + if (zoned_dev->flags & DMZ_BDEV_REGULAR) { + ti->error = "Secondary disk is not a zoned device"; + return -EINVAL; + } + } else { + reg_dev = NULL; + zoned_dev = &dmz->dev[0]; + if (zoned_dev->flags & DMZ_BDEV_REGULAR) { + ti->error = "Disk is not a zoned device"; + return -EINVAL; + } + } + q = bdev_get_queue(zoned_dev->bdev); + zoned_dev->zone_nr_sectors = blk_queue_zone_sectors(q); + zoned_dev->nr_zones = blkdev_nr_zones(zoned_dev->bdev->bd_disk); + + if (reg_dev) { + reg_dev->zone_nr_sectors = zoned_dev->zone_nr_sectors; + reg_dev->nr_zones = DIV_ROUND_UP(reg_dev->capacity, + reg_dev->zone_nr_sectors); + zoned_dev->zone_offset = reg_dev->nr_zones; + } + return 0; } /* @@ -764,11 +816,10 @@ static void dmz_put_zoned_device(struct dm_target *ti) static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) { struct dmz_target *dmz; - struct dmz_dev *dev; int ret; /* Check arguments */ - if (argc != 1) { + if (argc < 1 || argc > 2) { ti->error = "Invalid argument count"; return -EINVAL; } @@ -779,18 +830,34 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv) ti->error = "Unable to allocate the zoned target descriptor"; return -ENOMEM; } + dmz->dev = kcalloc(2, sizeof(struct dmz_dev), GFP_KERNEL); + if (!dmz->dev) { + ti->error = "Unable to allocate the zoned device descriptors"; + kfree(dmz); + return -ENOMEM; + } ti->private = dmz; /* Get the target zoned block device */ - ret = dmz_get_zoned_device(ti, argv[0]); + ret = dmz_get_zoned_device(ti, argv[0], 0, argc); + if (ret) + goto err; + + if (argc == 2) { + ret = dmz_get_zoned_device(ti, argv[1], 1, argc); + if (ret) { + dmz_put_zoned_device(ti); + goto err; + } + } + ret = dmz_fixup_devices(ti); if (ret) { - dmz->ddev = NULL; + dmz_put_zoned_device(ti); goto err; } /* Initialize metadata */ - dev = dmz->dev; - ret = dmz_ctr_metadata(dev, &dmz->metadata, + ret = dmz_ctr_metadata(dmz->dev, argc, &dmz->metadata, dm_table_device_name(ti->table)); if (ret) { ti->error = "Metadata initialization failed"; @@ -867,6 +934,7 @@ err_meta: err_dev: dmz_put_zoned_device(ti); err: + kfree(dmz->dev); kfree(dmz); return ret; @@ -897,6 +965,7 @@ static void dmz_dtr(struct dm_target *ti) mutex_destroy(&dmz->chunk_lock); + kfree(dmz->dev); kfree(dmz); } @@ -971,10 +1040,17 @@ static int dmz_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, void *data) { struct dmz_target *dmz = ti->private; - struct dmz_dev *dev = dmz->dev; - sector_t capacity = dev->capacity & ~(dmz_zone_nr_sectors(dmz->metadata) - 1); - - return fn(ti, dmz->ddev, 0, capacity, data); + unsigned int zone_nr_sectors = dmz_zone_nr_sectors(dmz->metadata); + sector_t capacity; + int r; + + capacity = dmz->dev[0].capacity & ~(zone_nr_sectors - 1); + r = fn(ti, dmz->ddev[0], 0, capacity, data); + if (!r && dmz->ddev[1]) { + capacity = dmz->dev[1].capacity & ~(zone_nr_sectors - 1); + r = fn(ti, dmz->ddev[1], 0, capacity, data); + } + return r; } static void dmz_status(struct dm_target *ti, status_type_t type, @@ -984,6 +1060,7 @@ static void dmz_status(struct dm_target *ti, status_type_t type, struct dmz_target *dmz = ti->private; ssize_t sz = 0; char buf[BDEVNAME_SIZE]; + struct dmz_dev *dev; switch (type) { case STATUSTYPE_INFO: @@ -995,8 +1072,14 @@ static void dmz_status(struct dm_target *ti, status_type_t type, dmz_nr_seq_zones(dmz->metadata)); break; case STATUSTYPE_TABLE: - format_dev_t(buf, dmz->dev->bdev->bd_dev); + dev = &dmz->dev[0]; + format_dev_t(buf, dev->bdev->bd_dev); DMEMIT("%s", buf); + if (dmz->dev[1].bdev) { + dev = &dmz->dev[1]; + format_dev_t(buf, dev->bdev->bd_dev); + DMEMIT(" %s", buf); + } break; } return; @@ -1018,7 +1101,7 @@ static int dmz_message(struct dm_target *ti, unsigned int argc, char **argv, static struct target_type dmz_type = { .name = "zoned", - .version = {1, 1, 0}, + .version = {2, 0, 0}, .features = DM_TARGET_SINGLETON | DM_TARGET_ZONED_HM, .module = THIS_MODULE, .ctr = dmz_ctr, diff --git a/drivers/md/dm-zoned.h b/drivers/md/dm-zoned.h index 2629bd51fa26..4971a765be55 100644 --- a/drivers/md/dm-zoned.h +++ b/drivers/md/dm-zoned.h @@ -52,10 +52,12 @@ struct dmz_dev { struct block_device *bdev; char name[BDEVNAME_SIZE]; + uuid_t uuid; sector_t capacity; unsigned int nr_zones; + unsigned int zone_offset; unsigned int flags; @@ -69,6 +71,7 @@ struct dmz_dev { /* Device flags. */ #define DMZ_BDEV_DYING (1 << 0) #define DMZ_CHECK_BDEV (2 << 0) +#define DMZ_BDEV_REGULAR (4 << 0) /* * Zone descriptor. @@ -163,8 +166,8 @@ struct dmz_reclaim; /* * Functions defined in dm-zoned-metadata.c */ -int dmz_ctr_metadata(struct dmz_dev *dev, struct dmz_metadata **zmd, - const char *devname); +int dmz_ctr_metadata(struct dmz_dev *dev, int num_dev, + struct dmz_metadata **zmd, const char *devname); void dmz_dtr_metadata(struct dmz_metadata *zmd); int dmz_resume_metadata(struct dmz_metadata *zmd); |