diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-17 01:55:48 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-17 01:55:48 +0300 |
commit | 311f71281ff4b24f86a39c60c959f485c68a6d36 (patch) | |
tree | 05983f559c3e7eb7fc2e0cdab5d14e2ecaf1bf5a /drivers/md/dm-dust.c | |
parent | 7878c231dae05bae9dcf2ad4d309f02e51625033 (diff) | |
parent | 8454fca4f53bbe5e0a71613192674c8ce5c52318 (diff) | |
download | linux-311f71281ff4b24f86a39c60c959f485c68a6d36.tar.xz |
Merge tag 'for-5.2/dm-changes-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm
Pull device mapper updates from Mike Snitzer:
- Improve DM snapshot target's scalability by using finer grained
locking. Requires some list_bl interface improvements.
- Add ability for DM integrity to use a bitmap mode, that tracks
regions where data and metadata are out of sync, instead of using a
journal.
- Improve DM thin provisioning target to not write metadata changes to
disk if the thin-pool and associated thin devices are merely
activated but not used. This avoids metadata corruption due to
concurrent activation of thin devices across different OS instances
(e.g. split brain scenarios, which ultimately would be avoided if
proper device filters were used -- but not having proper filtering
has proven a very common configuration mistake)
- Fix missing call to path selector type->end_io in DM multipath. This
fixes reported performance problems due to inaccurate path selector
IO accounting causing an imbalance of IO (e.g. avoiding issuing IO to
particular path due to it seemingly being heavily used).
- Fix bug in DM cache metadata's loading of its discard bitset that
could lead to all cache blocks being discarded if the very first
cache block was discarded (thankfully in practice the first cache
block is generally in use; be it FS superblock, partition table, disk
label, etc).
- Add testing-only DM dust target which simulates a device that has
failing sectors and/or read failures.
- Fix a DM init error path reference count hang that caused boot hangs
if user supplied malformed input on kernel commandline.
- Fix a couple issues with DM crypt target's logging being overly
verbose or lacking context.
- Various other small fixes to DM init, DM multipath, DM zoned, and DM
crypt.
* tag 'for-5.2/dm-changes-v2' of git://git.kernel.org/pub/scm/linux/kernel/git/device-mapper/linux-dm: (42 commits)
dm: fix a couple brace coding style issues
dm crypt: print device name in integrity error message
dm crypt: move detailed message into debug level
dm ioctl: fix hang in early create error condition
dm integrity: whitespace, coding style and dead code cleanup
dm integrity: implement synchronous mode for reboot handling
dm integrity: handle machine reboot in bitmap mode
dm integrity: add a bitmap mode
dm integrity: introduce a function add_new_range_and_wait()
dm integrity: allow large ranges to be described
dm ingerity: pass size to dm_integrity_alloc_page_list()
dm integrity: introduce rw_journal_sectors()
dm integrity: update documentation
dm integrity: don't report unused options
dm integrity: don't check null pointer before kvfree and vfree
dm integrity: correctly calculate the size of metadata area
dm dust: Make dm_dust_init and dm_dust_exit static
dm dust: remove redundant unsigned comparison to less than zero
dm mpath: always free attached_handler_name in parse_path()
dm init: fix max devices/targets checks
...
Diffstat (limited to 'drivers/md/dm-dust.c')
-rw-r--r-- | drivers/md/dm-dust.c | 515 |
1 files changed, 515 insertions, 0 deletions
diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c new file mode 100644 index 000000000000..845f376a72d9 --- /dev/null +++ b/drivers/md/dm-dust.c @@ -0,0 +1,515 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (c) 2018 Red Hat, Inc. + * + * This is a test "dust" device, which fails reads on specified + * sectors, emulating the behavior of a hard disk drive sending + * a "Read Medium Error" sense. + * + */ + +#include <linux/device-mapper.h> +#include <linux/module.h> +#include <linux/rbtree.h> + +#define DM_MSG_PREFIX "dust" + +struct badblock { + struct rb_node node; + sector_t bb; +}; + +struct dust_device { + struct dm_dev *dev; + struct rb_root badblocklist; + unsigned long long badblock_count; + spinlock_t dust_lock; + unsigned int blksz; + unsigned int sect_per_block; + sector_t start; + bool fail_read_on_bb:1; + bool quiet_mode:1; +}; + +static struct badblock *dust_rb_search(struct rb_root *root, sector_t blk) +{ + struct rb_node *node = root->rb_node; + + while (node) { + struct badblock *bblk = rb_entry(node, struct badblock, node); + + if (bblk->bb > blk) + node = node->rb_left; + else if (bblk->bb < blk) + node = node->rb_right; + else + return bblk; + } + + return NULL; +} + +static bool dust_rb_insert(struct rb_root *root, struct badblock *new) +{ + struct badblock *bblk; + struct rb_node **link = &root->rb_node, *parent = NULL; + sector_t value = new->bb; + + while (*link) { + parent = *link; + bblk = rb_entry(parent, struct badblock, node); + + if (bblk->bb > value) + link = &(*link)->rb_left; + else if (bblk->bb < value) + link = &(*link)->rb_right; + else + return false; + } + + rb_link_node(&new->node, parent, link); + rb_insert_color(&new->node, root); + + return true; +} + +static int dust_remove_block(struct dust_device *dd, unsigned long long block) +{ + struct badblock *bblock; + unsigned long flags; + + spin_lock_irqsave(&dd->dust_lock, flags); + bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block); + + if (bblock == NULL) { + if (!dd->quiet_mode) { + DMERR("%s: block %llu not found in badblocklist", + __func__, block); + } + spin_unlock_irqrestore(&dd->dust_lock, flags); + return -EINVAL; + } + + rb_erase(&bblock->node, &dd->badblocklist); + dd->badblock_count--; + if (!dd->quiet_mode) + DMINFO("%s: badblock removed at block %llu", __func__, block); + kfree(bblock); + spin_unlock_irqrestore(&dd->dust_lock, flags); + + return 0; +} + +static int dust_add_block(struct dust_device *dd, unsigned long long block) +{ + struct badblock *bblock; + unsigned long flags; + + bblock = kmalloc(sizeof(*bblock), GFP_KERNEL); + if (bblock == NULL) { + if (!dd->quiet_mode) + DMERR("%s: badblock allocation failed", __func__); + return -ENOMEM; + } + + spin_lock_irqsave(&dd->dust_lock, flags); + bblock->bb = block * dd->sect_per_block; + if (!dust_rb_insert(&dd->badblocklist, bblock)) { + if (!dd->quiet_mode) { + DMERR("%s: block %llu already in badblocklist", + __func__, block); + } + spin_unlock_irqrestore(&dd->dust_lock, flags); + kfree(bblock); + return -EINVAL; + } + + dd->badblock_count++; + if (!dd->quiet_mode) + DMINFO("%s: badblock added at block %llu", __func__, block); + spin_unlock_irqrestore(&dd->dust_lock, flags); + + return 0; +} + +static int dust_query_block(struct dust_device *dd, unsigned long long block) +{ + struct badblock *bblock; + unsigned long flags; + + spin_lock_irqsave(&dd->dust_lock, flags); + bblock = dust_rb_search(&dd->badblocklist, block * dd->sect_per_block); + if (bblock != NULL) + DMINFO("%s: block %llu found in badblocklist", __func__, block); + else + DMINFO("%s: block %llu not found in badblocklist", __func__, block); + spin_unlock_irqrestore(&dd->dust_lock, flags); + + return 0; +} + +static int __dust_map_read(struct dust_device *dd, sector_t thisblock) +{ + struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock); + + if (bblk) + return DM_MAPIO_KILL; + + return DM_MAPIO_REMAPPED; +} + +static int dust_map_read(struct dust_device *dd, sector_t thisblock, + bool fail_read_on_bb) +{ + unsigned long flags; + int ret = DM_MAPIO_REMAPPED; + + if (fail_read_on_bb) { + spin_lock_irqsave(&dd->dust_lock, flags); + ret = __dust_map_read(dd, thisblock); + spin_unlock_irqrestore(&dd->dust_lock, flags); + } + + return ret; +} + +static void __dust_map_write(struct dust_device *dd, sector_t thisblock) +{ + struct badblock *bblk = dust_rb_search(&dd->badblocklist, thisblock); + + if (bblk) { + rb_erase(&bblk->node, &dd->badblocklist); + dd->badblock_count--; + kfree(bblk); + if (!dd->quiet_mode) { + sector_div(thisblock, dd->sect_per_block); + DMINFO("block %llu removed from badblocklist by write", + (unsigned long long)thisblock); + } + } +} + +static int dust_map_write(struct dust_device *dd, sector_t thisblock, + bool fail_read_on_bb) +{ + unsigned long flags; + + if (fail_read_on_bb) { + spin_lock_irqsave(&dd->dust_lock, flags); + __dust_map_write(dd, thisblock); + spin_unlock_irqrestore(&dd->dust_lock, flags); + } + + return DM_MAPIO_REMAPPED; +} + +static int dust_map(struct dm_target *ti, struct bio *bio) +{ + struct dust_device *dd = ti->private; + int ret; + + bio_set_dev(bio, dd->dev->bdev); + bio->bi_iter.bi_sector = dd->start + dm_target_offset(ti, bio->bi_iter.bi_sector); + + if (bio_data_dir(bio) == READ) + ret = dust_map_read(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb); + else + ret = dust_map_write(dd, bio->bi_iter.bi_sector, dd->fail_read_on_bb); + + return ret; +} + +static bool __dust_clear_badblocks(struct rb_root *tree, + unsigned long long count) +{ + struct rb_node *node = NULL, *nnode = NULL; + + nnode = rb_first(tree); + if (nnode == NULL) { + BUG_ON(count != 0); + return false; + } + + while (nnode) { + node = nnode; + nnode = rb_next(node); + rb_erase(node, tree); + count--; + kfree(node); + } + BUG_ON(count != 0); + BUG_ON(tree->rb_node != NULL); + + return true; +} + +static int dust_clear_badblocks(struct dust_device *dd) +{ + unsigned long flags; + struct rb_root badblocklist; + unsigned long long badblock_count; + + spin_lock_irqsave(&dd->dust_lock, flags); + badblocklist = dd->badblocklist; + badblock_count = dd->badblock_count; + dd->badblocklist = RB_ROOT; + dd->badblock_count = 0; + spin_unlock_irqrestore(&dd->dust_lock, flags); + + if (!__dust_clear_badblocks(&badblocklist, badblock_count)) + DMINFO("%s: no badblocks found", __func__); + else + DMINFO("%s: badblocks cleared", __func__); + + return 0; +} + +/* + * Target parameters: + * + * <device_path> <offset> <blksz> + * + * device_path: path to the block device + * offset: offset to data area from start of device_path + * blksz: block size (minimum 512, maximum 1073741824, must be a power of 2) + */ +static int dust_ctr(struct dm_target *ti, unsigned int argc, char **argv) +{ + struct dust_device *dd; + unsigned long long tmp; + char dummy; + unsigned int blksz; + unsigned int sect_per_block; + sector_t DUST_MAX_BLKSZ_SECTORS = 2097152; + sector_t max_block_sectors = min(ti->len, DUST_MAX_BLKSZ_SECTORS); + + if (argc != 3) { + ti->error = "Invalid argument count"; + return -EINVAL; + } + + if (kstrtouint(argv[2], 10, &blksz) || !blksz) { + ti->error = "Invalid block size parameter"; + return -EINVAL; + } + + if (blksz < 512) { + ti->error = "Block size must be at least 512"; + return -EINVAL; + } + + if (!is_power_of_2(blksz)) { + ti->error = "Block size must be a power of 2"; + return -EINVAL; + } + + if (to_sector(blksz) > max_block_sectors) { + ti->error = "Block size is too large"; + return -EINVAL; + } + + sect_per_block = (blksz >> SECTOR_SHIFT); + + if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1 || tmp != (sector_t)tmp) { + ti->error = "Invalid device offset sector"; + return -EINVAL; + } + + dd = kzalloc(sizeof(struct dust_device), GFP_KERNEL); + if (dd == NULL) { + ti->error = "Cannot allocate context"; + return -ENOMEM; + } + + if (dm_get_device(ti, argv[0], dm_table_get_mode(ti->table), &dd->dev)) { + ti->error = "Device lookup failed"; + kfree(dd); + return -EINVAL; + } + + dd->sect_per_block = sect_per_block; + dd->blksz = blksz; + dd->start = tmp; + + /* + * Whether to fail a read on a "bad" block. + * Defaults to false; enabled later by message. + */ + dd->fail_read_on_bb = false; + + /* + * Initialize bad block list rbtree. + */ + dd->badblocklist = RB_ROOT; + dd->badblock_count = 0; + spin_lock_init(&dd->dust_lock); + + dd->quiet_mode = false; + + BUG_ON(dm_set_target_max_io_len(ti, dd->sect_per_block) != 0); + + ti->num_discard_bios = 1; + ti->num_flush_bios = 1; + ti->private = dd; + + return 0; +} + +static void dust_dtr(struct dm_target *ti) +{ + struct dust_device *dd = ti->private; + + __dust_clear_badblocks(&dd->badblocklist, dd->badblock_count); + dm_put_device(ti, dd->dev); + kfree(dd); +} + +static int dust_message(struct dm_target *ti, unsigned int argc, char **argv, + char *result_buf, unsigned int maxlen) +{ + struct dust_device *dd = ti->private; + sector_t size = i_size_read(dd->dev->bdev->bd_inode) >> SECTOR_SHIFT; + bool invalid_msg = false; + int result = -EINVAL; + unsigned long long tmp, block; + unsigned long flags; + char dummy; + + if (argc == 1) { + if (!strcasecmp(argv[0], "addbadblock") || + !strcasecmp(argv[0], "removebadblock") || + !strcasecmp(argv[0], "queryblock")) { + DMERR("%s requires an additional argument", argv[0]); + } else if (!strcasecmp(argv[0], "disable")) { + DMINFO("disabling read failures on bad sectors"); + dd->fail_read_on_bb = false; + result = 0; + } else if (!strcasecmp(argv[0], "enable")) { + DMINFO("enabling read failures on bad sectors"); + dd->fail_read_on_bb = true; + result = 0; + } else if (!strcasecmp(argv[0], "countbadblocks")) { + spin_lock_irqsave(&dd->dust_lock, flags); + DMINFO("countbadblocks: %llu badblock(s) found", + dd->badblock_count); + spin_unlock_irqrestore(&dd->dust_lock, flags); + result = 0; + } else if (!strcasecmp(argv[0], "clearbadblocks")) { + result = dust_clear_badblocks(dd); + } else if (!strcasecmp(argv[0], "quiet")) { + if (!dd->quiet_mode) + dd->quiet_mode = true; + else + dd->quiet_mode = false; + result = 0; + } else { + invalid_msg = true; + } + } else if (argc == 2) { + if (sscanf(argv[1], "%llu%c", &tmp, &dummy) != 1) + return result; + + block = tmp; + sector_div(size, dd->sect_per_block); + if (block > size) { + DMERR("selected block value out of range"); + return result; + } + + if (!strcasecmp(argv[0], "addbadblock")) + result = dust_add_block(dd, block); + else if (!strcasecmp(argv[0], "removebadblock")) + result = dust_remove_block(dd, block); + else if (!strcasecmp(argv[0], "queryblock")) + result = dust_query_block(dd, block); + else + invalid_msg = true; + + } else + DMERR("invalid number of arguments '%d'", argc); + + if (invalid_msg) + DMERR("unrecognized message '%s' received", argv[0]); + + return result; +} + +static void dust_status(struct dm_target *ti, status_type_t type, + unsigned int status_flags, char *result, unsigned int maxlen) +{ + struct dust_device *dd = ti->private; + unsigned int sz = 0; + + switch (type) { + case STATUSTYPE_INFO: + DMEMIT("%s %s %s", dd->dev->name, + dd->fail_read_on_bb ? "fail_read_on_bad_block" : "bypass", + dd->quiet_mode ? "quiet" : "verbose"); + break; + + case STATUSTYPE_TABLE: + DMEMIT("%s %llu %u", dd->dev->name, + (unsigned long long)dd->start, dd->blksz); + break; + } +} + +static int dust_prepare_ioctl(struct dm_target *ti, struct block_device **bdev) +{ + struct dust_device *dd = ti->private; + struct dm_dev *dev = dd->dev; + + *bdev = dev->bdev; + + /* + * Only pass ioctls through if the device sizes match exactly. + */ + if (dd->start || + ti->len != i_size_read(dev->bdev->bd_inode) >> SECTOR_SHIFT) + return 1; + + return 0; +} + +static int dust_iterate_devices(struct dm_target *ti, iterate_devices_callout_fn fn, + void *data) +{ + struct dust_device *dd = ti->private; + + return fn(ti, dd->dev, dd->start, ti->len, data); +} + +static struct target_type dust_target = { + .name = "dust", + .version = {1, 0, 0}, + .module = THIS_MODULE, + .ctr = dust_ctr, + .dtr = dust_dtr, + .iterate_devices = dust_iterate_devices, + .map = dust_map, + .message = dust_message, + .status = dust_status, + .prepare_ioctl = dust_prepare_ioctl, +}; + +static int __init dm_dust_init(void) +{ + int result = dm_register_target(&dust_target); + + if (result < 0) + DMERR("dm_register_target failed %d", result); + + return result; +} + +static void __exit dm_dust_exit(void) +{ + dm_unregister_target(&dust_target); +} + +module_init(dm_dust_init); +module_exit(dm_dust_exit); + +MODULE_DESCRIPTION(DM_NAME " dust test target"); +MODULE_AUTHOR("Bryan Gurney <dm-devel@redhat.com>"); +MODULE_LICENSE("GPL"); |