summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig12
-rw-r--r--drivers/md/Makefile4
-rw-r--r--drivers/md/dm-cache-target.c127
-rw-r--r--drivers/md/dm-init.c303
-rw-r--r--drivers/md/dm-integrity.c10
-rw-r--r--drivers/md/dm-ioctl.c103
-rw-r--r--drivers/md/dm-raid.c14
-rw-r--r--drivers/md/dm-rq.c16
-rw-r--r--drivers/md/dm-rq.h16
-rw-r--r--drivers/md/dm-snap.c8
-rw-r--r--drivers/md/dm-switch.c3
-rw-r--r--drivers/md/dm-thin.c14
-rw-r--r--drivers/md/dm-verity-fec.c6
-rw-r--r--drivers/md/dm-writecache.c2
-rw-r--r--drivers/md/dm-zoned-target.c1
-rw-r--r--drivers/md/dm.c139
-rw-r--r--drivers/md/persistent-data/dm-block-manager.c8
17 files changed, 647 insertions, 139 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 3db222509e44..2557f198e175 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -436,6 +436,18 @@ config DM_DELAY
If unsure, say N.
+config DM_INIT
+ bool "DM \"dm-mod.create=\" parameter support"
+ depends on BLK_DEV_DM=y
+ ---help---
+ Enable "dm-mod.create=" parameter to create mapped devices at init time.
+ This option is useful to allow mounting rootfs without requiring an
+ initramfs.
+ See Documentation/device-mapper/dm-init.txt for dm-mod.create="..."
+ format.
+
+ If unsure, say N.
+
config DM_UEVENT
bool "DM uevents"
depends on BLK_DEV_DM
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 822f4e8753bc..a52b703e588e 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -69,6 +69,10 @@ obj-$(CONFIG_DM_INTEGRITY) += dm-integrity.o
obj-$(CONFIG_DM_ZONED) += dm-zoned.o
obj-$(CONFIG_DM_WRITECACHE) += dm-writecache.o
+ifeq ($(CONFIG_DM_INIT),y)
+dm-mod-objs += dm-init.o
+endif
+
ifeq ($(CONFIG_DM_UEVENT),y)
dm-mod-objs += dm-uevent.o
endif
diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c
index b29a8327eed1..d249cf8ac277 100644
--- a/drivers/md/dm-cache-target.c
+++ b/drivers/md/dm-cache-target.c
@@ -353,6 +353,7 @@ struct cache_features {
enum cache_metadata_mode mode;
enum cache_io_mode io_mode;
unsigned metadata_version;
+ bool discard_passdown:1;
};
struct cache_stats {
@@ -1899,7 +1900,11 @@ static bool process_discard_bio(struct cache *cache, struct bio *bio)
b = to_dblock(from_dblock(b) + 1);
}
- bio_endio(bio);
+ if (cache->features.discard_passdown) {
+ remap_to_origin(cache, bio);
+ generic_make_request(bio);
+ } else
+ bio_endio(bio);
return false;
}
@@ -2233,13 +2238,14 @@ static void init_features(struct cache_features *cf)
cf->mode = CM_WRITE;
cf->io_mode = CM_IO_WRITEBACK;
cf->metadata_version = 1;
+ cf->discard_passdown = true;
}
static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
char **error)
{
static const struct dm_arg _args[] = {
- {0, 2, "Invalid number of cache feature arguments"},
+ {0, 3, "Invalid number of cache feature arguments"},
};
int r, mode_ctr = 0;
@@ -2274,6 +2280,9 @@ static int parse_features(struct cache_args *ca, struct dm_arg_set *as,
else if (!strcasecmp(arg, "metadata2"))
cf->metadata_version = 2;
+ else if (!strcasecmp(arg, "no_discard_passdown"))
+ cf->discard_passdown = false;
+
else {
*error = "Unrecognised cache feature requested";
return -EINVAL;
@@ -2496,7 +2505,6 @@ static int cache_create(struct cache_args *ca, struct cache **result)
ti->num_discard_bios = 1;
ti->discards_supported = true;
- ti->split_discard_bios = false;
ti->per_io_data_size = sizeof(struct per_bio_data);
@@ -3120,6 +3128,39 @@ static void cache_resume(struct dm_target *ti)
do_waker(&cache->waker.work);
}
+static void emit_flags(struct cache *cache, char *result,
+ unsigned maxlen, ssize_t *sz_ptr)
+{
+ ssize_t sz = *sz_ptr;
+ struct cache_features *cf = &cache->features;
+ unsigned count = (cf->metadata_version == 2) + !cf->discard_passdown + 1;
+
+ DMEMIT("%u ", count);
+
+ if (cf->metadata_version == 2)
+ DMEMIT("metadata2 ");
+
+ if (writethrough_mode(cache))
+ DMEMIT("writethrough ");
+
+ else if (passthrough_mode(cache))
+ DMEMIT("passthrough ");
+
+ else if (writeback_mode(cache))
+ DMEMIT("writeback ");
+
+ else {
+ DMEMIT("unknown ");
+ DMERR("%s: internal error: unknown io mode: %d",
+ cache_device_name(cache), (int) cf->io_mode);
+ }
+
+ if (!cf->discard_passdown)
+ DMEMIT("no_discard_passdown ");
+
+ *sz_ptr = sz;
+}
+
/*
* Status format:
*
@@ -3186,25 +3227,7 @@ static void cache_status(struct dm_target *ti, status_type_t type,
(unsigned) atomic_read(&cache->stats.promotion),
(unsigned long) atomic_read(&cache->nr_dirty));
- if (cache->features.metadata_version == 2)
- DMEMIT("2 metadata2 ");
- else
- DMEMIT("1 ");
-
- if (writethrough_mode(cache))
- DMEMIT("writethrough ");
-
- else if (passthrough_mode(cache))
- DMEMIT("passthrough ");
-
- else if (writeback_mode(cache))
- DMEMIT("writeback ");
-
- else {
- DMERR("%s: internal error: unknown io mode: %d",
- cache_device_name(cache), (int) cache->features.io_mode);
- goto err;
- }
+ emit_flags(cache, result, maxlen, &sz);
DMEMIT("2 migration_threshold %llu ", (unsigned long long) cache->migration_threshold);
@@ -3433,14 +3456,62 @@ static int cache_iterate_devices(struct dm_target *ti,
return r;
}
+static bool origin_dev_supports_discard(struct block_device *origin_bdev)
+{
+ struct request_queue *q = bdev_get_queue(origin_bdev);
+
+ return q && blk_queue_discard(q);
+}
+
+/*
+ * If discard_passdown was enabled verify that the origin device
+ * supports discards. Disable discard_passdown if not.
+ */
+static void disable_passdown_if_not_supported(struct cache *cache)
+{
+ struct block_device *origin_bdev = cache->origin_dev->bdev;
+ struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
+ const char *reason = NULL;
+ char buf[BDEVNAME_SIZE];
+
+ if (!cache->features.discard_passdown)
+ return;
+
+ if (!origin_dev_supports_discard(origin_bdev))
+ reason = "discard unsupported";
+
+ else if (origin_limits->max_discard_sectors < cache->sectors_per_block)
+ reason = "max discard sectors smaller than a block";
+
+ if (reason) {
+ DMWARN("Origin device (%s) %s: Disabling discard passdown.",
+ bdevname(origin_bdev, buf), reason);
+ cache->features.discard_passdown = false;
+ }
+}
+
static void set_discard_limits(struct cache *cache, struct queue_limits *limits)
{
+ struct block_device *origin_bdev = cache->origin_dev->bdev;
+ struct queue_limits *origin_limits = &bdev_get_queue(origin_bdev)->limits;
+
+ if (!cache->features.discard_passdown) {
+ /* No passdown is done so setting own virtual limits */
+ limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
+ cache->origin_sectors);
+ limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
+ return;
+ }
+
/*
- * FIXME: these limits may be incompatible with the cache device
+ * cache_iterate_devices() is stacking both origin and fast device limits
+ * but discards aren't passed to fast device, so inherit origin's limits.
*/
- limits->max_discard_sectors = min_t(sector_t, cache->discard_block_size * 1024,
- cache->origin_sectors);
- limits->discard_granularity = cache->discard_block_size << SECTOR_SHIFT;
+ limits->max_discard_sectors = origin_limits->max_discard_sectors;
+ limits->max_hw_discard_sectors = origin_limits->max_hw_discard_sectors;
+ limits->discard_granularity = origin_limits->discard_granularity;
+ limits->discard_alignment = origin_limits->discard_alignment;
+ limits->discard_misaligned = origin_limits->discard_misaligned;
}
static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
@@ -3457,6 +3528,8 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, cache->sectors_per_block << SECTOR_SHIFT);
blk_limits_io_opt(limits, cache->sectors_per_block << SECTOR_SHIFT);
}
+
+ disable_passdown_if_not_supported(cache);
set_discard_limits(cache, limits);
}
@@ -3464,7 +3537,7 @@ static void cache_io_hints(struct dm_target *ti, struct queue_limits *limits)
static struct target_type cache_target = {
.name = "cache",
- .version = {2, 0, 0},
+ .version = {2, 1, 0},
.module = THIS_MODULE,
.ctr = cache_ctr,
.dtr = cache_dtr,
diff --git a/drivers/md/dm-init.c b/drivers/md/dm-init.c
new file mode 100644
index 000000000000..b53f30f16b4d
--- /dev/null
+++ b/drivers/md/dm-init.c
@@ -0,0 +1,303 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * dm-init.c
+ * Copyright (C) 2017 The Chromium OS Authors <chromium-os-dev@chromium.org>
+ *
+ * This file is released under the GPLv2.
+ */
+
+#include <linux/ctype.h>
+#include <linux/device.h>
+#include <linux/device-mapper.h>
+#include <linux/init.h>
+#include <linux/list.h>
+#include <linux/moduleparam.h>
+
+#define DM_MSG_PREFIX "init"
+#define DM_MAX_DEVICES 256
+#define DM_MAX_TARGETS 256
+#define DM_MAX_STR_SIZE 4096
+
+static char *create;
+
+/*
+ * Format: dm-mod.create=<name>,<uuid>,<minor>,<flags>,<table>[,<table>+][;<name>,<uuid>,<minor>,<flags>,<table>[,<table>+]+]
+ * Table format: <start_sector> <num_sectors> <target_type> <target_args>
+ *
+ * See Documentation/device-mapper/dm-init.txt for dm-mod.create="..." format
+ * details.
+ */
+
+struct dm_device {
+ struct dm_ioctl dmi;
+ struct dm_target_spec *table[DM_MAX_TARGETS];
+ char *target_args_array[DM_MAX_TARGETS];
+ struct list_head list;
+};
+
+const char *dm_allowed_targets[] __initconst = {
+ "crypt",
+ "delay",
+ "linear",
+ "snapshot-origin",
+ "striped",
+ "verity",
+};
+
+static int __init dm_verify_target_type(const char *target)
+{
+ unsigned int i;
+
+ for (i = 0; i < ARRAY_SIZE(dm_allowed_targets); i++) {
+ if (!strcmp(dm_allowed_targets[i], target))
+ return 0;
+ }
+ return -EINVAL;
+}
+
+static void __init dm_setup_cleanup(struct list_head *devices)
+{
+ struct dm_device *dev, *tmp;
+ unsigned int i;
+
+ list_for_each_entry_safe(dev, tmp, devices, list) {
+ list_del(&dev->list);
+ for (i = 0; i < dev->dmi.target_count; i++) {
+ kfree(dev->table[i]);
+ kfree(dev->target_args_array[i]);
+ }
+ kfree(dev);
+ }
+}
+
+/**
+ * str_field_delimit - delimit a string based on a separator char.
+ * @str: the pointer to the string to delimit.
+ * @separator: char that delimits the field
+ *
+ * Find a @separator and replace it by '\0'.
+ * Remove leading and trailing spaces.
+ * Return the remainder string after the @separator.
+ */
+static char __init *str_field_delimit(char **str, char separator)
+{
+ char *s;
+
+ /* TODO: add support for escaped characters */
+ *str = skip_spaces(*str);
+ s = strchr(*str, separator);
+ /* Delimit the field and remove trailing spaces */
+ if (s)
+ *s = '\0';
+ *str = strim(*str);
+ return s ? ++s : NULL;
+}
+
+/**
+ * dm_parse_table_entry - parse a table entry
+ * @dev: device to store the parsed information.
+ * @str: the pointer to a string with the format:
+ * <start_sector> <num_sectors> <target_type> <target_args>[, ...]
+ *
+ * Return the remainder string after the table entry, i.e, after the comma which
+ * delimits the entry or NULL if reached the end of the string.
+ */
+static char __init *dm_parse_table_entry(struct dm_device *dev, char *str)
+{
+ const unsigned int n = dev->dmi.target_count - 1;
+ struct dm_target_spec *sp;
+ unsigned int i;
+ /* fields: */
+ char *field[4];
+ char *next;
+
+ field[0] = str;
+ /* Delimit first 3 fields that are separated by space */
+ for (i = 0; i < ARRAY_SIZE(field) - 1; i++) {
+ field[i + 1] = str_field_delimit(&field[i], ' ');
+ if (!field[i + 1])
+ return ERR_PTR(-EINVAL);
+ }
+ /* Delimit last field that can be terminated by comma */
+ next = str_field_delimit(&field[i], ',');
+
+ sp = kzalloc(sizeof(*sp), GFP_KERNEL);
+ if (!sp)
+ return ERR_PTR(-ENOMEM);
+ dev->table[n] = sp;
+
+ /* start_sector */
+ if (kstrtoull(field[0], 0, &sp->sector_start))
+ return ERR_PTR(-EINVAL);
+ /* num_sector */
+ if (kstrtoull(field[1], 0, &sp->length))
+ return ERR_PTR(-EINVAL);
+ /* target_type */
+ strscpy(sp->target_type, field[2], sizeof(sp->target_type));
+ if (dm_verify_target_type(sp->target_type)) {
+ DMERR("invalid type \"%s\"", sp->target_type);
+ return ERR_PTR(-EINVAL);
+ }
+ /* target_args */
+ dev->target_args_array[n] = kstrndup(field[3], GFP_KERNEL,
+ DM_MAX_STR_SIZE);
+ if (!dev->target_args_array[n])
+ return ERR_PTR(-ENOMEM);
+
+ return next;
+}
+
+/**
+ * dm_parse_table - parse "dm-mod.create=" table field
+ * @dev: device to store the parsed information.
+ * @str: the pointer to a string with the format:
+ * <table>[,<table>+]
+ */
+static int __init dm_parse_table(struct dm_device *dev, char *str)
+{
+ char *table_entry = str;
+
+ while (table_entry) {
+ DMDEBUG("parsing table \"%s\"", str);
+ if (++dev->dmi.target_count >= DM_MAX_TARGETS) {
+ DMERR("too many targets %u > %d",
+ dev->dmi.target_count, DM_MAX_TARGETS);
+ return -EINVAL;
+ }
+ table_entry = dm_parse_table_entry(dev, table_entry);
+ if (IS_ERR(table_entry)) {
+ DMERR("couldn't parse table");
+ return PTR_ERR(table_entry);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * dm_parse_device_entry - parse a device entry
+ * @dev: device to store the parsed information.
+ * @str: the pointer to a string with the format:
+ * name,uuid,minor,flags,table[; ...]
+ *
+ * Return the remainder string after the table entry, i.e, after the semi-colon
+ * which delimits the entry or NULL if reached the end of the string.
+ */
+static char __init *dm_parse_device_entry(struct dm_device *dev, char *str)
+{
+ /* There are 5 fields: name,uuid,minor,flags,table; */
+ char *field[5];
+ unsigned int i;
+ char *next;
+
+ field[0] = str;
+ /* Delimit first 4 fields that are separated by comma */
+ for (i = 0; i < ARRAY_SIZE(field) - 1; i++) {
+ field[i+1] = str_field_delimit(&field[i], ',');
+ if (!field[i+1])
+ return ERR_PTR(-EINVAL);
+ }
+ /* Delimit last field that can be delimited by semi-colon */
+ next = str_field_delimit(&field[i], ';');
+
+ /* name */
+ strscpy(dev->dmi.name, field[0], sizeof(dev->dmi.name));
+ /* uuid */
+ strscpy(dev->dmi.uuid, field[1], sizeof(dev->dmi.uuid));
+ /* minor */
+ if (strlen(field[2])) {
+ if (kstrtoull(field[2], 0, &dev->dmi.dev))
+ return ERR_PTR(-EINVAL);
+ dev->dmi.flags |= DM_PERSISTENT_DEV_FLAG;
+ }
+ /* flags */
+ if (!strcmp(field[3], "ro"))
+ dev->dmi.flags |= DM_READONLY_FLAG;
+ else if (strcmp(field[3], "rw"))
+ return ERR_PTR(-EINVAL);
+ /* table */
+ if (dm_parse_table(dev, field[4]))
+ return ERR_PTR(-EINVAL);
+
+ return next;
+}
+
+/**
+ * dm_parse_devices - parse "dm-mod.create=" argument
+ * @devices: list of struct dm_device to store the parsed information.
+ * @str: the pointer to a string with the format:
+ * <device>[;<device>+]
+ */
+static int __init dm_parse_devices(struct list_head *devices, char *str)
+{
+ unsigned long ndev = 0;
+ struct dm_device *dev;
+ char *device = str;
+
+ DMDEBUG("parsing \"%s\"", str);
+ while (device) {
+ dev = kzalloc(sizeof(*dev), GFP_KERNEL);
+ if (!dev)
+ return -ENOMEM;
+ list_add_tail(&dev->list, devices);
+
+ if (++ndev >= DM_MAX_DEVICES) {
+ DMERR("too many targets %u > %d",
+ dev->dmi.target_count, DM_MAX_TARGETS);
+ return -EINVAL;
+ }
+
+ device = dm_parse_device_entry(dev, device);
+ if (IS_ERR(device)) {
+ DMERR("couldn't parse device");
+ return PTR_ERR(device);
+ }
+ }
+
+ return 0;
+}
+
+/**
+ * dm_init_init - parse "dm-mod.create=" argument and configure drivers
+ */
+static int __init dm_init_init(void)
+{
+ struct dm_device *dev;
+ LIST_HEAD(devices);
+ char *str;
+ int r;
+
+ if (!create)
+ return 0;
+
+ if (strlen(create) >= DM_MAX_STR_SIZE) {
+ DMERR("Argument is too big. Limit is %d\n", DM_MAX_STR_SIZE);
+ return -EINVAL;
+ }
+ str = kstrndup(create, GFP_KERNEL, DM_MAX_STR_SIZE);
+ if (!str)
+ return -ENOMEM;
+
+ r = dm_parse_devices(&devices, str);
+ if (r)
+ goto out;
+
+ DMINFO("waiting for all devices to be available before creating mapped devices\n");
+ wait_for_device_probe();
+
+ list_for_each_entry(dev, &devices, list) {
+ if (dm_early_create(&dev->dmi, dev->table,
+ dev->target_args_array))
+ break;
+ }
+out:
+ kfree(str);
+ dm_setup_cleanup(&devices);
+ return r;
+}
+
+late_initcall(dm_init_init);
+
+module_param(create, charp, 0);
+MODULE_PARM_DESC(create, "Create a mapped device in early boot");
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 457200ca6287..d57d997a52c8 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -1122,7 +1122,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
return r;
data = dm_bufio_read(ic->bufio, *metadata_block, &b);
- if (unlikely(IS_ERR(data)))
+ if (IS_ERR(data))
return PTR_ERR(data);
to_copy = min((1U << SECTOR_SHIFT << ic->log2_buffer_sectors) - *metadata_offset, total_size);
@@ -1368,8 +1368,8 @@ again:
checksums_ptr - checksums, !dio->write ? TAG_CMP : TAG_WRITE);
if (unlikely(r)) {
if (r > 0) {
- DMERR("Checksum failed at sector 0x%llx",
- (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
+ DMERR_LIMIT("Checksum failed at sector 0x%llx",
+ (unsigned long long)(sector - ((r + ic->tag_size - 1) / ic->tag_size)));
r = -EILSEQ;
atomic64_inc(&ic->number_of_mismatches);
}
@@ -1561,8 +1561,8 @@ retry_kmap:
integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
- DMERR("Checksum failed when reading from journal, at sector 0x%llx",
- (unsigned long long)logical_sector);
+ DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx",
+ (unsigned long long)logical_sector);
}
}
#endif
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index f666778ad237..c740153b4e52 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -2018,3 +2018,106 @@ out:
return r;
}
+
+
+/**
+ * dm_early_create - create a mapped device in early boot.
+ *
+ * @dmi: Contains main information of the device mapping to be created.
+ * @spec_array: array of pointers to struct dm_target_spec. Describes the
+ * mapping table of the device.
+ * @target_params_array: array of strings with the parameters to a specific
+ * target.
+ *
+ * Instead of having the struct dm_target_spec and the parameters for every
+ * target embedded at the end of struct dm_ioctl (as performed in a normal
+ * ioctl), pass them as arguments, so the caller doesn't need to serialize them.
+ * The size of the spec_array and target_params_array is given by
+ * @dmi->target_count.
+ * This function is supposed to be called in early boot, so locking mechanisms
+ * to protect against concurrent loads are not required.
+ */
+int __init dm_early_create(struct dm_ioctl *dmi,
+ struct dm_target_spec **spec_array,
+ char **target_params_array)
+{
+ int r, m = DM_ANY_MINOR;
+ struct dm_table *t, *old_map;
+ struct mapped_device *md;
+ unsigned int i;
+
+ if (!dmi->target_count)
+ return -EINVAL;
+
+ r = check_name(dmi->name);
+ if (r)
+ return r;
+
+ if (dmi->flags & DM_PERSISTENT_DEV_FLAG)
+ m = MINOR(huge_decode_dev(dmi->dev));
+
+ /* alloc dm device */
+ r = dm_create(m, &md);
+ if (r)
+ return r;
+
+ /* hash insert */
+ r = dm_hash_insert(dmi->name, *dmi->uuid ? dmi->uuid : NULL, md);
+ if (r)
+ goto err_destroy_dm;
+
+ /* alloc table */
+ r = dm_table_create(&t, get_mode(dmi), dmi->target_count, md);
+ if (r)
+ goto err_destroy_dm;
+
+ /* add targets */
+ for (i = 0; i < dmi->target_count; i++) {
+ r = dm_table_add_target(t, spec_array[i]->target_type,
+ (sector_t) spec_array[i]->sector_start,
+ (sector_t) spec_array[i]->length,
+ target_params_array[i]);
+ if (r) {
+ DMWARN("error adding target to table");
+ goto err_destroy_table;
+ }
+ }
+
+ /* finish table */
+ r = dm_table_complete(t);
+ if (r)
+ goto err_destroy_table;
+
+ md->type = dm_table_get_type(t);
+ /* setup md->queue to reflect md's type (may block) */
+ r = dm_setup_md_queue(md, t);
+ if (r) {
+ DMWARN("unable to set up device queue for new table.");
+ goto err_destroy_table;
+ }
+
+ /* Set new map */
+ dm_suspend(md, 0);
+ old_map = dm_swap_table(md, t);
+ if (IS_ERR(old_map)) {
+ r = PTR_ERR(old_map);
+ goto err_destroy_table;
+ }
+ set_disk_ro(dm_disk(md), !!(dmi->flags & DM_READONLY_FLAG));
+
+ /* resume device */
+ r = dm_resume(md);
+ if (r)
+ goto err_destroy_table;
+
+ DMINFO("%s (%s) is ready", md->disk->disk_name, dmi->name);
+ dm_put(md);
+ return 0;
+
+err_destroy_table:
+ dm_table_destroy(t);
+err_destroy_dm:
+ dm_put(md);
+ dm_destroy(md);
+ return r;
+}
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index adcfe8ae10aa..9fdef6897316 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -2986,11 +2986,6 @@ static void configure_discard_support(struct raid_set *rs)
}
}
- /*
- * RAID1 and RAID10 personalities require bio splitting,
- * RAID0/4/5/6 don't and process large discard bios properly.
- */
- ti->split_discard_bios = !!(rs_is_raid1(rs) || rs_is_raid10(rs));
ti->num_discard_bios = 1;
}
@@ -3747,6 +3742,15 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_min(limits, chunk_size);
blk_limits_io_opt(limits, chunk_size * mddev_data_stripes(rs));
+
+ /*
+ * RAID1 and RAID10 personalities require bio splitting,
+ * RAID0/4/5/6 don't and process large discard bios properly.
+ */
+ if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+ limits->discard_granularity = chunk_size;
+ limits->max_discard_sectors = chunk_size;
+ }
}
static void raid_postsuspend(struct dm_target *ti)
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index b2c03c079a8d..09773636602d 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -12,6 +12,22 @@
#define DM_MSG_PREFIX "core-rq"
+/*
+ * One of these is allocated per request.
+ */
+struct dm_rq_target_io {
+ struct mapped_device *md;
+ struct dm_target *ti;
+ struct request *orig, *clone;
+ struct kthread_work work;
+ blk_status_t error;
+ union map_info info;
+ struct dm_stats_aux stats_aux;
+ unsigned long duration_jiffies;
+ unsigned n_sectors;
+ unsigned completed;
+};
+
#define DM_MQ_NR_HW_QUEUES 1
#define DM_MQ_QUEUE_DEPTH 2048
static unsigned dm_mq_nr_hw_queues = DM_MQ_NR_HW_QUEUES;
diff --git a/drivers/md/dm-rq.h b/drivers/md/dm-rq.h
index b39245545229..1eea0da641db 100644
--- a/drivers/md/dm-rq.h
+++ b/drivers/md/dm-rq.h
@@ -17,22 +17,6 @@
struct mapped_device;
/*
- * One of these is allocated per request.
- */
-struct dm_rq_target_io {
- struct mapped_device *md;
- struct dm_target *ti;
- struct request *orig, *clone;
- struct kthread_work work;
- blk_status_t error;
- union map_info info;
- struct dm_stats_aux stats_aux;
- unsigned long duration_jiffies;
- unsigned n_sectors;
- unsigned completed;
-};
-
-/*
* For request-based dm - the bio clones we allocate are embedded in these
* structs.
*
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 36805b12661e..a168963b757d 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -2338,13 +2338,6 @@ static int origin_map(struct dm_target *ti, struct bio *bio)
return do_origin(o->dev, bio);
}
-static long origin_dax_direct_access(struct dm_target *ti, pgoff_t pgoff,
- long nr_pages, void **kaddr, pfn_t *pfn)
-{
- DMWARN("device does not support dax.");
- return -EIO;
-}
-
/*
* Set the target "max_io_len" field to the minimum of all the snapshots'
* chunk sizes.
@@ -2404,7 +2397,6 @@ static struct target_type origin_target = {
.postsuspend = origin_postsuspend,
.status = origin_status,
.iterate_devices = origin_iterate_devices,
- .direct_access = origin_dax_direct_access,
};
static struct target_type snapshot_target = {
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index fae35caf3672..8a0f057b8122 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -61,8 +61,7 @@ static struct switch_ctx *alloc_switch_ctx(struct dm_target *ti, unsigned nr_pat
{
struct switch_ctx *sctx;
- sctx = kzalloc(sizeof(struct switch_ctx) + nr_paths * sizeof(struct switch_path),
- GFP_KERNEL);
+ sctx = kzalloc(struct_size(sctx, path_list, nr_paths), GFP_KERNEL);
if (!sctx)
return NULL;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index e83b63608262..fcd887703f95 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3283,6 +3283,13 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
as.argc = argc;
as.argv = argv;
+ /* make sure metadata and data are different devices */
+ if (!strcmp(argv[0], argv[1])) {
+ ti->error = "Error setting metadata or data device";
+ r = -EINVAL;
+ goto out_unlock;
+ }
+
/*
* Set default pool features.
*/
@@ -4167,6 +4174,12 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
tc->sort_bio_list = RB_ROOT;
if (argc == 3) {
+ if (!strcmp(argv[0], argv[2])) {
+ ti->error = "Error setting origin device";
+ r = -EINVAL;
+ goto bad_origin_dev;
+ }
+
r = dm_get_device(ti, argv[2], FMODE_READ, &origin_dev);
if (r) {
ti->error = "Error opening origin device";
@@ -4227,7 +4240,6 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (tc->pool->pf.discard_enabled) {
ti->discards_supported = true;
ti->num_discard_bios = 1;
- ti->split_discard_bios = false;
}
mutex_unlock(&dm_thin_pool_table.mutex);
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 0ce04e5b4afb..b634fa23f4c4 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -73,7 +73,7 @@ static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
*offset = (unsigned)(position - (block << v->data_dev_block_bits));
res = dm_bufio_read(v->fec->bufio, v->fec->start + block, buf);
- if (unlikely(IS_ERR(res))) {
+ if (IS_ERR(res)) {
DMERR("%s: FEC %llu: parity read failed (block %llu): %ld",
v->data_dev->name, (unsigned long long)rsb,
(unsigned long long)(v->fec->start + block),
@@ -163,7 +163,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_fec_io *fio,
dm_bufio_release(buf);
par = fec_read_parity(v, rsb, block_offset, &offset, &buf);
- if (unlikely(IS_ERR(par)))
+ if (IS_ERR(par))
return PTR_ERR(par);
}
}
@@ -253,7 +253,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
}
bbuf = dm_bufio_read(bufio, block, &buf);
- if (unlikely(IS_ERR(bbuf))) {
+ if (IS_ERR(bbuf)) {
DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
v->data_dev->name,
(unsigned long long)rsb,
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 2b8cee35e4d5..f7822875589e 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -1859,7 +1859,7 @@ static int writecache_ctr(struct dm_target *ti, unsigned argc, char **argv)
goto bad;
}
- wc->writeback_wq = alloc_workqueue("writecache-writeabck", WQ_MEM_RECLAIM, 1);
+ wc->writeback_wq = alloc_workqueue("writecache-writeback", WQ_MEM_RECLAIM, 1);
if (!wc->writeback_wq) {
r = -ENOMEM;
ti->error = "Could not allocate writeback workqueue";
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 6af5babe6837..8865c1709e16 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -727,7 +727,6 @@ static int dmz_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->per_io_data_size = sizeof(struct dmz_bioctx);
ti->flush_supported = true;
ti->discards_supported = true;
- ti->split_discard_bios = true;
/* The exposed capacity is the number of chunks that can be mapped */
ti->len = (sector_t)dmz_nr_chunks(dmz->metadata) << dev->zone_nr_sectors_shift;
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 515e6af9bed2..68d24056d0b1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -158,9 +158,6 @@ struct table_device {
struct dm_dev dm_dev;
};
-static struct kmem_cache *_rq_tio_cache;
-static struct kmem_cache *_rq_cache;
-
/*
* Bio-based DM's mempools' reserved IOs set by the user.
*/
@@ -222,20 +219,11 @@ static unsigned dm_get_numa_node(void)
static int __init local_init(void)
{
- int r = -ENOMEM;
-
- _rq_tio_cache = KMEM_CACHE(dm_rq_target_io, 0);
- if (!_rq_tio_cache)
- return r;
-
- _rq_cache = kmem_cache_create("dm_old_clone_request", sizeof(struct request),
- __alignof__(struct request), 0, NULL);
- if (!_rq_cache)
- goto out_free_rq_tio_cache;
+ int r;
r = dm_uevent_init();
if (r)
- goto out_free_rq_cache;
+ return r;
deferred_remove_workqueue = alloc_workqueue("kdmremove", WQ_UNBOUND, 1);
if (!deferred_remove_workqueue) {
@@ -257,10 +245,6 @@ out_free_workqueue:
destroy_workqueue(deferred_remove_workqueue);
out_uevent_exit:
dm_uevent_exit();
-out_free_rq_cache:
- kmem_cache_destroy(_rq_cache);
-out_free_rq_tio_cache:
- kmem_cache_destroy(_rq_tio_cache);
return r;
}
@@ -270,8 +254,6 @@ static void local_exit(void)
flush_scheduled_work();
destroy_workqueue(deferred_remove_workqueue);
- kmem_cache_destroy(_rq_cache);
- kmem_cache_destroy(_rq_tio_cache);
unregister_blkdev(_major, _name);
dm_uevent_exit();
@@ -1478,17 +1460,10 @@ static unsigned get_num_write_zeroes_bios(struct dm_target *ti)
return ti->num_write_zeroes_bios;
}
-typedef bool (*is_split_required_fn)(struct dm_target *ti);
-
-static bool is_split_required_for_discard(struct dm_target *ti)
-{
- return ti->split_discard_bios;
-}
-
static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *ti,
- unsigned num_bios, bool is_split_required)
+ unsigned num_bios)
{
- unsigned len;
+ unsigned len = ci->sector_count;
/*
* Even though the device advertised support for this type of
@@ -1499,11 +1474,6 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
if (!num_bios)
return -EOPNOTSUPP;
- if (!is_split_required)
- len = min((sector_t)ci->sector_count, max_io_len_target_boundary(ci->sector, ti));
- else
- len = min((sector_t)ci->sector_count, max_io_len(ci->sector, ti));
-
__send_duplicate_bios(ci, ti, num_bios, &len);
ci->sector += len;
@@ -1514,23 +1484,38 @@ static int __send_changing_extent_only(struct clone_info *ci, struct dm_target *
static int __send_discard(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti),
- is_split_required_for_discard(ti));
+ return __send_changing_extent_only(ci, ti, get_num_discard_bios(ti));
}
static int __send_secure_erase(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti), false);
+ return __send_changing_extent_only(ci, ti, get_num_secure_erase_bios(ti));
}
static int __send_write_same(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti), false);
+ return __send_changing_extent_only(ci, ti, get_num_write_same_bios(ti));
}
static int __send_write_zeroes(struct clone_info *ci, struct dm_target *ti)
{
- return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti), false);
+ return __send_changing_extent_only(ci, ti, get_num_write_zeroes_bios(ti));
+}
+
+static bool is_abnormal_io(struct bio *bio)
+{
+ bool r = false;
+
+ switch (bio_op(bio)) {
+ case REQ_OP_DISCARD:
+ case REQ_OP_SECURE_ERASE:
+ case REQ_OP_WRITE_SAME:
+ case REQ_OP_WRITE_ZEROES:
+ r = true;
+ break;
+ }
+
+ return r;
}
static bool __process_abnormal_io(struct clone_info *ci, struct dm_target *ti,
@@ -1565,7 +1550,7 @@ static int __split_and_process_non_flush(struct clone_info *ci)
if (!dm_target_is_valid(ti))
return -EIO;
- if (unlikely(__process_abnormal_io(ci, ti, &r)))
+ if (__process_abnormal_io(ci, ti, &r))
return r;
len = min_t(sector_t, max_io_len(ci->sector, ti), ci->sector_count);
@@ -1601,13 +1586,6 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
blk_qc_t ret = BLK_QC_T_NONE;
int error = 0;
- if (unlikely(!map)) {
- bio_io_error(bio);
- return ret;
- }
-
- blk_queue_split(md->queue, &bio);
-
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
@@ -1675,18 +1653,13 @@ static blk_qc_t __split_and_process_bio(struct mapped_device *md,
* Optimized variant of __split_and_process_bio that leverages the
* fact that targets that use it do _not_ have a need to split bios.
*/
-static blk_qc_t __process_bio(struct mapped_device *md,
- struct dm_table *map, struct bio *bio)
+static blk_qc_t __process_bio(struct mapped_device *md, struct dm_table *map,
+ struct bio *bio, struct dm_target *ti)
{
struct clone_info ci;
blk_qc_t ret = BLK_QC_T_NONE;
int error = 0;
- if (unlikely(!map)) {
- bio_io_error(bio);
- return ret;
- }
-
init_clone_info(&ci, md, map, bio);
if (bio->bi_opf & REQ_PREFLUSH) {
@@ -1704,21 +1677,11 @@ static blk_qc_t __process_bio(struct mapped_device *md,
error = __send_empty_flush(&ci);
/* dec_pending submits any data associated with flush */
} else {
- struct dm_target *ti = md->immutable_target;
struct dm_target_io *tio;
- /*
- * Defend against IO still getting in during teardown
- * - as was seen for a time with nvme-fcloop
- */
- if (WARN_ON_ONCE(!ti || !dm_target_is_valid(ti))) {
- error = -EIO;
- goto out;
- }
-
ci.bio = bio;
ci.sector_count = bio_sectors(bio);
- if (unlikely(__process_abnormal_io(&ci, ti, &error)))
+ if (__process_abnormal_io(&ci, ti, &error))
goto out;
tio = alloc_tio(&ci, ti, 0, GFP_NOIO);
@@ -1730,11 +1693,55 @@ out:
return ret;
}
+static void dm_queue_split(struct mapped_device *md, struct dm_target *ti, struct bio **bio)
+{
+ unsigned len, sector_count;
+
+ sector_count = bio_sectors(*bio);
+ len = min_t(sector_t, max_io_len((*bio)->bi_iter.bi_sector, ti), sector_count);
+
+ if (sector_count > len) {
+ struct bio *split = bio_split(*bio, len, GFP_NOIO, &md->queue->bio_split);
+
+ bio_chain(split, *bio);
+ trace_block_split(md->queue, split, (*bio)->bi_iter.bi_sector);
+ generic_make_request(*bio);
+ *bio = split;
+ }
+}
+
static blk_qc_t dm_process_bio(struct mapped_device *md,
struct dm_table *map, struct bio *bio)
{
+ blk_qc_t ret = BLK_QC_T_NONE;
+ struct dm_target *ti = md->immutable_target;
+
+ if (unlikely(!map)) {
+ bio_io_error(bio);
+ return ret;
+ }
+
+ if (!ti) {
+ ti = dm_table_find_target(map, bio->bi_iter.bi_sector);
+ if (unlikely(!ti || !dm_target_is_valid(ti))) {
+ bio_io_error(bio);
+ return ret;
+ }
+ }
+
+ /*
+ * If in ->make_request_fn we need to use blk_queue_split(), otherwise
+ * queue_limits for abnormal requests (e.g. discard, writesame, etc)
+ * won't be imposed.
+ */
+ if (current->bio_list) {
+ blk_queue_split(md->queue, &bio);
+ if (!is_abnormal_io(bio))
+ dm_queue_split(md, ti, &bio);
+ }
+
if (dm_get_md_type(md) == DM_TYPE_NVME_BIO_BASED)
- return __process_bio(md, map, bio);
+ return __process_bio(md, map, bio, ti);
else
return __split_and_process_bio(md, map, bio);
}
diff --git a/drivers/md/persistent-data/dm-block-manager.c b/drivers/md/persistent-data/dm-block-manager.c
index 492a3f8ac119..3972232b8037 100644
--- a/drivers/md/persistent-data/dm-block-manager.c
+++ b/drivers/md/persistent-data/dm-block-manager.c
@@ -462,7 +462,7 @@ int dm_bm_read_lock(struct dm_block_manager *bm, dm_block_t b,
int r;
p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
- if (unlikely(IS_ERR(p)))
+ if (IS_ERR(p))
return PTR_ERR(p);
aux = dm_bufio_get_aux_data(to_buffer(*result));
@@ -498,7 +498,7 @@ int dm_bm_write_lock(struct dm_block_manager *bm,
return -EPERM;
p = dm_bufio_read(bm->bufio, b, (struct dm_buffer **) result);
- if (unlikely(IS_ERR(p)))
+ if (IS_ERR(p))
return PTR_ERR(p);
aux = dm_bufio_get_aux_data(to_buffer(*result));
@@ -531,7 +531,7 @@ int dm_bm_read_try_lock(struct dm_block_manager *bm,
int r;
p = dm_bufio_get(bm->bufio, b, (struct dm_buffer **) result);
- if (unlikely(IS_ERR(p)))
+ if (IS_ERR(p))
return PTR_ERR(p);
if (unlikely(!p))
return -EWOULDBLOCK;
@@ -567,7 +567,7 @@ int dm_bm_write_lock_zero(struct dm_block_manager *bm,
return -EPERM;
p = dm_bufio_new(bm->bufio, b, (struct dm_buffer **) result);
- if (unlikely(IS_ERR(p)))
+ if (IS_ERR(p))
return PTR_ERR(p);
memset(p, 0, dm_bm_block_size(bm));