Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/bcache/btree.c           |   3
-rw-r--r--  drivers/md/bcache/super.c           |   3
-rw-r--r--  drivers/md/dm-bufio.c               | 189
-rw-r--r--  drivers/md/dm-core.h                |   4
-rw-r--r--  drivers/md/dm-delay.c               |  17
-rw-r--r--  drivers/md/dm-dust.c                |   4
-rw-r--r--  drivers/md/dm-ebs-target.c          |   3
-rw-r--r--  drivers/md/dm-flakey.c              | 118
-rw-r--r--  drivers/md/dm-ioctl.c               |   1
-rw-r--r--  drivers/md/dm-linear.c              |   4
-rw-r--r--  drivers/md/dm-log-writes.c          |   4
-rw-r--r--  drivers/md/dm-mpath.c               | 243
-rw-r--r--  drivers/md/dm-raid1.c               |   5
-rw-r--r--  drivers/md/dm-rq.c                  |   4
-rw-r--r--  drivers/md/dm-stripe.c              |   5
-rw-r--r--  drivers/md/dm-switch.c              |   4
-rw-r--r--  drivers/md/dm-table.c               | 263
-rw-r--r--  drivers/md/dm-vdo/indexer/volume.c  |  24
-rw-r--r--  drivers/md/dm-verity-fec.c          |   4
-rw-r--r--  drivers/md/dm-verity-target.c       |  15
-rw-r--r--  drivers/md/dm-verity-verify-sig.c   |  17
-rw-r--r--  drivers/md/dm-zone.c                |  98
-rw-r--r--  drivers/md/dm-zoned-target.c        |   3
-rw-r--r--  drivers/md/dm.c                     |  73
-rw-r--r--  drivers/md/dm.h                     |   6
25 files changed, 723 insertions(+), 391 deletions(-)
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c
index ed40d8600656..2cc2eb24dc8a 100644
--- a/drivers/md/bcache/btree.c
+++ b/drivers/md/bcache/btree.c
@@ -36,6 +36,7 @@
#include <linux/sched/clock.h>
#include <linux/rculist.h>
#include <linux/delay.h>
+#include <linux/sort.h>
#include <trace/events/bcache.h>
/*
@@ -559,8 +560,6 @@ static void mca_data_alloc(struct btree *b, struct bkey *k, gfp_t gfp)
}
}
-#define cmp_int(l, r) ((l > r) - (l < r))
-
#ifdef CONFIG_PROVE_LOCKING
static int btree_lock_cmp_fn(const struct lockdep_map *_a,
const struct lockdep_map *_b)
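The local cmp_int() macro is dropped above because an identical definition is now provided via the newly included <linux/sort.h>. A minimal userspace re-statement of its three-way-compare semantics, for illustration only:

#include <stdio.h>

#define cmp_int(l, r) ((l > r) - (l < r))	/* same body the driver used */

int main(void)
{
	printf("%d %d %d\n", cmp_int(3, 5), cmp_int(5, 5), cmp_int(7, 5));
	/* prints "-1 0 1": less, equal, greater */
	return 0;
}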
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index c40db9c161c1..1a2ce1a4b456 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -545,7 +545,8 @@ static struct uuid_entry *uuid_find(struct cache_set *c, const char *uuid)
static struct uuid_entry *uuid_find_empty(struct cache_set *c)
{
- static const char zero_uuid[16] = { 0 };
+ static const char zero_uuid[16] __nonstring =
+ { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 };
return uuid_find(c, zero_uuid);
}
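zero_uuid holds sixteen raw bytes that uuid_find() compares with memcmp(); it is not a NUL-terminated string. Spelling out all sixteen zeroes and tagging the array __nonstring records that intent and keeps newer compilers' string-initialization diagnostics away from it. A one-line sketch of the same annotation pattern (kernel context assumed; __nonstring comes from the compiler-attribute wrappers):

static const char magic[4] __nonstring = { 'R', 'A', 'W', '0' };	/* bytes, not a C string */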
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index d098e75e3461..ec84ba5e93e5 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -41,16 +41,6 @@
#define DM_BUFIO_LOW_WATERMARK_RATIO 16
/*
- * Check buffer ages in this interval (seconds)
- */
-#define DM_BUFIO_WORK_TIMER_SECS 30
-
-/*
- * Free buffers when they are older than this (seconds)
- */
-#define DM_BUFIO_DEFAULT_AGE_SECS 300
-
-/*
* The nr of bytes of cached data to keep around.
*/
#define DM_BUFIO_DEFAULT_RETAIN_BYTES (256 * 1024)
@@ -1057,10 +1047,8 @@ static unsigned long dm_bufio_cache_size_latch;
static DEFINE_SPINLOCK(global_spinlock);
-/*
- * Buffers are freed after this timeout
- */
-static unsigned int dm_bufio_max_age = DM_BUFIO_DEFAULT_AGE_SECS;
+static unsigned int dm_bufio_max_age; /* No longer does anything */
+
static unsigned long dm_bufio_retain_bytes = DM_BUFIO_DEFAULT_RETAIN_BYTES;
static unsigned long dm_bufio_peak_allocated;
@@ -1088,7 +1076,6 @@ static LIST_HEAD(dm_bufio_all_clients);
static DEFINE_MUTEX(dm_bufio_clients_lock);
static struct workqueue_struct *dm_bufio_wq;
-static struct delayed_work dm_bufio_cleanup_old_work;
static struct work_struct dm_bufio_replacement_work;
@@ -2680,130 +2667,6 @@ EXPORT_SYMBOL_GPL(dm_bufio_set_sector_offset);
/*--------------------------------------------------------------*/
-static unsigned int get_max_age_hz(void)
-{
- unsigned int max_age = READ_ONCE(dm_bufio_max_age);
-
- if (max_age > UINT_MAX / HZ)
- max_age = UINT_MAX / HZ;
-
- return max_age * HZ;
-}
-
-static bool older_than(struct dm_buffer *b, unsigned long age_hz)
-{
- return time_after_eq(jiffies, READ_ONCE(b->last_accessed) + age_hz);
-}
-
-struct evict_params {
- gfp_t gfp;
- unsigned long age_hz;
-
- /*
- * This gets updated with the largest last_accessed (ie. most
- * recently used) of the evicted buffers. It will not be reinitialised
- * by __evict_many(), so you can use it across multiple invocations.
- */
- unsigned long last_accessed;
-};
-
-/*
- * We may not be able to evict this buffer if IO pending or the client
- * is still using it.
- *
- * And if GFP_NOFS is used, we must not do any I/O because we hold
- * dm_bufio_clients_lock and we would risk deadlock if the I/O gets
- * rerouted to different bufio client.
- */
-static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
-{
- struct evict_params *params = context;
-
- if (!(params->gfp & __GFP_FS) ||
- (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep)) {
- if (test_bit_acquire(B_READING, &b->state) ||
- test_bit(B_WRITING, &b->state) ||
- test_bit(B_DIRTY, &b->state))
- return ER_DONT_EVICT;
- }
-
- return older_than(b, params->age_hz) ? ER_EVICT : ER_STOP;
-}
-
-static unsigned long __evict_many(struct dm_bufio_client *c,
- struct evict_params *params,
- int list_mode, unsigned long max_count)
-{
- unsigned long count;
- unsigned long last_accessed;
- struct dm_buffer *b;
-
- for (count = 0; count < max_count; count++) {
- b = cache_evict(&c->cache, list_mode, select_for_evict, params);
- if (!b)
- break;
-
- last_accessed = READ_ONCE(b->last_accessed);
- if (time_after_eq(params->last_accessed, last_accessed))
- params->last_accessed = last_accessed;
-
- __make_buffer_clean(b);
- __free_buffer_wake(b);
-
- cond_resched();
- }
-
- return count;
-}
-
-static void evict_old_buffers(struct dm_bufio_client *c, unsigned long age_hz)
-{
- struct evict_params params = {.gfp = 0, .age_hz = age_hz, .last_accessed = 0};
- unsigned long retain = get_retain_buffers(c);
- unsigned long count;
- LIST_HEAD(write_list);
-
- dm_bufio_lock(c);
-
- __check_watermark(c, &write_list);
- if (unlikely(!list_empty(&write_list))) {
- dm_bufio_unlock(c);
- __flush_write_list(&write_list);
- dm_bufio_lock(c);
- }
-
- count = cache_total(&c->cache);
- if (count > retain)
- __evict_many(c, &params, LIST_CLEAN, count - retain);
-
- dm_bufio_unlock(c);
-}
-
-static void cleanup_old_buffers(void)
-{
- unsigned long max_age_hz = get_max_age_hz();
- struct dm_bufio_client *c;
-
- mutex_lock(&dm_bufio_clients_lock);
-
- __cache_size_refresh();
-
- list_for_each_entry(c, &dm_bufio_all_clients, client_list)
- evict_old_buffers(c, max_age_hz);
-
- mutex_unlock(&dm_bufio_clients_lock);
-}
-
-static void work_fn(struct work_struct *w)
-{
- cleanup_old_buffers();
-
- queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
- DM_BUFIO_WORK_TIMER_SECS * HZ);
-}
-
-/*--------------------------------------------------------------*/
-
/*
* Global cleanup tries to evict the oldest buffers from across _all_
* the clients. It does this by repeatedly evicting a few buffers from
@@ -2841,27 +2704,51 @@ static void __insert_client(struct dm_bufio_client *new_client)
list_add_tail(&new_client->client_list, h);
}
+static enum evict_result select_for_evict(struct dm_buffer *b, void *context)
+{
+ /* In no-sleep mode, we cannot wait on IO. */
+ if (static_branch_unlikely(&no_sleep_enabled) && b->c->no_sleep) {
+ if (test_bit_acquire(B_READING, &b->state) ||
+ test_bit(B_WRITING, &b->state) ||
+ test_bit(B_DIRTY, &b->state))
+ return ER_DONT_EVICT;
+ }
+ return ER_EVICT;
+}
+
static unsigned long __evict_a_few(unsigned long nr_buffers)
{
- unsigned long count;
struct dm_bufio_client *c;
- struct evict_params params = {
- .gfp = GFP_KERNEL,
- .age_hz = 0,
- /* set to jiffies in case there are no buffers in this client */
- .last_accessed = jiffies
- };
+ unsigned long oldest_buffer = jiffies;
+ unsigned long last_accessed;
+ unsigned long count;
+ struct dm_buffer *b;
c = __pop_client();
if (!c)
return 0;
dm_bufio_lock(c);
- count = __evict_many(c, &params, LIST_CLEAN, nr_buffers);
+
+ for (count = 0; count < nr_buffers; count++) {
+ b = cache_evict(&c->cache, LIST_CLEAN, select_for_evict, NULL);
+ if (!b)
+ break;
+
+ last_accessed = READ_ONCE(b->last_accessed);
+ if (time_after_eq(oldest_buffer, last_accessed))
+ oldest_buffer = last_accessed;
+
+ __make_buffer_clean(b);
+ __free_buffer_wake(b);
+
+ cond_resched();
+ }
+
dm_bufio_unlock(c);
if (count)
- c->oldest_buffer = params.last_accessed;
+ c->oldest_buffer = oldest_buffer;
__insert_client(c);
return count;
@@ -2944,10 +2831,7 @@ static int __init dm_bufio_init(void)
if (!dm_bufio_wq)
return -ENOMEM;
- INIT_DELAYED_WORK(&dm_bufio_cleanup_old_work, work_fn);
INIT_WORK(&dm_bufio_replacement_work, do_global_cleanup);
- queue_delayed_work(dm_bufio_wq, &dm_bufio_cleanup_old_work,
- DM_BUFIO_WORK_TIMER_SECS * HZ);
return 0;
}
@@ -2959,7 +2843,6 @@ static void __exit dm_bufio_exit(void)
{
int bug = 0;
- cancel_delayed_work_sync(&dm_bufio_cleanup_old_work);
destroy_workqueue(dm_bufio_wq);
if (dm_bufio_client_count) {
@@ -2996,7 +2879,7 @@ module_param_named(max_cache_size_bytes, dm_bufio_cache_size, ulong, 0644);
MODULE_PARM_DESC(max_cache_size_bytes, "Size of metadata cache");
module_param_named(max_age_seconds, dm_bufio_max_age, uint, 0644);
-MODULE_PARM_DESC(max_age_seconds, "Max age of a buffer in seconds");
+MODULE_PARM_DESC(max_age_seconds, "No longer does anything");
module_param_named(retain_bytes, dm_bufio_retain_bytes, ulong, 0644);
MODULE_PARM_DESC(retain_bytes, "Try to keep at least this many bytes cached in memory");
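With the 30-second cleanup timer gone, bufio eviction is driven only by memory pressure (the shrinker and do_global_cleanup()); max_age_seconds survives solely as an inert module parameter so existing configurations keep loading. The eviction loop above tracks the oldest last_accessed stamp with time_after_eq(), which stays correct across jiffies wraparound. A userspace re-statement of that comparison, mirroring the kernel macro for illustration:

#include <stdio.h>

/* Kernel's time_after_eq(a, b): true if a is at or after b, wrap-safe. */
#define time_after_eq(a, b) ((long)((a) - (b)) >= 0)

int main(void)
{
	unsigned long oldest_buffer = 1000, last_accessed = 900;

	if (time_after_eq(oldest_buffer, last_accessed))
		oldest_buffer = last_accessed;	/* keep the older stamp */
	printf("%lu\n", oldest_buffer);		/* 900 */
	return 0;
}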
diff --git a/drivers/md/dm-core.h b/drivers/md/dm-core.h
index 3637761f3585..c889332e533b 100644
--- a/drivers/md/dm-core.h
+++ b/drivers/md/dm-core.h
@@ -141,6 +141,7 @@ struct mapped_device {
#ifdef CONFIG_BLK_DEV_ZONED
unsigned int nr_zones;
void *zone_revalidate_map;
+ struct task_struct *revalidate_map_task;
#endif
#ifdef CONFIG_IMA
@@ -162,9 +163,6 @@ struct mapped_device {
#define DMF_POST_SUSPENDING 8
#define DMF_EMULATE_ZONE_APPEND 9
-void disable_discard(struct mapped_device *md);
-void disable_write_zeroes(struct mapped_device *md);
-
static inline sector_t dm_get_size(struct mapped_device *md)
{
return get_capacity(md->disk);
diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c
index d4cf0ac2a7aa..16d3d454fb0a 100644
--- a/drivers/md/dm-delay.c
+++ b/drivers/md/dm-delay.c
@@ -14,11 +14,14 @@
#include <linux/bio.h>
#include <linux/slab.h>
#include <linux/kthread.h>
+#include <linux/delay.h>
#include <linux/device-mapper.h>
#define DM_MSG_PREFIX "delay"
+#define SLEEP_SHIFT 3
+
struct delay_class {
struct dm_dev *dev;
sector_t start;
@@ -34,6 +37,7 @@ struct delay_c {
struct work_struct flush_expired_bios;
struct list_head delayed_bios;
struct task_struct *worker;
+ unsigned int worker_sleep_us;
bool may_delay;
struct delay_class read;
@@ -136,6 +140,7 @@ static int flush_worker_fn(void *data)
schedule();
} else {
spin_unlock(&dc->delayed_bios_lock);
+ fsleep(dc->worker_sleep_us);
cond_resched();
}
}
@@ -212,7 +217,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
struct delay_c *dc;
int ret;
- unsigned int max_delay;
+ unsigned int max_delay, min_delay;
if (argc != 3 && argc != 6 && argc != 9) {
ti->error = "Requires exactly 3, 6 or 9 arguments";
@@ -235,7 +240,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ret = delay_class_ctr(ti, &dc->read, argv);
if (ret)
goto bad;
- max_delay = dc->read.delay;
+ min_delay = max_delay = dc->read.delay;
if (argc == 3) {
ret = delay_class_ctr(ti, &dc->write, argv);
@@ -251,6 +256,7 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (ret)
goto bad;
max_delay = max(max_delay, dc->write.delay);
+ min_delay = min_not_zero(min_delay, dc->write.delay);
if (argc == 6) {
ret = delay_class_ctr(ti, &dc->flush, argv + 3);
@@ -263,9 +269,14 @@ static int delay_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (ret)
goto bad;
max_delay = max(max_delay, dc->flush.delay);
+ min_delay = min_not_zero(min_delay, dc->flush.delay);
out:
if (max_delay < 50) {
+ if (min_delay >> SLEEP_SHIFT)
+ dc->worker_sleep_us = 1000;
+ else
+ dc->worker_sleep_us = (min_delay * 1000) >> SLEEP_SHIFT;
/*
* In case of small requested delays, use kthread instead of
* timers and workqueue to achieve better latency.
@@ -438,7 +449,7 @@ out:
static struct target_type delay_target = {
.name = "delay",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_ZONED_HM,
.module = THIS_MODULE,
.ctr = delay_ctr,
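The new worker_sleep_us makes the flush kthread poll at roughly 1/8th (SLEEP_SHIFT = 3) of the smallest configured delay, capped at 1 ms, instead of spinning on cond_resched() alone. A standalone restatement of the constructor arithmetic:

#include <stdio.h>

#define SLEEP_SHIFT 3

static unsigned int worker_sleep_us(unsigned int min_delay_ms)
{
	if (min_delay_ms >> SLEEP_SHIFT)	/* >= 8 ms: cap the poll interval */
		return 1000;
	return (min_delay_ms * 1000) >> SLEEP_SHIFT;
}

int main(void)
{
	printf("%u\n", worker_sleep_us(2));	/* 250 us */
	printf("%u\n", worker_sleep_us(40));	/* 1000 us (capped) */
	return 0;
}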
diff --git a/drivers/md/dm-dust.c b/drivers/md/dm-dust.c
index 1a33820c9f46..e75310232bbf 100644
--- a/drivers/md/dm-dust.c
+++ b/drivers/md/dm-dust.c
@@ -534,7 +534,9 @@ static void dust_status(struct dm_target *ti, status_type_t type,
}
}
-static int dust_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
+static int dust_prepare_ioctl(struct dm_target *ti, struct block_device **bdev,
+ unsigned int cmd, unsigned long arg,
+ bool *forward)
{
struct dust_device *dd = ti->private;
struct dm_dev *dev = dd->dev;
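This is the first of several targets converted to the extended prepare_ioctl signature: the target now sees the ioctl command and argument and can clear *forward to handle the ioctl itself rather than have dm core forward it to the returned block device (dm-mpath below does exactly that for DM_MPATH_PROBE_PATHS). A sketch of the new contract, using a hypothetical target context:

static int example_prepare_ioctl(struct dm_target *ti,
				 struct block_device **bdev,
				 unsigned int cmd, unsigned long arg,
				 bool *forward)
{
	struct example_c *ec = ti->private;	/* hypothetical per-target data */

	*bdev = ec->dev->bdev;
	/* *forward is left true: dm core forwards cmd to *bdev as before. */
	return 0;
}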
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index b19b0142a690..6abb31ca9662 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -415,7 +415,8 @@ static void ebs_status(struct dm_target *ti, status_type_t type,
}
}
-static int ebs_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
+static int ebs_prepare_ioctl(struct dm_target *ti, struct block_device **bdev,
+ unsigned int cmd, unsigned long arg, bool *forward)
{
struct ebs_c *ec = ti->private;
struct dm_dev *dev = ec->dev;
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index b690905ab89f..c711db6f8f5c 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -47,14 +47,15 @@ enum feature_flag_bits {
};
struct per_bio_data {
- bool bio_submitted;
+ bool bio_can_corrupt;
+ struct bvec_iter saved_iter;
};
static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
struct dm_target *ti)
{
- int r;
- unsigned int argc;
+ int r = 0;
+ unsigned int argc = 0;
const char *arg_name;
static const struct dm_arg _args[] = {
@@ -65,14 +66,13 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
{0, PROBABILITY_BASE, "Invalid random corrupt argument"},
};
- /* No feature arguments supplied. */
- if (!as->argc)
- return 0;
-
- r = dm_read_arg_group(_args, as, &argc, &ti->error);
- if (r)
+ if (as->argc && (r = dm_read_arg_group(_args, as, &argc, &ti->error)))
return r;
+ /* No feature arguments supplied. */
+ if (!argc)
+ goto error_all_io;
+
while (argc) {
arg_name = dm_shift_arg(as);
argc--;
@@ -128,8 +128,11 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
* corrupt_bio_byte <Nth_byte> <direction> <value> <bio_flags>
*/
if (!strcasecmp(arg_name, "corrupt_bio_byte")) {
- if (!argc) {
- ti->error = "Feature corrupt_bio_byte requires parameters";
+ if (fc->corrupt_bio_byte) {
+ ti->error = "Feature corrupt_bio_byte duplicated";
+ return -EINVAL;
+ } else if (argc < 4) {
+ ti->error = "Feature corrupt_bio_byte requires 4 parameters";
return -EINVAL;
}
@@ -176,7 +179,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
}
if (!strcasecmp(arg_name, "random_read_corrupt")) {
- if (!argc) {
+ if (fc->random_read_corrupt) {
+ ti->error = "Feature random_read_corrupt duplicated";
+ return -EINVAL;
+ } else if (!argc) {
ti->error = "Feature random_read_corrupt requires a parameter";
return -EINVAL;
}
@@ -189,7 +195,10 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
}
if (!strcasecmp(arg_name, "random_write_corrupt")) {
- if (!argc) {
+ if (fc->random_write_corrupt) {
+ ti->error = "Feature random_write_corrupt duplicated";
+ return -EINVAL;
+ } else if (!argc) {
ti->error = "Feature random_write_corrupt requires a parameter";
return -EINVAL;
}
@@ -205,18 +214,25 @@ static int parse_features(struct dm_arg_set *as, struct flakey_c *fc,
return -EINVAL;
}
- if (test_bit(DROP_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
- ti->error = "drop_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
+ if (test_bit(DROP_WRITES, &fc->flags) &&
+ (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) {
+ ti->error = "drop_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set";
return -EINVAL;
- } else if (test_bit(ERROR_WRITES, &fc->flags) && (fc->corrupt_bio_rw == WRITE)) {
- ti->error = "error_writes is incompatible with corrupt_bio_byte with the WRITE flag set";
+ } else if (test_bit(ERROR_WRITES, &fc->flags) &&
+ (fc->corrupt_bio_rw == WRITE || fc->random_write_corrupt)) {
+ ti->error = "error_writes is incompatible with random_write_corrupt or corrupt_bio_byte with the WRITE flag set";
+ return -EINVAL;
+ } else if (test_bit(ERROR_READS, &fc->flags) &&
+ (fc->corrupt_bio_rw == READ || fc->random_read_corrupt)) {
+ ti->error = "error_reads is incompatible with random_read_corrupt or corrupt_bio_byte with the READ flag set";
return -EINVAL;
}
if (!fc->corrupt_bio_byte && !test_bit(ERROR_READS, &fc->flags) &&
!test_bit(DROP_WRITES, &fc->flags) && !test_bit(ERROR_WRITES, &fc->flags) &&
!fc->random_read_corrupt && !fc->random_write_corrupt) {
+error_all_io:
set_bit(ERROR_WRITES, &fc->flags);
set_bit(ERROR_READS, &fc->flags);
}
@@ -278,7 +294,7 @@ static int flakey_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (r)
goto bad;
- r = dm_read_arg(_args, &as, &fc->down_interval, &ti->error);
+ r = dm_read_arg(_args + 1, &as, &fc->down_interval, &ti->error);
if (r)
goto bad;
@@ -339,7 +355,8 @@ static void flakey_map_bio(struct dm_target *ti, struct bio *bio)
}
static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte,
- unsigned char corrupt_bio_value)
+ unsigned char corrupt_bio_value,
+ struct bvec_iter start)
{
struct bvec_iter iter;
struct bio_vec bvec;
@@ -348,7 +365,7 @@ static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte,
* Overwrite the Nth byte of the bio's data, on whichever page
* it falls.
*/
- bio_for_each_segment(bvec, bio, iter) {
+ __bio_for_each_segment(bvec, bio, iter, start) {
if (bio_iter_len(bio, iter) > corrupt_bio_byte) {
unsigned char *segment = bvec_kmap_local(&bvec);
segment[corrupt_bio_byte] = corrupt_bio_value;
@@ -357,36 +374,31 @@ static void corrupt_bio_common(struct bio *bio, unsigned int corrupt_bio_byte,
"(rw=%c bi_opf=%u bi_sector=%llu size=%u)\n",
bio, corrupt_bio_value, corrupt_bio_byte,
(bio_data_dir(bio) == WRITE) ? 'w' : 'r', bio->bi_opf,
- (unsigned long long)bio->bi_iter.bi_sector,
- bio->bi_iter.bi_size);
+ (unsigned long long)start.bi_sector,
+ start.bi_size);
break;
}
corrupt_bio_byte -= bio_iter_len(bio, iter);
}
}
-static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc)
+static void corrupt_bio_data(struct bio *bio, struct flakey_c *fc,
+ struct bvec_iter start)
{
unsigned int corrupt_bio_byte = fc->corrupt_bio_byte - 1;
- if (!bio_has_data(bio))
- return;
-
- corrupt_bio_common(bio, corrupt_bio_byte, fc->corrupt_bio_value);
+ corrupt_bio_common(bio, corrupt_bio_byte, fc->corrupt_bio_value, start);
}
-static void corrupt_bio_random(struct bio *bio)
+static void corrupt_bio_random(struct bio *bio, struct bvec_iter start)
{
unsigned int corrupt_byte;
unsigned char corrupt_value;
- if (!bio_has_data(bio))
- return;
-
- corrupt_byte = get_random_u32() % bio->bi_iter.bi_size;
+ corrupt_byte = get_random_u32() % start.bi_size;
corrupt_value = get_random_u8();
- corrupt_bio_common(bio, corrupt_byte, corrupt_value);
+ corrupt_bio_common(bio, corrupt_byte, corrupt_value, start);
}
static void clone_free(struct bio *clone)
@@ -481,7 +493,7 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
unsigned int elapsed;
struct per_bio_data *pb = dm_per_bio_data(bio, sizeof(struct per_bio_data));
- pb->bio_submitted = false;
+ pb->bio_can_corrupt = false;
if (op_is_zone_mgmt(bio_op(bio)))
goto map_bio;
@@ -490,14 +502,15 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
elapsed = (jiffies - fc->start_time) / HZ;
if (elapsed % (fc->up_interval + fc->down_interval) >= fc->up_interval) {
bool corrupt_fixed, corrupt_random;
- /*
- * Flag this bio as submitted while down.
- */
- pb->bio_submitted = true;
+
+ if (bio_has_data(bio)) {
+ pb->bio_can_corrupt = true;
+ pb->saved_iter = bio->bi_iter;
+ }
/*
- * Error reads if neither corrupt_bio_byte or drop_writes or error_writes are set.
- * Otherwise, flakey_end_io() will decide if the reads should be modified.
+ * If ERROR_READS isn't set flakey_end_io() will decide if the
+ * reads should be modified.
*/
if (bio_data_dir(bio) == READ) {
if (test_bit(ERROR_READS, &fc->flags))
@@ -516,6 +529,8 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_SUBMITTED;
}
+ if (!pb->bio_can_corrupt)
+ goto map_bio;
/*
* Corrupt matching writes.
*/
@@ -535,9 +550,11 @@ static int flakey_map(struct dm_target *ti, struct bio *bio)
struct bio *clone = clone_bio(ti, fc, bio);
if (clone) {
if (corrupt_fixed)
- corrupt_bio_data(clone, fc);
+ corrupt_bio_data(clone, fc,
+ clone->bi_iter);
if (corrupt_random)
- corrupt_bio_random(clone);
+ corrupt_bio_random(clone,
+ clone->bi_iter);
submit_bio(clone);
return DM_MAPIO_SUBMITTED;
}
@@ -559,28 +576,21 @@ static int flakey_end_io(struct dm_target *ti, struct bio *bio,
if (op_is_zone_mgmt(bio_op(bio)))
return DM_ENDIO_DONE;
- if (!*error && pb->bio_submitted && (bio_data_dir(bio) == READ)) {
+ if (!*error && pb->bio_can_corrupt && (bio_data_dir(bio) == READ)) {
if (fc->corrupt_bio_byte) {
if ((fc->corrupt_bio_rw == READ) &&
all_corrupt_bio_flags_match(bio, fc)) {
/*
* Corrupt successful matching READs while in down state.
*/
- corrupt_bio_data(bio, fc);
+ corrupt_bio_data(bio, fc, pb->saved_iter);
}
}
if (fc->random_read_corrupt) {
u64 rnd = get_random_u64();
u32 rem = do_div(rnd, PROBABILITY_BASE);
if (rem < fc->random_read_corrupt)
- corrupt_bio_random(bio);
- }
- if (test_bit(ERROR_READS, &fc->flags)) {
- /*
- * Error read during the down_interval if drop_writes
- * and error_writes were not configured.
- */
- *error = BLK_STS_IOERR;
+ corrupt_bio_random(bio, pb->saved_iter);
}
}
@@ -638,7 +648,9 @@ static void flakey_status(struct dm_target *ti, status_type_t type,
}
}
-static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
+static int flakey_prepare_ioctl(struct dm_target *ti, struct block_device **bdev,
+ unsigned int cmd, unsigned long arg,
+ bool *forward)
{
struct flakey_c *fc = ti->private;
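The saved_iter plumbing exists because bio->bi_iter is consumed as the bio is processed: by the time flakey_end_io() runs, the iterator no longer describes the data, so corruption must replay from the snapshot taken in flakey_map(). __bio_for_each_segment() accepts such an explicit start iterator. A minimal kernel-context sketch of the pattern:

/* Corrupt the first data byte of a completed bio, walking it from a
 * bvec_iter snapshot saved before submission (sketch, kernel context). */
static void corrupt_first_byte(struct bio *bio, struct bvec_iter start)
{
	struct bvec_iter iter;
	struct bio_vec bvec;

	__bio_for_each_segment(bvec, bio, iter, start) {
		unsigned char *segment = bvec_kmap_local(&bvec);

		segment[0] ^= 0xff;
		kunmap_local(segment);
		break;
	}
}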
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index d42eac944eb5..4165fef4c170 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -1885,6 +1885,7 @@ static ioctl_fn lookup_ioctl(unsigned int cmd, int *ioctl_flags)
{DM_DEV_SET_GEOMETRY_CMD, 0, dev_set_geometry},
{DM_DEV_ARM_POLL_CMD, IOCTL_FLAGS_NO_PARAMS, dev_arm_poll},
{DM_GET_TARGET_VERSION_CMD, 0, get_target_version},
+ {DM_MPATH_PROBE_PATHS_CMD, 0, NULL}, /* block device ioctl */
};
if (unlikely(cmd >= ARRAY_SIZE(_ioctls)))
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 66318aba4bdb..15538ec58f8e 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -119,7 +119,9 @@ static void linear_status(struct dm_target *ti, status_type_t type,
}
}
-static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
+static int linear_prepare_ioctl(struct dm_target *ti, struct block_device **bdev,
+ unsigned int cmd, unsigned long arg,
+ bool *forward)
{
struct linear_c *lc = ti->private;
struct dm_dev *dev = lc->dev;
diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c
index 8d7df8303d0a..d484e8e1d48a 100644
--- a/drivers/md/dm-log-writes.c
+++ b/drivers/md/dm-log-writes.c
@@ -818,7 +818,9 @@ static void log_writes_status(struct dm_target *ti, status_type_t type,
}
static int log_writes_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev)
+ struct block_device **bdev,
+ unsigned int cmd, unsigned long arg,
+ bool *forward)
{
struct log_writes_c *lc = ti->private;
struct dm_dev *dev = lc->dev;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 6c98f4ae5ea9..81fec2e1e0ef 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -79,6 +79,7 @@ struct multipath {
struct pgpath *current_pgpath;
struct priority_group *current_pg;
struct priority_group *next_pg; /* Switch to this PG if set */
+ struct priority_group *last_probed_pg;
atomic_t nr_valid_paths; /* Total number of usable paths */
unsigned int nr_priority_groups;
@@ -87,6 +88,7 @@ struct multipath {
const char *hw_handler_name;
char *hw_handler_params;
wait_queue_head_t pg_init_wait; /* Wait for pg_init completion */
+ wait_queue_head_t probe_wait; /* Wait for probing paths */
unsigned int pg_init_retries; /* Number of times to retry pg_init */
unsigned int pg_init_delay_msecs; /* Number of msecs before pg_init retry */
atomic_t pg_init_in_progress; /* Only one pg_init allowed at once */
@@ -100,6 +102,7 @@ struct multipath {
struct bio_list queued_bios;
struct timer_list nopath_timer; /* Timeout for queue_if_no_path */
+ bool is_suspending;
};
/*
@@ -132,6 +135,8 @@ static void queue_if_no_path_timeout_work(struct timer_list *t);
#define MPATHF_PG_INIT_DISABLED 4 /* pg_init is not currently allowed */
#define MPATHF_PG_INIT_REQUIRED 5 /* pg_init needs calling? */
#define MPATHF_PG_INIT_DELAY_RETRY 6 /* Delay pg_init retry? */
+#define MPATHF_DELAY_PG_SWITCH 7 /* Delay switching pg if it still has paths */
+#define MPATHF_NEED_PG_SWITCH 8 /* Need to switch pgs after the delay has ended */
static bool mpath_double_check_test_bit(int MPATHF_bit, struct multipath *m)
{
@@ -254,6 +259,7 @@ static int alloc_multipath_stage2(struct dm_target *ti, struct multipath *m)
atomic_set(&m->pg_init_count, 0);
m->pg_init_delay_msecs = DM_PG_INIT_DELAY_DEFAULT;
init_waitqueue_head(&m->pg_init_wait);
+ init_waitqueue_head(&m->probe_wait);
return 0;
}
@@ -413,13 +419,21 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
goto failed;
}
+ /* Don't change PG until it has no remaining paths */
+ pg = READ_ONCE(m->current_pg);
+ if (pg) {
+ pgpath = choose_path_in_pg(m, pg, nr_bytes);
+ if (!IS_ERR_OR_NULL(pgpath))
+ return pgpath;
+ }
+
/* Were we instructed to switch PG? */
if (READ_ONCE(m->next_pg)) {
spin_lock_irqsave(&m->lock, flags);
pg = m->next_pg;
if (!pg) {
spin_unlock_irqrestore(&m->lock, flags);
- goto check_current_pg;
+ goto check_all_pgs;
}
m->next_pg = NULL;
spin_unlock_irqrestore(&m->lock, flags);
@@ -427,16 +441,7 @@ static struct pgpath *choose_pgpath(struct multipath *m, size_t nr_bytes)
if (!IS_ERR_OR_NULL(pgpath))
return pgpath;
}
-
- /* Don't change PG until it has no remaining paths */
-check_current_pg:
- pg = READ_ONCE(m->current_pg);
- if (pg) {
- pgpath = choose_path_in_pg(m, pg, nr_bytes);
- if (!IS_ERR_OR_NULL(pgpath))
- return pgpath;
- }
-
+check_all_pgs:
/*
* Loop through priority groups until we find a valid path.
* First time we skip PGs marked 'bypassed'.
@@ -612,7 +617,6 @@ static void multipath_queue_bio(struct multipath *m, struct bio *bio)
static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
{
struct pgpath *pgpath;
- unsigned long flags;
/* Do we need to select a new pgpath? */
pgpath = READ_ONCE(m->current_pgpath);
@@ -620,12 +624,12 @@ static struct pgpath *__map_bio(struct multipath *m, struct bio *bio)
pgpath = choose_pgpath(m, bio->bi_iter.bi_size);
if (!pgpath) {
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags)) {
__multipath_queue_bio(m, bio);
pgpath = ERR_PTR(-EAGAIN);
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
} else if (mpath_double_check_test_bit(MPATHF_QUEUE_IO, m) ||
mpath_double_check_test_bit(MPATHF_PG_INIT_REQUIRED, m)) {
@@ -688,7 +692,6 @@ static void process_queued_io_list(struct multipath *m)
static void process_queued_bios(struct work_struct *work)
{
int r;
- unsigned long flags;
struct bio *bio;
struct bio_list bios;
struct blk_plug plug;
@@ -697,16 +700,16 @@ static void process_queued_bios(struct work_struct *work)
bio_list_init(&bios);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (bio_list_empty(&m->queued_bios)) {
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
return;
}
bio_list_merge_init(&bios, &m->queued_bios);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
blk_start_plug(&plug);
while ((bio = bio_list_pop(&bios))) {
@@ -1190,7 +1193,6 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv)
struct dm_arg_set as;
unsigned int pg_count = 0;
unsigned int next_pg_num;
- unsigned long flags;
as.argc = argc;
as.argv = argv;
@@ -1255,9 +1257,9 @@ static int multipath_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
enable_nopath_timeout(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
ti->num_flush_bios = 1;
ti->num_discard_bios = 1;
@@ -1292,23 +1294,21 @@ static void multipath_wait_for_pg_init_completion(struct multipath *m)
static void flush_multipath_work(struct multipath *m)
{
if (m->hw_handler_name) {
- unsigned long flags;
-
if (!atomic_read(&m->pg_init_in_progress))
goto skip;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (atomic_read(&m->pg_init_in_progress) &&
!test_and_set_bit(MPATHF_PG_INIT_DISABLED, &m->flags)) {
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
flush_workqueue(kmpath_handlerd);
multipath_wait_for_pg_init_completion(m);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
clear_bit(MPATHF_PG_INIT_DISABLED, &m->flags);
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
skip:
if (m->queue_mode == DM_TYPE_BIO_BASED)
@@ -1370,11 +1370,10 @@ out:
static int reinstate_path(struct pgpath *pgpath)
{
int r = 0, run_queue = 0;
- unsigned long flags;
struct multipath *m = pgpath->pg->m;
unsigned int nr_valid_paths;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (pgpath->is_active)
goto out;
@@ -1404,7 +1403,7 @@ static int reinstate_path(struct pgpath *pgpath)
schedule_work(&m->trigger_event);
out:
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
if (run_queue) {
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
@@ -1439,15 +1438,19 @@ static int action_dev(struct multipath *m, dev_t dev, action_fn action)
* Temporarily try to avoid having to use the specified PG
*/
static void bypass_pg(struct multipath *m, struct priority_group *pg,
- bool bypassed)
+ bool bypassed, bool can_be_delayed)
{
unsigned long flags;
spin_lock_irqsave(&m->lock, flags);
pg->bypassed = bypassed;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ if (can_be_delayed && test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags))
+ set_bit(MPATHF_NEED_PG_SWITCH, &m->flags);
+ else {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
spin_unlock_irqrestore(&m->lock, flags);
@@ -1461,7 +1464,6 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
{
struct priority_group *pg;
unsigned int pgnum;
- unsigned long flags;
char dummy;
if (!pgstr || (sscanf(pgstr, "%u%c", &pgnum, &dummy) != 1) || !pgnum ||
@@ -1470,17 +1472,21 @@ static int switch_pg_num(struct multipath *m, const char *pgstr)
return -EINVAL;
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
list_for_each_entry(pg, &m->priority_groups, list) {
pg->bypassed = false;
if (--pgnum)
continue;
- m->current_pgpath = NULL;
- m->current_pg = NULL;
+ if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags))
+ set_bit(MPATHF_NEED_PG_SWITCH, &m->flags);
+ else {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
m->next_pg = pg;
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
schedule_work(&m->trigger_event);
return 0;
@@ -1507,7 +1513,7 @@ static int bypass_pg_num(struct multipath *m, const char *pgstr, bool bypassed)
break;
}
- bypass_pg(m, pg, bypassed);
+ bypass_pg(m, pg, bypassed, true);
return 0;
}
@@ -1561,7 +1567,7 @@ static void pg_init_done(void *data, int errors)
* Probably doing something like FW upgrade on the
* controller so try the other pg.
*/
- bypass_pg(m, pg, true);
+ bypass_pg(m, pg, true, false);
break;
case SCSI_DH_RETRY:
/* Wait before retrying. */
@@ -1742,6 +1748,9 @@ static void multipath_presuspend(struct dm_target *ti)
{
struct multipath *m = ti->private;
+ spin_lock_irq(&m->lock);
+ m->is_suspending = true;
+ spin_unlock_irq(&m->lock);
/* FIXME: bio-based shouldn't need to always disable queue_if_no_path */
if (m->queue_mode == DM_TYPE_BIO_BASED || !dm_noflush_suspending(m->ti))
queue_if_no_path(m, false, true, __func__);
@@ -1762,9 +1771,9 @@ static void multipath_postsuspend(struct dm_target *ti)
static void multipath_resume(struct dm_target *ti)
{
struct multipath *m = ti->private;
- unsigned long flags;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
+ m->is_suspending = false;
if (test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags)) {
set_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags);
clear_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags);
@@ -1775,7 +1784,7 @@ static void multipath_resume(struct dm_target *ti)
test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags),
test_bit(MPATHF_SAVED_QUEUE_IF_NO_PATH, &m->flags));
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
/*
@@ -1798,14 +1807,13 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
unsigned int status_flags, char *result, unsigned int maxlen)
{
int sz = 0, pg_counter, pgpath_counter;
- unsigned long flags;
struct multipath *m = ti->private;
struct priority_group *pg;
struct pgpath *p;
unsigned int pg_num;
char state;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
/* Features */
if (type == STATUSTYPE_INFO)
@@ -1845,10 +1853,10 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
DMEMIT("%u ", m->nr_priority_groups);
- if (m->next_pg)
- pg_num = m->next_pg->pg_num;
- else if (m->current_pg)
+ if (m->current_pg)
pg_num = m->current_pg->pg_num;
+ else if (m->next_pg)
+ pg_num = m->next_pg->pg_num;
else
pg_num = (m->nr_priority_groups ? 1 : 0);
@@ -1951,7 +1959,7 @@ static void multipath_status(struct dm_target *ti, status_type_t type,
break;
}
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
static int multipath_message(struct dm_target *ti, unsigned int argc, char **argv,
@@ -1961,7 +1969,6 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg
dev_t dev;
struct multipath *m = ti->private;
action_fn action;
- unsigned long flags;
mutex_lock(&m->work_mutex);
@@ -1973,9 +1980,9 @@ static int multipath_message(struct dm_target *ti, unsigned int argc, char **arg
if (argc == 1) {
if (!strcasecmp(argv[0], "queue_if_no_path")) {
r = queue_if_no_path(m, true, false, __func__);
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
enable_nopath_timeout(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
goto out;
} else if (!strcasecmp(argv[0], "fail_if_no_path")) {
r = queue_if_no_path(m, false, false, __func__);
@@ -2021,14 +2028,132 @@ out:
return r;
}
+/*
+ * Perform a minimal read from the given path to find out whether the
+ * path still works. If a path error occurs, fail it.
+ */
+static int probe_path(struct pgpath *pgpath)
+{
+ struct block_device *bdev = pgpath->path.dev->bdev;
+ unsigned int read_size = bdev_logical_block_size(bdev);
+ struct page *page;
+ struct bio *bio;
+ blk_status_t status;
+ int r = 0;
+
+ if (WARN_ON_ONCE(read_size > PAGE_SIZE))
+ return -EINVAL;
+
+ page = alloc_page(GFP_KERNEL);
+ if (!page)
+ return -ENOMEM;
+
+ /* Perform a minimal read: Sector 0, length read_size */
+ bio = bio_alloc(bdev, 1, REQ_OP_READ, GFP_KERNEL);
+ if (!bio) {
+ r = -ENOMEM;
+ goto out;
+ }
+
+ bio->bi_iter.bi_sector = 0;
+ __bio_add_page(bio, page, read_size, 0);
+ submit_bio_wait(bio);
+ status = bio->bi_status;
+ bio_put(bio);
+
+ if (status && blk_path_error(status))
+ fail_path(pgpath);
+
+out:
+ __free_page(page);
+ return r;
+}
+
+/*
+ * Probe all active paths in current_pg to find out whether they still work.
+ * Fail all paths that do not work.
+ *
+ * Return -ENOTCONN if no valid path is left (even outside of current_pg). We
+ * cannot probe paths in other pgs without switching current_pg, so if valid
+ * paths are only in different pgs, they may or may not work. Additionally
+ * we should not probe paths in a pathgroup that is in the process of
+ * initializing. Userspace can submit a request and we'll switch and wait
+ * for the pathgroup to be initialized. If the request fails, it may need to
+ * probe again.
+ */
+static int probe_active_paths(struct multipath *m)
+{
+ struct pgpath *pgpath;
+ struct priority_group *pg = NULL;
+ int r = 0;
+
+ spin_lock_irq(&m->lock);
+ if (test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags)) {
+ wait_event_lock_irq(m->probe_wait,
+ !test_bit(MPATHF_DELAY_PG_SWITCH, &m->flags),
+ m->lock);
+ /*
+ * if we waited because a probe was already in progress,
+ * and it probed the current active pathgroup, don't
+ * reprobe. Just return the number of valid paths
+ */
+ if (m->current_pg == m->last_probed_pg)
+ goto skip_probe;
+ }
+ if (!m->current_pg || m->is_suspending ||
+ test_bit(MPATHF_QUEUE_IO, &m->flags))
+ goto skip_probe;
+ set_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
+ pg = m->last_probed_pg = m->current_pg;
+ spin_unlock_irq(&m->lock);
+
+ list_for_each_entry(pgpath, &pg->pgpaths, list) {
+ if (pg != READ_ONCE(m->current_pg) ||
+ READ_ONCE(m->is_suspending))
+ goto out;
+ if (!pgpath->is_active)
+ continue;
+
+ r = probe_path(pgpath);
+ if (r < 0)
+ goto out;
+ }
+
+out:
+ spin_lock_irq(&m->lock);
+ clear_bit(MPATHF_DELAY_PG_SWITCH, &m->flags);
+ if (test_and_clear_bit(MPATHF_NEED_PG_SWITCH, &m->flags)) {
+ m->current_pgpath = NULL;
+ m->current_pg = NULL;
+ }
+skip_probe:
+ if (r == 0 && !atomic_read(&m->nr_valid_paths))
+ r = -ENOTCONN;
+ spin_unlock_irq(&m->lock);
+ if (pg)
+ wake_up(&m->probe_wait);
+ return r;
+}
+
static int multipath_prepare_ioctl(struct dm_target *ti,
- struct block_device **bdev)
+ struct block_device **bdev,
+ unsigned int cmd, unsigned long arg,
+ bool *forward)
{
struct multipath *m = ti->private;
struct pgpath *pgpath;
- unsigned long flags;
int r;
+ if (_IOC_TYPE(cmd) == DM_IOCTL) {
+ *forward = false;
+ switch (cmd) {
+ case DM_MPATH_PROBE_PATHS:
+ return probe_active_paths(m);
+ default:
+ return -ENOTTY;
+ }
+ }
+
pgpath = READ_ONCE(m->current_pgpath);
if (!pgpath || !mpath_double_check_test_bit(MPATHF_QUEUE_IO, m))
pgpath = choose_pgpath(m, 0);
@@ -2044,10 +2169,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
} else {
/* No path is available */
r = -EIO;
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_QUEUE_IF_NO_PATH, &m->flags))
r = -ENOTCONN;
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
}
if (r == -ENOTCONN) {
@@ -2055,10 +2180,10 @@ static int multipath_prepare_ioctl(struct dm_target *ti,
/* Path status changed, redo selection */
(void) choose_pgpath(m, 0);
}
- spin_lock_irqsave(&m->lock, flags);
+ spin_lock_irq(&m->lock);
if (test_bit(MPATHF_PG_INIT_REQUIRED, &m->flags))
(void) __pg_init_all_paths(m);
- spin_unlock_irqrestore(&m->lock, flags);
+ spin_unlock_irq(&m->lock);
dm_table_run_md_queue_async(m->ti->table);
process_queued_io_list(m);
}
@@ -2180,7 +2305,7 @@ static int multipath_busy(struct dm_target *ti)
*/
static struct target_type multipath_target = {
.name = "multipath",
- .version = {1, 14, 0},
+ .version = {1, 15, 0},
.features = DM_TARGET_SINGLETON | DM_TARGET_IMMUTABLE |
DM_TARGET_PASSES_INTEGRITY,
.module = THIS_MODULE,
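With the _IOC_TYPE(cmd) == DM_IOCTL branch above, dm-mpath intercepts DM_MPATH_PROBE_PATHS in prepare_ioctl (clearing *forward) and runs probe_active_paths() instead of passing the ioctl down. Userspace would issue it against the mapped device node roughly like this (a sketch; the map name is hypothetical, and DM_MPATH_PROBE_PATHS is assumed to be exposed via <linux/dm-ioctl.h>):

#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>
#include <sys/ioctl.h>
#include <linux/dm-ioctl.h>

int main(void)
{
	int fd = open("/dev/mapper/mpatha", O_RDONLY);

	if (fd < 0)
		return 1;
	if (ioctl(fd, DM_MPATH_PROBE_PATHS) < 0)
		perror("probe paths");	/* ENOTCONN: no valid path remains */
	close(fd);
	return 0;
}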
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9e615b4f1f5e..785af4816584 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -133,10 +133,9 @@ static void queue_bio(struct mirror_set *ms, struct bio *bio, int rw)
spin_lock_irqsave(&ms->lock, flags);
should_wake = !(bl->head);
bio_list_add(bl, bio);
- spin_unlock_irqrestore(&ms->lock, flags);
-
if (should_wake)
wakeup_mirrord(ms);
+ spin_unlock_irqrestore(&ms->lock, flags);
}
static void dispatch_bios(void *context, struct bio_list *bio_list)
@@ -646,9 +645,9 @@ static void write_callback(unsigned long error, void *context)
if (!ms->failures.head)
should_wake = 1;
bio_list_add(&ms->failures, bio);
- spin_unlock_irqrestore(&ms->lock, flags);
if (should_wake)
wakeup_mirrord(ms);
+ spin_unlock_irqrestore(&ms->lock, flags);
}
static void do_write(struct mirror_set *ms, struct bio *bio)
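Both hunks move the wakeup inside the locked region, so the emptiness test, the list insertion, and the wakeup form one atomic step with respect to the daemon emptying the list; this closes the window in which mirrord could run and go back to sleep between the unlock and a later wakeup. The resulting shape, reduced to a sketch:

spin_lock_irqsave(&ms->lock, flags);
should_wake = !(bl->head);		/* list was empty: daemon may be asleep */
bio_list_add(bl, bio);
if (should_wake)
	wakeup_mirrord(ms);		/* now inside the critical section */
spin_unlock_irqrestore(&ms->lock, flags);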
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index e23076f7ece2..a6ca92049c10 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -217,10 +217,10 @@ static void dm_done(struct request *clone, blk_status_t error, bool mapped)
if (unlikely(error == BLK_STS_TARGET)) {
if (req_op(clone) == REQ_OP_DISCARD &&
!clone->q->limits.max_discard_sectors)
- disable_discard(tio->md);
+ blk_queue_disable_discard(tio->md->queue);
else if (req_op(clone) == REQ_OP_WRITE_ZEROES &&
!clone->q->limits.max_write_zeroes_sectors)
- disable_write_zeroes(tio->md);
+ blk_queue_disable_write_zeroes(tio->md->queue);
}
switch (r) {
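The dm-private helpers whose declarations are dropped from dm-core.h above are replaced by the generic block-layer equivalents, which clear the corresponding limit directly on the mapped device's queue. The substitution, as a sketch:

blk_queue_disable_discard(tio->md->queue);	/* was disable_discard(tio->md) */
blk_queue_disable_write_zeroes(tio->md->queue);	/* was disable_write_zeroes(tio->md) */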
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index a1b7535c508a..a7dc04bd55e5 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -405,7 +405,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
blk_status_t *error)
{
unsigned int i;
- char major_minor[16];
+ char major_minor[22];
struct stripe_c *sc = ti->private;
if (!*error)
@@ -417,8 +417,7 @@ static int stripe_end_io(struct dm_target *ti, struct bio *bio,
if (*error == BLK_STS_NOTSUPP)
return DM_ENDIO_DONE;
- memset(major_minor, 0, sizeof(major_minor));
- sprintf(major_minor, "%d:%d", MAJOR(bio_dev(bio)), MINOR(bio_dev(bio)));
+ format_dev_t(major_minor, bio_dev(bio));
/*
* Test to see which stripe drive triggered the event
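major_minor grows from 16 to 22 bytes because format_dev_t() prints "%u:%u" of two 32-bit values: worst case ten digits each, plus the colon and the terminating NUL, is 22. A userspace check of the arithmetic:

#include <stdio.h>

int main(void)
{
	char buf[22];
	int n = snprintf(buf, sizeof(buf), "%u:%u", 4294967295u, 4294967295u);

	printf("%s needs %d chars + NUL\n", buf, n);	/* 21 + 1 = 22 */
	return 0;
}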
diff --git a/drivers/md/dm-switch.c b/drivers/md/dm-switch.c
index dfd9fb52a6f3..bb1a70b5a215 100644
--- a/drivers/md/dm-switch.c
+++ b/drivers/md/dm-switch.c
@@ -517,7 +517,9 @@ static void switch_status(struct dm_target *ti, status_type_t type,
*
* Passthrough all ioctls to the path for sector 0
*/
-static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
+static int switch_prepare_ioctl(struct dm_target *ti, struct block_device **bdev,
+ unsigned int cmd, unsigned long arg,
+ bool *forward)
{
struct switch_ctx *sctx = ti->private;
unsigned int path_nr;
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 6b23e777e10e..24a857ff6d0b 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -117,7 +117,6 @@ static int alloc_targets(struct dm_table *t, unsigned int num)
n_targets = (struct dm_target *) (n_highs + num);
memset(n_highs, -1, sizeof(*n_highs) * num);
- kvfree(t->highs);
t->num_allocated = num;
t->highs = n_highs;
@@ -257,7 +256,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
if (bdev_is_zoned(bdev)) {
unsigned int zone_sectors = bdev_zone_sectors(bdev);
- if (start & (zone_sectors - 1)) {
+ if (!bdev_is_zone_aligned(bdev, start)) {
DMERR("%s: start=%llu not aligned to h/w zone size %u of %pg",
dm_device_name(ti->table->md),
(unsigned long long)start,
@@ -274,7 +273,7 @@ static int device_area_is_invalid(struct dm_target *ti, struct dm_dev *dev,
* devices do not end up with a smaller zone in the middle of
* the sector range.
*/
- if (len & (zone_sectors - 1)) {
+ if (!bdev_is_zone_aligned(bdev, len)) {
DMERR("%s: len=%llu not aligned to h/w zone size %u of %pg",
dm_device_name(ti->table->md),
(unsigned long long)len,
@@ -431,6 +430,13 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
return 0;
}
+ mutex_lock(&q->limits_lock);
+ /*
+ * BLK_FEAT_ATOMIC_WRITES is not inherited from the bottom device in
+ * blk_stack_limits(), so do it manually.
+ */
+ limits->features |= (q->limits.features & BLK_FEAT_ATOMIC_WRITES);
+
if (blk_stack_limits(limits, &q->limits,
get_start_sect(bdev) + start) < 0)
DMWARN("%s: adding target device %pg caused an alignment inconsistency: "
@@ -448,6 +454,7 @@ static int dm_set_device_limits(struct dm_target *ti, struct dm_dev *dev,
*/
if (!dm_target_has_integrity(ti->type))
queue_limits_stack_integrity_bdev(limits, bdev);
+ mutex_unlock(&q->limits_lock);
return 0;
}
@@ -1189,6 +1196,176 @@ put_live_table:
return 0;
}
+enum dm_wrappedkey_op {
+ DERIVE_SW_SECRET,
+ IMPORT_KEY,
+ GENERATE_KEY,
+ PREPARE_KEY,
+};
+
+struct dm_wrappedkey_op_args {
+ enum dm_wrappedkey_op op;
+ int err;
+ union {
+ struct {
+ const u8 *eph_key;
+ size_t eph_key_size;
+ u8 *sw_secret;
+ } derive_sw_secret;
+ struct {
+ const u8 *raw_key;
+ size_t raw_key_size;
+ u8 *lt_key;
+ } import_key;
+ struct {
+ u8 *lt_key;
+ } generate_key;
+ struct {
+ const u8 *lt_key;
+ size_t lt_key_size;
+ u8 *eph_key;
+ } prepare_key;
+ };
+};
+
+static int dm_wrappedkey_op_callback(struct dm_target *ti, struct dm_dev *dev,
+ sector_t start, sector_t len, void *data)
+{
+ struct dm_wrappedkey_op_args *args = data;
+ struct block_device *bdev = dev->bdev;
+ struct blk_crypto_profile *profile =
+ bdev_get_queue(bdev)->crypto_profile;
+ int err = -EOPNOTSUPP;
+
+ if (!args->err)
+ return 0;
+
+ switch (args->op) {
+ case DERIVE_SW_SECRET:
+ err = blk_crypto_derive_sw_secret(
+ bdev,
+ args->derive_sw_secret.eph_key,
+ args->derive_sw_secret.eph_key_size,
+ args->derive_sw_secret.sw_secret);
+ break;
+ case IMPORT_KEY:
+ err = blk_crypto_import_key(profile,
+ args->import_key.raw_key,
+ args->import_key.raw_key_size,
+ args->import_key.lt_key);
+ break;
+ case GENERATE_KEY:
+ err = blk_crypto_generate_key(profile,
+ args->generate_key.lt_key);
+ break;
+ case PREPARE_KEY:
+ err = blk_crypto_prepare_key(profile,
+ args->prepare_key.lt_key,
+ args->prepare_key.lt_key_size,
+ args->prepare_key.eph_key);
+ break;
+ }
+ args->err = err;
+
+ /* Try another device in case this fails. */
+ return 0;
+}
+
+static int dm_exec_wrappedkey_op(struct blk_crypto_profile *profile,
+ struct dm_wrappedkey_op_args *args)
+{
+ struct mapped_device *md =
+ container_of(profile, struct dm_crypto_profile, profile)->md;
+ struct dm_target *ti;
+ struct dm_table *t;
+ int srcu_idx;
+ int i;
+
+ args->err = -EOPNOTSUPP;
+
+ t = dm_get_live_table(md, &srcu_idx);
+ if (!t)
+ goto out;
+
+ /*
+ * blk-crypto currently has no support for multiple incompatible
+ * implementations of wrapped inline crypto keys on a single system.
+ * It was already checked earlier that support for wrapped keys was
+ * declared on all underlying devices. Thus, all the underlying devices
+ * should support all wrapped key operations and they should behave
+ * identically, i.e. work with the same keys. So, just executing the
+ * operation on the first device on which it works suffices for now.
+ */
+ for (i = 0; i < t->num_targets; i++) {
+ ti = dm_table_get_target(t, i);
+ if (!ti->type->iterate_devices)
+ continue;
+ ti->type->iterate_devices(ti, dm_wrappedkey_op_callback, args);
+ if (!args->err)
+ break;
+ }
+out:
+ dm_put_live_table(md, srcu_idx);
+ return args->err;
+}
+
+static int dm_derive_sw_secret(struct blk_crypto_profile *profile,
+ const u8 *eph_key, size_t eph_key_size,
+ u8 sw_secret[BLK_CRYPTO_SW_SECRET_SIZE])
+{
+ struct dm_wrappedkey_op_args args = {
+ .op = DERIVE_SW_SECRET,
+ .derive_sw_secret = {
+ .eph_key = eph_key,
+ .eph_key_size = eph_key_size,
+ .sw_secret = sw_secret,
+ },
+ };
+ return dm_exec_wrappedkey_op(profile, &args);
+}
+
+static int dm_import_key(struct blk_crypto_profile *profile,
+ const u8 *raw_key, size_t raw_key_size,
+ u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
+{
+ struct dm_wrappedkey_op_args args = {
+ .op = IMPORT_KEY,
+ .import_key = {
+ .raw_key = raw_key,
+ .raw_key_size = raw_key_size,
+ .lt_key = lt_key,
+ },
+ };
+ return dm_exec_wrappedkey_op(profile, &args);
+}
+
+static int dm_generate_key(struct blk_crypto_profile *profile,
+ u8 lt_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
+{
+ struct dm_wrappedkey_op_args args = {
+ .op = GENERATE_KEY,
+ .generate_key = {
+ .lt_key = lt_key,
+ },
+ };
+ return dm_exec_wrappedkey_op(profile, &args);
+}
+
+static int dm_prepare_key(struct blk_crypto_profile *profile,
+ const u8 *lt_key, size_t lt_key_size,
+ u8 eph_key[BLK_CRYPTO_MAX_HW_WRAPPED_KEY_SIZE])
+{
+ struct dm_wrappedkey_op_args args = {
+ .op = PREPARE_KEY,
+ .prepare_key = {
+ .lt_key = lt_key,
+ .lt_key_size = lt_key_size,
+ .eph_key = eph_key,
+ },
+ };
+ return dm_exec_wrappedkey_op(profile, &args);
+}
+
static int
device_intersect_crypto_capabilities(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
@@ -1263,6 +1440,13 @@ static int dm_table_construct_crypto_profile(struct dm_table *t)
profile);
}
+ if (profile->key_types_supported & BLK_CRYPTO_KEY_TYPE_HW_WRAPPED) {
+ profile->ll_ops.derive_sw_secret = dm_derive_sw_secret;
+ profile->ll_ops.import_key = dm_import_key;
+ profile->ll_ops.generate_key = dm_generate_key;
+ profile->ll_ops.prepare_key = dm_prepare_key;
+ }
+
if (t->md->queue &&
!blk_crypto_has_capabilities(profile,
t->md->queue->crypto_profile)) {
@@ -1490,6 +1674,18 @@ bool dm_table_has_no_data_devices(struct dm_table *t)
return true;
}
+bool dm_table_is_wildcard(struct dm_table *t)
+{
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ if (!dm_target_is_wildcard(ti->type))
+ return false;
+ }
+
+ return true;
+}
+
static int device_not_zoned(struct dm_target *ti, struct dm_dev *dev,
sector_t start, sector_t len, void *data)
{
@@ -1721,8 +1917,12 @@ static int device_not_write_zeroes_capable(struct dm_target *ti, struct dm_dev *
sector_t start, sector_t len, void *data)
{
struct request_queue *q = bdev_get_queue(dev->bdev);
+ int b;
- return !q->limits.max_write_zeroes_sectors;
+ mutex_lock(&q->limits_lock);
+ b = !q->limits.max_write_zeroes_sectors;
+ mutex_unlock(&q->limits_lock);
+ return b;
}
static bool dm_table_supports_write_zeroes(struct dm_table *t)
@@ -1830,10 +2030,24 @@ static bool dm_table_supports_atomic_writes(struct dm_table *t)
return true;
}
+bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size,
+ sector_t new_size)
+{
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) && dm_has_zone_plugs(t->md) &&
+ old_size != new_size) {
+ DMWARN("%s: device has zone write plug resources. "
+ "Cannot change size",
+ dm_device_name(t->md));
+ return false;
+ }
+ return true;
+}
+
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
int r;
+ struct queue_limits old_limits;
if (!dm_table_supports_nowait(t))
limits->features &= ~BLK_FEAT_NOWAIT;
@@ -1860,28 +2074,30 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (dm_table_supports_flush(t))
limits->features |= BLK_FEAT_WRITE_CACHE | BLK_FEAT_FUA;
- if (dm_table_supports_dax(t, device_not_dax_capable)) {
+ if (dm_table_supports_dax(t, device_not_dax_capable))
limits->features |= BLK_FEAT_DAX;
- if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
- set_dax_synchronous(t->md->dax_dev);
- } else
+ else
limits->features &= ~BLK_FEAT_DAX;
- if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
- dax_write_cache(t->md->dax_dev, true);
-
/* For a zoned table, setup the zone related queue attributes. */
- if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
- (limits->features & BLK_FEAT_ZONED)) {
- r = dm_set_zones_restrictions(t, q, limits);
- if (r)
- return r;
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED)) {
+ if (limits->features & BLK_FEAT_ZONED) {
+ r = dm_set_zones_restrictions(t, q, limits);
+ if (r)
+ return r;
+ } else if (dm_has_zone_plugs(t->md)) {
+ DMWARN("%s: device has zone write plug resources. "
+ "Cannot switch to non-zoned table.",
+ dm_device_name(t->md));
+ return -EINVAL;
+ }
}
if (dm_table_supports_atomic_writes(t))
limits->features |= BLK_FEAT_ATOMIC_WRITES;
- r = queue_limits_set(q, limits);
+ old_limits = queue_limits_start_update(q);
+ r = queue_limits_commit_update(q, limits);
if (r)
return r;
@@ -1892,10 +2108,21 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
if (IS_ENABLED(CONFIG_BLK_DEV_ZONED) &&
(limits->features & BLK_FEAT_ZONED)) {
r = dm_revalidate_zones(t, q);
- if (r)
+ if (r) {
+ queue_limits_set(q, &old_limits);
return r;
+ }
}
+ if (IS_ENABLED(CONFIG_BLK_DEV_ZONED))
+ dm_finalize_zone_settings(t, limits);
+
+ if (dm_table_supports_dax(t, device_not_dax_synchronous_capable))
+ set_dax_synchronous(t->md->dax_dev);
+
+ if (dm_table_any_dev_attr(t, device_dax_write_cache_enabled, NULL))
+ dax_write_cache(t->md->dax_dev, true);
+
dm_update_crypto_profile(q, t);
return 0;
}
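dm_table_set_restrictions() now snapshots the queue limits before committing the new ones, so a later failure in zone revalidation can restore the previous state instead of leaving the queue with half-applied limits. The pattern, reduced to a skeleton (kernel context assumed):

struct queue_limits old_limits;
int r;

old_limits = queue_limits_start_update(q);	/* snapshot current limits */
r = queue_limits_commit_update(q, limits);	/* publish the new ones */
if (r)
	return r;

r = dm_revalidate_zones(t, q);
if (r) {
	queue_limits_set(q, &old_limits);	/* roll back on failure */
	return r;
}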
diff --git a/drivers/md/dm-vdo/indexer/volume.c b/drivers/md/dm-vdo/indexer/volume.c
index 655453bb276b..425b3a74f4db 100644
--- a/drivers/md/dm-vdo/indexer/volume.c
+++ b/drivers/md/dm-vdo/indexer/volume.c
@@ -754,10 +754,11 @@ static int get_volume_page_protected(struct volume *volume, struct uds_request *
u32 physical_page, struct cached_page **page_ptr)
{
struct cached_page *page;
+ unsigned int zone_number = request->zone_number;
get_page_from_cache(&volume->page_cache, physical_page, &page);
if (page != NULL) {
- if (request->zone_number == 0) {
+ if (zone_number == 0) {
/* Only one zone is allowed to update the LRU. */
make_page_most_recent(&volume->page_cache, page);
}
@@ -767,7 +768,7 @@ static int get_volume_page_protected(struct volume *volume, struct uds_request *
}
/* Prepare to enqueue a read for the page. */
- end_pending_search(&volume->page_cache, request->zone_number);
+ end_pending_search(&volume->page_cache, zone_number);
mutex_lock(&volume->read_threads_mutex);
/*
@@ -787,8 +788,7 @@ static int get_volume_page_protected(struct volume *volume, struct uds_request *
* the order does not matter for correctness as it does below.
*/
mutex_unlock(&volume->read_threads_mutex);
- begin_pending_search(&volume->page_cache, physical_page,
- request->zone_number);
+ begin_pending_search(&volume->page_cache, physical_page, zone_number);
return UDS_QUEUED;
}
@@ -797,7 +797,7 @@ static int get_volume_page_protected(struct volume *volume, struct uds_request *
* "search pending" state in careful order so no other thread can mess with the data before
* the caller gets to look at it.
*/
- begin_pending_search(&volume->page_cache, physical_page, request->zone_number);
+ begin_pending_search(&volume->page_cache, physical_page, zone_number);
mutex_unlock(&volume->read_threads_mutex);
*page_ptr = page;
return UDS_SUCCESS;
@@ -849,6 +849,7 @@ static int search_cached_index_page(struct volume *volume, struct uds_request *r
{
int result;
struct cached_page *page = NULL;
+ unsigned int zone_number = request->zone_number;
u32 physical_page = map_to_physical_page(volume->geometry, chapter,
index_page_number);
@@ -858,18 +859,18 @@ static int search_cached_index_page(struct volume *volume, struct uds_request *r
* invalidation by the reader thread, before the reader thread has noticed that the
* invalidate_counter has been incremented.
*/
- begin_pending_search(&volume->page_cache, physical_page, request->zone_number);
+ begin_pending_search(&volume->page_cache, physical_page, zone_number);
result = get_volume_page_protected(volume, request, physical_page, &page);
if (result != UDS_SUCCESS) {
- end_pending_search(&volume->page_cache, request->zone_number);
+ end_pending_search(&volume->page_cache, zone_number);
return result;
}
result = uds_search_chapter_index_page(&page->index_page, volume->geometry,
&request->record_name,
record_page_number);
- end_pending_search(&volume->page_cache, request->zone_number);
+ end_pending_search(&volume->page_cache, zone_number);
return result;
}
@@ -882,6 +883,7 @@ int uds_search_cached_record_page(struct volume *volume, struct uds_request *req
{
struct cached_page *record_page;
struct index_geometry *geometry = volume->geometry;
+ unsigned int zone_number = request->zone_number;
int result;
u32 physical_page, page_number;
@@ -905,11 +907,11 @@ int uds_search_cached_record_page(struct volume *volume, struct uds_request *req
* invalidation by the reader thread, before the reader thread has noticed that the
* invalidate_counter has been incremented.
*/
- begin_pending_search(&volume->page_cache, physical_page, request->zone_number);
+ begin_pending_search(&volume->page_cache, physical_page, zone_number);
result = get_volume_page_protected(volume, request, physical_page, &record_page);
if (result != UDS_SUCCESS) {
- end_pending_search(&volume->page_cache, request->zone_number);
+ end_pending_search(&volume->page_cache, zone_number);
return result;
}
@@ -917,7 +919,7 @@ int uds_search_cached_record_page(struct volume *volume, struct uds_request *req
&request->record_name, geometry, &request->old_metadata))
*found = true;
- end_pending_search(&volume->page_cache, request->zone_number);
+ end_pending_search(&volume->page_cache, zone_number);
return UDS_SUCCESS;
}
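
Every hunk above makes the same move: request->zone_number is read once into a local before the request can be enqueued for the reader threads, after which the submitting thread must not touch the request again. A minimal sketch of the resulting begin/end pairing, reusing the helper names from the hunks but otherwise hypothetical:

    static int search_with_pinned_page(struct volume *volume,
                                       struct uds_request *request,
                                       u32 physical_page,
                                       struct cached_page **page_ptr)
    {
            /* Copy the zone number while the request is still ours. */
            unsigned int zone_number = request->zone_number;
            int result;

            begin_pending_search(&volume->page_cache, physical_page,
                                 zone_number);
            result = get_volume_page_protected(volume, request,
                                               physical_page, page_ptr);
            if (result != UDS_SUCCESS) {
                    /*
                     * The request may already belong to a reader thread;
                     * only the local copy is safe to use from here on.
                     */
                    end_pending_search(&volume->page_cache, zone_number);
                    return result;
            }
            end_pending_search(&volume->page_cache, zone_number);
            return UDS_SUCCESS;
    }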
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 0c41949db784..631a887b487c 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -593,6 +593,10 @@ int verity_fec_parse_opt_args(struct dm_arg_set *as, struct dm_verity *v,
(*argc)--;
if (!strcasecmp(arg_name, DM_VERITY_OPT_FEC_DEV)) {
+ if (v->fec->dev) {
+ ti->error = "FEC device already specified";
+ return -EINVAL;
+ }
r = dm_get_device(ti, arg_value, BLK_OPEN_READ, &v->fec->dev);
if (r) {
ti->error = "FEC device lookup failed";
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 3c427f18a04b..81186bded1ce 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -682,7 +682,8 @@ static void verity_bh_work(struct work_struct *w)
static inline bool verity_use_bh(unsigned int bytes, unsigned short ioprio)
{
return ioprio <= IOPRIO_CLASS_IDLE &&
- bytes <= READ_ONCE(dm_verity_use_bh_bytes[ioprio]);
+ bytes <= READ_ONCE(dm_verity_use_bh_bytes[ioprio]) &&
+ !need_resched();
}
static void verity_end_io(struct bio *bio)
@@ -993,7 +994,9 @@ static void verity_status(struct dm_target *ti, status_type_t type,
}
}
-static int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
+static int verity_prepare_ioctl(struct dm_target *ti, struct block_device **bdev,
+ unsigned int cmd, unsigned long arg,
+ bool *forward)
{
struct dm_verity *v = ti->private;
@@ -1120,6 +1123,9 @@ static int verity_alloc_most_once(struct dm_verity *v)
{
struct dm_target *ti = v->ti;
+ if (v->validated_blocks)
+ return 0;
+
/* the bitset can only handle INT_MAX blocks */
if (v->data_blocks > INT_MAX) {
ti->error = "device too large to use check_at_most_once";
@@ -1143,6 +1149,9 @@ static int verity_alloc_zero_digest(struct dm_verity *v)
struct dm_verity_io *io;
u8 *zero_data;
+ if (v->zero_digest)
+ return 0;
+
v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
if (!v->zero_digest)
@@ -1577,7 +1586,7 @@ static int verity_ctr(struct dm_target *ti, unsigned int argc, char **argv)
goto bad;
}
- /* Root hash signature is a optional parameter*/
+ /* Root hash signature is an optional parameter */
r = verity_verify_root_hash(root_hash_digest_to_validate,
strlen(root_hash_digest_to_validate),
verify_args.sig,
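
Two small hardening patterns recur above: verity_use_bh() now declines the fast path when a reschedule is pending, and both allocators become idempotent so that a repeated optional argument cannot leak or redo an allocation. The idempotent-allocator shape, as a sketch with a hypothetical function name:

    static int alloc_zero_digest_once(struct dm_verity *v)
    {
            if (v->zero_digest)     /* already set up: nothing to do */
                    return 0;

            v->zero_digest = kmalloc(v->digest_size, GFP_KERNEL);
            if (!v->zero_digest)
                    return -ENOMEM;
            return 0;
    }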
diff --git a/drivers/md/dm-verity-verify-sig.c b/drivers/md/dm-verity-verify-sig.c
index a9e2c6c0a33c..d5261a0e4232 100644
--- a/drivers/md/dm-verity-verify-sig.c
+++ b/drivers/md/dm-verity-verify-sig.c
@@ -71,9 +71,14 @@ int verity_verify_sig_parse_opt_args(struct dm_arg_set *as,
const char *arg_name)
{
struct dm_target *ti = v->ti;
- int ret = 0;
+ int ret;
const char *sig_key = NULL;
+ if (v->signature_key_desc) {
+ ti->error = DM_VERITY_VERIFY_ERR("root_hash_sig_key_desc already specified");
+ return -EINVAL;
+ }
+
if (!*argc) {
ti->error = DM_VERITY_VERIFY_ERR("Signature key not specified");
return -EINVAL;
@@ -83,14 +88,18 @@ int verity_verify_sig_parse_opt_args(struct dm_arg_set *as,
(*argc)--;
ret = verity_verify_get_sig_from_key(sig_key, sig_opts);
- if (ret < 0)
+ if (ret < 0) {
ti->error = DM_VERITY_VERIFY_ERR("Invalid key specified");
+ return ret;
+ }
v->signature_key_desc = kstrdup(sig_key, GFP_KERNEL);
- if (!v->signature_key_desc)
+ if (!v->signature_key_desc) {
+ ti->error = DM_VERITY_VERIFY_ERR("Could not allocate memory for signature key");
return -ENOMEM;
+ }
- return ret;
+ return 0;
}
/*
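
Before this rewrite, a failed verity_verify_get_sig_from_key() set ti->error but fell through to the kstrdup() anyway; the function now follows strict early-return style, with every failure path setting ti->error and returning its own errno. The control flow, stripped to its skeleton (error strings elided):

    if (v->signature_key_desc)
            return -EINVAL;         /* duplicate argument */

    ret = verity_verify_get_sig_from_key(sig_key, sig_opts);
    if (ret < 0)
            return ret;             /* stop: key lookup failed */

    v->signature_key_desc = kstrdup(sig_key, GFP_KERNEL);
    if (!v->signature_key_desc)
            return -ENOMEM;

    return 0;                       /* explicit success */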
diff --git a/drivers/md/dm-zone.c b/drivers/md/dm-zone.c
index 20edd3fabbab..3d31b82e0730 100644
--- a/drivers/md/dm-zone.c
+++ b/drivers/md/dm-zone.c
@@ -56,24 +56,31 @@ int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
{
struct mapped_device *md = disk->private_data;
struct dm_table *map;
- int srcu_idx, ret;
+ struct dm_table *zone_revalidate_map = md->zone_revalidate_map;
+ int srcu_idx, ret = -EIO;
+ bool put_table = false;
- if (!md->zone_revalidate_map) {
- /* Regular user context */
+ if (!zone_revalidate_map || md->revalidate_map_task != current) {
+ /*
+ * Regular user context, or zone revalidation during __bind()
+ * is in progress but this call comes from a different
+ * process.
+ */
if (dm_suspended_md(md))
return -EAGAIN;
map = dm_get_live_table(md, &srcu_idx);
- if (!map)
- return -EIO;
+ put_table = true;
} else {
/* Zone revalidation during __bind() */
- map = md->zone_revalidate_map;
+ map = zone_revalidate_map;
}
- ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb, data);
+ if (map)
+ ret = dm_blk_do_report_zones(md, map, sector, nr_zones, cb,
+ data);
- if (!md->zone_revalidate_map)
+ if (put_table)
dm_put_live_table(md, srcu_idx);
return ret;
@@ -153,33 +160,36 @@ int dm_revalidate_zones(struct dm_table *t, struct request_queue *q)
{
struct mapped_device *md = t->md;
struct gendisk *disk = md->disk;
+ unsigned int nr_zones = disk->nr_zones;
int ret;
if (!get_capacity(disk))
return 0;
- /* Revalidate only if something changed. */
- if (!disk->nr_zones || disk->nr_zones != md->nr_zones) {
- DMINFO("%s using %s zone append",
- disk->disk_name,
- queue_emulates_zone_append(q) ? "emulated" : "native");
- md->nr_zones = 0;
- }
-
- if (md->nr_zones)
+ /*
+ * Do not revalidate if zone write plug resources have already
+ * been allocated.
+ */
+ if (dm_has_zone_plugs(md))
return 0;
+ DMINFO("%s using %s zone append", disk->disk_name,
+ queue_emulates_zone_append(q) ? "emulated" : "native");
+
/*
* Our table is not live yet. So the call to dm_get_live_table()
* in dm_blk_report_zones() will fail. Set a temporary pointer to
* our table for dm_blk_report_zones() to use directly.
*/
md->zone_revalidate_map = t;
+ md->revalidate_map_task = current;
ret = blk_revalidate_disk_zones(disk);
+ md->revalidate_map_task = NULL;
md->zone_revalidate_map = NULL;
if (ret) {
DMERR("Revalidate zones failed %d", ret);
+ disk->nr_zones = nr_zones;
return ret;
}
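
The md->revalidate_map_task field installed here is what the reworked dm_blk_report_zones() above tests: the temporary table pointer is honoured only for the task that installed it, so a concurrent report-zones call from any other process takes the live-table path. The reader side, restated positively as a sketch:

    if (md->zone_revalidate_map && md->revalidate_map_task == current) {
            /* Zone revalidation during __bind(), in the installing task. */
            map = md->zone_revalidate_map;
    } else {
            /* Everyone else uses the live table, which may be NULL. */
            map = dm_get_live_table(md, &srcu_idx);
            put_table = true;
    }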
@@ -337,15 +347,15 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
/*
* Check if zone append is natively supported, and if not, set the
- * mapped device queue as needing zone append emulation.
+ * mapped device queue as needing zone append emulation. If zone
+ * append is natively supported, make sure that
+ * max_hw_zone_append_sectors is not set to 0.
*/
WARN_ON_ONCE(queue_is_mq(q));
- if (dm_table_supports_zone_append(t)) {
- clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
- } else {
- set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ if (!dm_table_supports_zone_append(t))
lim->max_hw_zone_append_sectors = 0;
- }
+ else if (lim->max_hw_zone_append_sectors == 0)
+ lim->max_hw_zone_append_sectors = lim->max_zone_append_sectors;
/*
* Determine the max open and max active zone limits for the mapped
@@ -380,15 +390,28 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
lim->max_open_zones = 0;
lim->max_active_zones = 0;
lim->max_hw_zone_append_sectors = 0;
+ lim->max_zone_append_sectors = 0;
lim->zone_write_granularity = 0;
lim->chunk_sectors = 0;
lim->features &= ~BLK_FEAT_ZONED;
- clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
- md->nr_zones = 0;
- disk->nr_zones = 0;
return 0;
}
+ if (get_capacity(disk) && dm_has_zone_plugs(t->md)) {
+ if (q->limits.chunk_sectors != lim->chunk_sectors) {
+ DMWARN("%s: device has zone write plug resources. "
+ "Cannot change zone size",
+ disk->disk_name);
+ return -EINVAL;
+ }
+ if (lim->max_hw_zone_append_sectors != 0 &&
+ !dm_table_is_wildcard(t)) {
+ DMWARN("%s: device has zone write plug resources. "
+ "New table must emulate zone append",
+ disk->disk_name);
+ return -EINVAL;
+ }
+ }
/*
* Warn once (when the capacity is not yet set) if the mapped device is
* partially using zone resources of the target devices as that leads to
@@ -408,6 +431,22 @@ int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
return 0;
}
+void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim)
+{
+ struct mapped_device *md = t->md;
+
+ if (lim->features & BLK_FEAT_ZONED) {
+ if (dm_table_supports_zone_append(t))
+ clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ else
+ set_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ } else {
+ clear_bit(DMF_EMULATE_ZONE_APPEND, &md->flags);
+ md->nr_zones = 0;
+ md->disk->nr_zones = 0;
+ }
+}
+
/*
* IO completion callback called from clone_endio().
*/
@@ -423,9 +463,9 @@ void dm_zone_endio(struct dm_io *io, struct bio *clone)
*/
if (clone->bi_status == BLK_STS_OK &&
bio_op(clone) == REQ_OP_ZONE_APPEND) {
- sector_t mask = bdev_zone_sectors(disk->part0) - 1;
-
- orig_bio->bi_iter.bi_sector += clone->bi_iter.bi_sector & mask;
+ orig_bio->bi_iter.bi_sector +=
+ bdev_offset_from_zone_start(disk->part0,
+ clone->bi_iter.bi_sector);
}
return;
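
bdev_offset_from_zone_start() replaces the open-coded mask above; since zone sizes are powers of two, the offset of a sector within its zone reduces to a bitwise AND. What the helper computes, in essence (the real definition lives in include/linux/blkdev.h; this is an illustrative restatement):

    /* Offset of 'sector' within its zone; zone_sectors is a power of two. */
    static inline sector_t zone_offset(sector_t zone_sectors, sector_t sector)
    {
            return sector & (zone_sectors - 1);
    }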
diff --git a/drivers/md/dm-zoned-target.c b/drivers/md/dm-zoned-target.c
index 6141fc25d842..5da3db06da10 100644
--- a/drivers/md/dm-zoned-target.c
+++ b/drivers/md/dm-zoned-target.c
@@ -1015,7 +1015,8 @@ static void dmz_io_hints(struct dm_target *ti, struct queue_limits *limits)
/*
* Pass on ioctl to the backend device.
*/
-static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev)
+static int dmz_prepare_ioctl(struct dm_target *ti, struct block_device **bdev,
+ unsigned int cmd, unsigned long arg, bool *forward)
{
struct dmz_target *dmz = ti->private;
struct dmz_dev *dev = &dmz->dev[0];
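
For a pure pass-through target like dm-zoned, the widened prototype changes nothing in the body: cmd and arg are available to targets that want to inspect the ioctl, and *forward (pre-set to true by the caller, as the dm.c hunk below shows) is left alone so DM forwards the ioctl to the returned device. A minimal pass-through implementation under the new signature, with hypothetical target names:

    static int passthrough_prepare_ioctl(struct dm_target *ti,
                                         struct block_device **bdev,
                                         unsigned int cmd, unsigned long arg,
                                         bool *forward)
    {
            struct passthrough_target *pt = ti->private;

            *bdev = pt->dev->bdev;  /* let DM forward to the backing device */
            return 0;
    }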
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 5ab7574c0c76..1726f0f828cc 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -411,7 +411,8 @@ static int dm_blk_getgeo(struct block_device *bdev, struct hd_geometry *geo)
}
static int dm_prepare_ioctl(struct mapped_device *md, int *srcu_idx,
- struct block_device **bdev)
+ struct block_device **bdev, unsigned int cmd,
+ unsigned long arg, bool *forward)
{
struct dm_target *ti;
struct dm_table *map;
@@ -434,8 +435,8 @@ retry:
if (dm_suspended_md(md))
return -EAGAIN;
- r = ti->type->prepare_ioctl(ti, bdev);
- if (r == -ENOTCONN && !fatal_signal_pending(current)) {
+ r = ti->type->prepare_ioctl(ti, bdev, cmd, arg, forward);
+ if (r == -ENOTCONN && *forward && !fatal_signal_pending(current)) {
dm_put_live_table(md, *srcu_idx);
fsleep(10000);
goto retry;
@@ -454,9 +455,10 @@ static int dm_blk_ioctl(struct block_device *bdev, blk_mode_t mode,
{
struct mapped_device *md = bdev->bd_disk->private_data;
int r, srcu_idx;
+ bool forward = true;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
- if (r < 0)
+ r = dm_prepare_ioctl(md, &srcu_idx, &bdev, cmd, arg, &forward);
+ if (!forward || r < 0)
goto out;
if (r > 0) {
@@ -1082,22 +1084,6 @@ static inline struct queue_limits *dm_get_queue_limits(struct mapped_device *md)
return &md->queue->limits;
}
-void disable_discard(struct mapped_device *md)
-{
- struct queue_limits *limits = dm_get_queue_limits(md);
-
- /* device doesn't really support DISCARD, disable it */
- limits->max_hw_discard_sectors = 0;
-}
-
-void disable_write_zeroes(struct mapped_device *md)
-{
- struct queue_limits *limits = dm_get_queue_limits(md);
-
- /* device doesn't really support WRITE ZEROES, disable it */
- limits->max_write_zeroes_sectors = 0;
-}
-
static bool swap_bios_limit(struct dm_target *ti, struct bio *bio)
{
return unlikely((bio->bi_opf & REQ_SWAP) != 0) && unlikely(ti->limit_swap_bios);
@@ -1115,10 +1101,10 @@ static void clone_endio(struct bio *bio)
if (unlikely(error == BLK_STS_TARGET)) {
if (bio_op(bio) == REQ_OP_DISCARD &&
!bdev_max_discard_sectors(bio->bi_bdev))
- disable_discard(md);
+ blk_queue_disable_discard(md->queue);
else if (bio_op(bio) == REQ_OP_WRITE_ZEROES &&
!bdev_write_zeroes_sectors(bio->bi_bdev))
- disable_write_zeroes(md);
+ blk_queue_disable_write_zeroes(md->queue);
}
if (static_branch_unlikely(&zoned_enabled) &&
@@ -2421,21 +2407,35 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
struct queue_limits *limits)
{
struct dm_table *old_map;
- sector_t size;
+ sector_t size, old_size;
int ret;
lockdep_assert_held(&md->suspend_lock);
size = dm_table_get_size(t);
+ old_size = dm_get_size(md);
+
+ if (!dm_table_supports_size_change(t, old_size, size)) {
+ old_map = ERR_PTR(-EINVAL);
+ goto out;
+ }
+
+ set_capacity(md->disk, size);
+
+ ret = dm_table_set_restrictions(t, md->queue, limits);
+ if (ret) {
+ set_capacity(md->disk, old_size);
+ old_map = ERR_PTR(ret);
+ goto out;
+ }
+
/*
* Wipe any geometry if the size of the table changed.
*/
- if (size != dm_get_size(md))
+ if (size != old_size)
memset(&md->geometry, 0, sizeof(md->geometry));
- set_capacity(md->disk, size);
-
dm_table_event_callback(t, event_callback, md);
if (dm_table_request_based(t)) {
@@ -2453,10 +2453,10 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
* requests in the queue may refer to bio from the old bioset,
* so you must walk through the queue to unprep.
*/
- if (!md->mempools) {
+ if (!md->mempools)
md->mempools = t->mempools;
- t->mempools = NULL;
- }
+ else
+ dm_free_md_mempools(t->mempools);
} else {
/*
* The md may already have mempools that need changing.
@@ -2465,14 +2465,8 @@ static struct dm_table *__bind(struct mapped_device *md, struct dm_table *t,
*/
dm_free_md_mempools(md->mempools);
md->mempools = t->mempools;
- t->mempools = NULL;
- }
-
- ret = dm_table_set_restrictions(t, md->queue, limits);
- if (ret) {
- old_map = ERR_PTR(ret);
- goto out;
}
+ t->mempools = NULL;
old_map = rcu_dereference_protected(md->map, lockdep_is_held(&md->suspend_lock));
rcu_assign_pointer(md->map, (void *)t);
@@ -3638,10 +3632,13 @@ static int dm_pr_clear(struct block_device *bdev, u64 key)
struct mapped_device *md = bdev->bd_disk->private_data;
const struct pr_ops *ops;
int r, srcu_idx;
+ bool forward = true;
- r = dm_prepare_ioctl(md, &srcu_idx, &bdev);
+ /* Not a real ioctl, but targets must not interpret non-DM ioctls */
+ r = dm_prepare_ioctl(md, &srcu_idx, &bdev, 0, 0, &forward);
if (r < 0)
goto out;
+ WARN_ON_ONCE(!forward);
ops = bdev->bd_disk->fops->pr_ops;
if (ops && ops->pr_clear)
diff --git a/drivers/md/dm.h b/drivers/md/dm.h
index a0a8ff119815..245f52b59215 100644
--- a/drivers/md/dm.h
+++ b/drivers/md/dm.h
@@ -58,6 +58,7 @@ void dm_table_event_callback(struct dm_table *t,
void (*fn)(void *), void *context);
struct dm_target *dm_table_find_target(struct dm_table *t, sector_t sector);
bool dm_table_has_no_data_devices(struct dm_table *table);
+bool dm_table_is_wildcard(struct dm_table *t);
int dm_calculate_queue_limits(struct dm_table *table,
struct queue_limits *limits);
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
@@ -72,6 +73,8 @@ struct target_type *dm_table_get_immutable_target_type(struct dm_table *t);
struct dm_target *dm_table_get_immutable_target(struct dm_table *t);
struct dm_target *dm_table_get_wildcard_target(struct dm_table *t);
bool dm_table_request_based(struct dm_table *t);
+bool dm_table_supports_size_change(struct dm_table *t, sector_t old_size,
+ sector_t new_size);
void dm_lock_md_type(struct mapped_device *md);
void dm_unlock_md_type(struct mapped_device *md);
@@ -102,6 +105,7 @@ int dm_setup_md_queue(struct mapped_device *md, struct dm_table *t);
int dm_set_zones_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *lim);
int dm_revalidate_zones(struct dm_table *t, struct request_queue *q);
+void dm_finalize_zone_settings(struct dm_table *t, struct queue_limits *lim);
void dm_zone_endio(struct dm_io *io, struct bio *clone);
#ifdef CONFIG_BLK_DEV_ZONED
int dm_blk_report_zones(struct gendisk *disk, sector_t sector,
@@ -110,12 +114,14 @@ bool dm_is_zone_write(struct mapped_device *md, struct bio *bio);
int dm_zone_get_reset_bitmap(struct mapped_device *md, struct dm_table *t,
sector_t sector, unsigned int nr_zones,
unsigned long *need_reset);
+#define dm_has_zone_plugs(md) ((md)->disk->zone_wplugs_hash != NULL)
#else
#define dm_blk_report_zones NULL
static inline bool dm_is_zone_write(struct mapped_device *md, struct bio *bio)
{
return false;
}
+#define dm_has_zone_plugs(md) false
#endif
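
dm_has_zone_plugs() follows the usual config-stub pattern: when CONFIG_BLK_DEV_ZONED is off it compiles to the constant false, so callers need no #ifdef of their own and the compiler discards the dead branch. The same pattern in generic form (illustrative names only):

    #ifdef CONFIG_EXAMPLE_FEATURE
    #define feature_enabled(md)  ((md)->feature_state != NULL)
    #else
    #define feature_enabled(md)  false
    #endif

    /* Callers stay #ifdef-free: */
    if (feature_enabled(md))
            do_feature_work(md);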
/*