Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Kconfig             2
-rw-r--r--  drivers/md/bcache/features.c   2
-rw-r--r--  drivers/md/bcache/features.h  30
-rw-r--r--  drivers/md/bcache/super.c     53
-rw-r--r--  drivers/md/dm-bufio.c          6
-rw-r--r--  drivers/md/dm-crypt.c        177
-rw-r--r--  drivers/md/dm-integrity.c     92
-rw-r--r--  drivers/md/dm-raid.c           6
-rw-r--r--  drivers/md/dm-snap.c          24
-rw-r--r--  drivers/md/dm-table.c         15
-rw-r--r--  drivers/md/dm.c                2
-rw-r--r--  drivers/md/md.c                2
12 files changed, 359 insertions, 52 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index b7e2d9666614..9e44c09f6410 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -605,6 +605,7 @@ config DM_INTEGRITY
select BLK_DEV_INTEGRITY
select DM_BUFIO
select CRYPTO
+ select CRYPTO_SKCIPHER
select ASYNC_XOR
help
This device-mapper target emulates a block device that has
@@ -622,6 +623,7 @@ config DM_ZONED
tristate "Drive-managed zoned block device target support"
depends on BLK_DEV_DM
depends on BLK_DEV_ZONED
+ select CRC32
help
This device-mapper target takes a host-managed or host-aware zoned
block device and exposes most of its capacity as a regular block
diff --git a/drivers/md/bcache/features.c b/drivers/md/bcache/features.c
index 6469223f0b77..d636b7b2d070 100644
--- a/drivers/md/bcache/features.c
+++ b/drivers/md/bcache/features.c
@@ -17,7 +17,7 @@ struct feature {
};
static struct feature feature_list[] = {
- {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LARGE_BUCKET,
+ {BCH_FEATURE_INCOMPAT, BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE,
"large_bucket"},
{0, 0, 0 },
};
diff --git a/drivers/md/bcache/features.h b/drivers/md/bcache/features.h
index a1653c478041..84fc2c0f0101 100644
--- a/drivers/md/bcache/features.h
+++ b/drivers/md/bcache/features.h
@@ -13,11 +13,15 @@
/* Feature set definition */
/* Incompat feature set */
-#define BCH_FEATURE_INCOMPAT_LARGE_BUCKET 0x0001 /* 32bit bucket size */
+/* 32bit bucket size, obsoleted */
+#define BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET 0x0001
+/* real bucket size is (1 << bucket_size) */
+#define BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE 0x0002
-#define BCH_FEATURE_COMPAT_SUUP 0
-#define BCH_FEATURE_RO_COMPAT_SUUP 0
-#define BCH_FEATURE_INCOMPAT_SUUP BCH_FEATURE_INCOMPAT_LARGE_BUCKET
+#define BCH_FEATURE_COMPAT_SUPP 0
+#define BCH_FEATURE_RO_COMPAT_SUPP 0
+#define BCH_FEATURE_INCOMPAT_SUPP (BCH_FEATURE_INCOMPAT_OBSO_LARGE_BUCKET| \
+ BCH_FEATURE_INCOMPAT_LOG_LARGE_BUCKET_SIZE)
#define BCH_HAS_COMPAT_FEATURE(sb, mask) \
((sb)->feature_compat & (mask))
@@ -77,7 +81,23 @@ static inline void bch_clear_feature_##name(struct cache_sb *sb) \
~BCH##_FEATURE_INCOMPAT_##flagname; \
}
-BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LARGE_BUCKET);
+BCH_FEATURE_INCOMPAT_FUNCS(obso_large_bucket, OBSO_LARGE_BUCKET);
+BCH_FEATURE_INCOMPAT_FUNCS(large_bucket, LOG_LARGE_BUCKET_SIZE);
+
+static inline bool bch_has_unknown_compat_features(struct cache_sb *sb)
+{
+ return ((sb->feature_compat & ~BCH_FEATURE_COMPAT_SUPP) != 0);
+}
+
+static inline bool bch_has_unknown_ro_compat_features(struct cache_sb *sb)
+{
+ return ((sb->feature_ro_compat & ~BCH_FEATURE_RO_COMPAT_SUPP) != 0);
+}
+
+static inline bool bch_has_unknown_incompat_features(struct cache_sb *sb)
+{
+ return ((sb->feature_incompat & ~BCH_FEATURE_INCOMPAT_SUPP) != 0);
+}
int bch_print_cache_set_feature_compat(struct cache_set *c, char *buf, int size);
int bch_print_cache_set_feature_ro_compat(struct cache_set *c, char *buf, int size);
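
The three bch_has_unknown_*_features() helpers above implement the standard supported-mask pattern: mask off every bit the driver knows about, and anything left over is an unknown feature. Note that read_super() below refuses the device for all three classes, which is stricter than the classic ext4-style semantics where unknown compat bits are ignored and unknown ro_compat bits only force read-only. A standalone sketch with illustrative constants (not the kernel API):

#include <stdint.h>
#include <stdio.h>

#define COMPAT_SUPP    0x0000
#define RO_COMPAT_SUPP 0x0000
#define INCOMPAT_SUPP  (0x0001 | 0x0002) /* OBSO_LARGE_BUCKET | LOG_LARGE_BUCKET_SIZE */

/* Refuse the superblock if any feature word carries an unknown bit. */
static const char *check_features(uint64_t compat, uint64_t ro_compat,
				  uint64_t incompat)
{
	if (compat & ~COMPAT_SUPP)
		return "Unsupported compatible feature found";
	if (ro_compat & ~RO_COMPAT_SUPP)
		return "Unsupported read-only compatible feature found";
	if (incompat & ~INCOMPAT_SUPP)
		return "Unsupported incompatible feature found";
	return NULL;
}

int main(void)
{
	const char *err = check_features(0, 0, 0x0002); /* known bit: ok */
	printf("%s\n", err ? err : "ok");
	err = check_features(0, 0, 0x0004);             /* unknown incompat bit */
	printf("%s\n", err ? err : "ok");
	return 0;
}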
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index a4752ac410dc..2047a9cccdb5 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -64,9 +64,25 @@ static unsigned int get_bucket_size(struct cache_sb *sb, struct cache_sb_disk *s
{
unsigned int bucket_size = le16_to_cpu(s->bucket_size);
- if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES &&
- bch_has_feature_large_bucket(sb))
- bucket_size |= le16_to_cpu(s->bucket_size_hi) << 16;
+ if (sb->version >= BCACHE_SB_VERSION_CDEV_WITH_FEATURES) {
+ if (bch_has_feature_large_bucket(sb)) {
+ unsigned int max, order;
+
+ max = sizeof(unsigned int) * BITS_PER_BYTE - 1;
+ order = le16_to_cpu(s->bucket_size);
+ /*
+ * bcache tool will make sure the overflow won't
+ * happen, an error message here is enough.
+ */
+ if (order > max)
+ pr_err("Bucket size (1 << %u) overflows\n",
+ order);
+ bucket_size = 1 << order;
+ } else if (bch_has_feature_obso_large_bucket(sb)) {
+ bucket_size +=
+ le16_to_cpu(s->obso_bucket_size_hi) << 16;
+ }
+ }
return bucket_size;
}
@@ -228,6 +244,20 @@ static const char *read_super(struct cache_sb *sb, struct block_device *bdev,
sb->feature_compat = le64_to_cpu(s->feature_compat);
sb->feature_incompat = le64_to_cpu(s->feature_incompat);
sb->feature_ro_compat = le64_to_cpu(s->feature_ro_compat);
+
+ /* Check incompatible features */
+ err = "Unsupported compatible feature found";
+ if (bch_has_unknown_compat_features(sb))
+ goto err;
+
+ err = "Unsupported read-only compatible feature found";
+ if (bch_has_unknown_ro_compat_features(sb))
+ goto err;
+
+ err = "Unsupported incompatible feature found";
+ if (bch_has_unknown_incompat_features(sb))
+ goto err;
+
err = read_super_common(sb, bdev, s);
if (err)
goto err;
@@ -1302,6 +1332,12 @@ int bch_cached_dev_attach(struct cached_dev *dc, struct cache_set *c,
bcache_device_link(&dc->disk, c, "bdev");
atomic_inc(&c->attached_dev_nr);
+ if (bch_has_feature_obso_large_bucket(&(c->cache->sb))) {
+ pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
+ pr_err("Please update to the latest bcache-tools to create the cache device\n");
+ set_disk_ro(dc->disk.disk, 1);
+ }
+
/* Allow the writeback thread to proceed */
up_write(&dc->writeback_lock);
@@ -1524,6 +1560,12 @@ static int flash_dev_run(struct cache_set *c, struct uuid_entry *u)
bcache_device_link(d, c, "volume");
+ if (bch_has_feature_obso_large_bucket(&c->cache->sb)) {
+ pr_err("The obsoleted large bucket layout is unsupported, set the bcache device into read-only\n");
+ pr_err("Please update to the latest bcache-tools to create the cache device\n");
+ set_disk_ro(d->disk, 1);
+ }
+
return 0;
err:
kobject_put(&d->kobj);
@@ -2083,6 +2125,9 @@ static int run_cache_set(struct cache_set *c)
c->cache->sb.last_mount = (u32)ktime_get_real_seconds();
bcache_write_super(c);
+ if (bch_has_feature_obso_large_bucket(&c->cache->sb))
+ pr_err("Detect obsoleted large bucket layout, all attached bcache device will be read-only\n");
+
list_for_each_entry_safe(dc, t, &uncached_devices, list)
bch_cached_dev_attach(dc, c, NULL);
@@ -2644,8 +2689,8 @@ static ssize_t bch_pending_bdevs_cleanup(struct kobject *k,
}
list_for_each_entry_safe(pdev, tpdev, &pending_devs, list) {
+ char *pdev_set_uuid = pdev->dc->sb.set_uuid;
list_for_each_entry_safe(c, tc, &bch_cache_sets, list) {
- char *pdev_set_uuid = pdev->dc->sb.set_uuid;
char *set_uuid = c->set_uuid;
if (!memcmp(pdev_set_uuid, set_uuid, 16)) {
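
get_bucket_size() above now handles both on-disk encodings: the obsoleted layout stored the bucket size as a raw 32-bit value split across two 16-bit fields, while its replacement stores only log2 of the size, so a single 16-bit field covers any realistic bucket. A standalone sketch of the two decoders (names illustrative, endianness handling omitted):

#include <stdint.h>
#include <limits.h>
#include <stdio.h>

static unsigned int decode_log_bucket_size(uint16_t order)
{
	const unsigned int max = sizeof(unsigned int) * CHAR_BIT - 1;

	/* the bcache tool prevents overflow; an error message is enough */
	if (order > max) {
		fprintf(stderr, "bucket size (1 << %u) overflows\n", order);
		return 0;
	}
	return 1U << order;
}

static unsigned int decode_obso_bucket_size(uint16_t lo, uint16_t hi)
{
	return (unsigned int)lo + ((unsigned int)hi << 16);
}

int main(void)
{
	printf("%u\n", decode_log_bucket_size(13));    /* 8192 */
	printf("%u\n", decode_obso_bucket_size(0, 2)); /* 131072 */
	return 0;
}

The attach paths then force any device still carrying the obsoleted layout read-only and point the user at newer bcache-tools, rather than failing the attach outright.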
diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c
index 9c1a86bde658..fce4cbf9529d 100644
--- a/drivers/md/dm-bufio.c
+++ b/drivers/md/dm-bufio.c
@@ -1534,6 +1534,12 @@ sector_t dm_bufio_get_device_size(struct dm_bufio_client *c)
}
EXPORT_SYMBOL_GPL(dm_bufio_get_device_size);
+struct dm_io_client *dm_bufio_get_dm_io_client(struct dm_bufio_client *c)
+{
+ return c->dm_io;
+}
+EXPORT_SYMBOL_GPL(dm_bufio_get_dm_io_client);
+
sector_t dm_bufio_get_block_number(struct dm_buffer *b)
{
return b->block;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 5f9f9b3a226d..5a55617a08e6 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1454,13 +1454,16 @@ static int crypt_convert_block_skcipher(struct crypt_config *cc,
static void kcryptd_async_done(struct crypto_async_request *async_req,
int error);
-static void crypt_alloc_req_skcipher(struct crypt_config *cc,
+static int crypt_alloc_req_skcipher(struct crypt_config *cc,
struct convert_context *ctx)
{
unsigned key_index = ctx->cc_sector & (cc->tfms_count - 1);
- if (!ctx->r.req)
- ctx->r.req = mempool_alloc(&cc->req_pool, GFP_NOIO);
+ if (!ctx->r.req) {
+ ctx->r.req = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
+ if (!ctx->r.req)
+ return -ENOMEM;
+ }
skcipher_request_set_tfm(ctx->r.req, cc->cipher_tfm.tfms[key_index]);
@@ -1471,13 +1474,18 @@ static void crypt_alloc_req_skcipher(struct crypt_config *cc,
skcipher_request_set_callback(ctx->r.req,
CRYPTO_TFM_REQ_MAY_BACKLOG,
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req));
+
+ return 0;
}
-static void crypt_alloc_req_aead(struct crypt_config *cc,
+static int crypt_alloc_req_aead(struct crypt_config *cc,
struct convert_context *ctx)
{
- if (!ctx->r.req_aead)
- ctx->r.req_aead = mempool_alloc(&cc->req_pool, GFP_NOIO);
+ if (!ctx->r.req_aead) {
+ ctx->r.req_aead = mempool_alloc(&cc->req_pool, in_interrupt() ? GFP_ATOMIC : GFP_NOIO);
+ if (!ctx->r.req_aead)
+ return -ENOMEM;
+ }
aead_request_set_tfm(ctx->r.req_aead, cc->cipher_tfm.tfms_aead[0]);
@@ -1488,15 +1496,17 @@ static void crypt_alloc_req_aead(struct crypt_config *cc,
aead_request_set_callback(ctx->r.req_aead,
CRYPTO_TFM_REQ_MAY_BACKLOG,
kcryptd_async_done, dmreq_of_req(cc, ctx->r.req_aead));
+
+ return 0;
}
-static void crypt_alloc_req(struct crypt_config *cc,
+static int crypt_alloc_req(struct crypt_config *cc,
struct convert_context *ctx)
{
if (crypt_integrity_aead(cc))
- crypt_alloc_req_aead(cc, ctx);
+ return crypt_alloc_req_aead(cc, ctx);
else
- crypt_alloc_req_skcipher(cc, ctx);
+ return crypt_alloc_req_skcipher(cc, ctx);
}
static void crypt_free_req_skcipher(struct crypt_config *cc,
@@ -1529,17 +1539,28 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
* Encrypt / decrypt data from one bio to another one (can be the same one)
*/
static blk_status_t crypt_convert(struct crypt_config *cc,
- struct convert_context *ctx, bool atomic)
+ struct convert_context *ctx, bool atomic, bool reset_pending)
{
unsigned int tag_offset = 0;
unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
int r;
- atomic_set(&ctx->cc_pending, 1);
+ /*
+ * if reset_pending is set we are dealing with the bio for the first time,
+ * else we're continuing to work on the previous bio, so don't mess with
+ * the cc_pending counter
+ */
+ if (reset_pending)
+ atomic_set(&ctx->cc_pending, 1);
while (ctx->iter_in.bi_size && ctx->iter_out.bi_size) {
- crypt_alloc_req(cc, ctx);
+ r = crypt_alloc_req(cc, ctx);
+ if (r) {
+ complete(&ctx->restart);
+ return BLK_STS_DEV_RESOURCE;
+ }
+
atomic_inc(&ctx->cc_pending);
if (crypt_integrity_aead(cc))
@@ -1553,7 +1574,25 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
* but the driver request queue is full, let's wait.
*/
case -EBUSY:
- wait_for_completion(&ctx->restart);
+ if (in_interrupt()) {
+ if (try_wait_for_completion(&ctx->restart)) {
+ /*
+ * we don't have to block to wait for completion,
+ * so proceed
+ */
+ } else {
+ /*
+ * we can't wait for completion without blocking
+ * exit and continue processing in a workqueue
+ */
+ ctx->r.req = NULL;
+ ctx->cc_sector += sector_step;
+ tag_offset++;
+ return BLK_STS_DEV_RESOURCE;
+ }
+ } else {
+ wait_for_completion(&ctx->restart);
+ }
reinit_completion(&ctx->restart);
fallthrough;
/*
@@ -1691,6 +1730,12 @@ static void crypt_inc_pending(struct dm_crypt_io *io)
atomic_inc(&io->io_pending);
}
+static void kcryptd_io_bio_endio(struct work_struct *work)
+{
+ struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+ bio_endio(io->base_bio);
+}
+
/*
* One of the bios was finished. Check for completion of
* the whole request and correctly clean up the buffer.
@@ -1713,7 +1758,23 @@ static void crypt_dec_pending(struct dm_crypt_io *io)
kfree(io->integrity_metadata);
base_bio->bi_status = error;
- bio_endio(base_bio);
+
+ /*
+ * If we are running this function from our tasklet,
+ * we can't call bio_endio() here, because it will call
+ * clone_endio() from dm.c, which in turn will
+ * free the current struct dm_crypt_io structure with
+ * our tasklet. In this case we need to delay bio_endio()
+ * execution to after the tasklet is done and dequeued.
+ */
+ if (tasklet_trylock(&io->tasklet)) {
+ tasklet_unlock(&io->tasklet);
+ bio_endio(base_bio);
+ return;
+ }
+
+ INIT_WORK(&io->work, kcryptd_io_bio_endio);
+ queue_work(cc->io_queue, &io->work);
}
/*
@@ -1945,6 +2006,37 @@ static bool kcryptd_crypt_write_inline(struct crypt_config *cc,
}
}
+static void kcryptd_crypt_write_continue(struct work_struct *work)
+{
+ struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+ struct crypt_config *cc = io->cc;
+ struct convert_context *ctx = &io->ctx;
+ int crypt_finished;
+ sector_t sector = io->sector;
+ blk_status_t r;
+
+ wait_for_completion(&ctx->restart);
+ reinit_completion(&ctx->restart);
+
+ r = crypt_convert(cc, &io->ctx, true, false);
+ if (r)
+ io->error = r;
+ crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
+ if (!crypt_finished && kcryptd_crypt_write_inline(cc, ctx)) {
+ /* Wait for completion signaled by kcryptd_async_done() */
+ wait_for_completion(&ctx->restart);
+ crypt_finished = 1;
+ }
+
+ /* Encryption was already finished, submit io now */
+ if (crypt_finished) {
+ kcryptd_crypt_write_io_submit(io, 0);
+ io->sector = sector;
+ }
+
+ crypt_dec_pending(io);
+}
+
static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
@@ -1973,7 +2065,17 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
crypt_inc_pending(io);
r = crypt_convert(cc, ctx,
- test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags));
+ test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true);
+ /*
+ * Crypto API backlogged the request, because its queue was full
+ * and we're in softirq context, so continue from a workqueue
+ * (TODO: is it actually possible to be in softirq in the write path?)
+ */
+ if (r == BLK_STS_DEV_RESOURCE) {
+ INIT_WORK(&io->work, kcryptd_crypt_write_continue);
+ queue_work(cc->crypt_queue, &io->work);
+ return;
+ }
if (r)
io->error = r;
crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
@@ -1998,6 +2100,25 @@ static void kcryptd_crypt_read_done(struct dm_crypt_io *io)
crypt_dec_pending(io);
}
+static void kcryptd_crypt_read_continue(struct work_struct *work)
+{
+ struct dm_crypt_io *io = container_of(work, struct dm_crypt_io, work);
+ struct crypt_config *cc = io->cc;
+ blk_status_t r;
+
+ wait_for_completion(&io->ctx.restart);
+ reinit_completion(&io->ctx.restart);
+
+ r = crypt_convert(cc, &io->ctx, true, false);
+ if (r)
+ io->error = r;
+
+ if (atomic_dec_and_test(&io->ctx.cc_pending))
+ kcryptd_crypt_read_done(io);
+
+ crypt_dec_pending(io);
+}
+
static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
{
struct crypt_config *cc = io->cc;
@@ -2009,7 +2130,16 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
io->sector);
r = crypt_convert(cc, &io->ctx,
- test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags));
+ test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
+ /*
+ * Crypto API backlogged the request, because its queue was full
+ * and we're in softirq context, so continue from a workqueue
+ */
+ if (r == BLK_STS_DEV_RESOURCE) {
+ INIT_WORK(&io->work, kcryptd_crypt_read_continue);
+ queue_work(cc->crypt_queue, &io->work);
+ return;
+ }
if (r)
io->error = r;
@@ -2091,8 +2221,12 @@ static void kcryptd_queue_crypt(struct dm_crypt_io *io)
if ((bio_data_dir(io->base_bio) == READ && test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags)) ||
(bio_data_dir(io->base_bio) == WRITE && test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags))) {
- if (in_irq()) {
- /* Crypto API's "skcipher_walk_first() refuses to work in hard IRQ context */
+ /*
+ * in_irq(): Crypto API's skcipher_walk_first() refuses to work in hard IRQ context.
+ * irqs_disabled(): the kernel may run some IO completion from the idle thread, but
+ * it is being executed with irqs disabled.
+ */
+ if (in_irq() || irqs_disabled()) {
tasklet_init(&io->tasklet, kcryptd_crypt_tasklet, (unsigned long)&io->work);
tasklet_schedule(&io->tasklet);
return;
@@ -3166,12 +3300,11 @@ static int crypt_ctr(struct dm_target *ti, unsigned int argc, char **argv)
}
if (test_bit(DM_CRYPT_SAME_CPU, &cc->flags))
- cc->crypt_queue = alloc_workqueue("kcryptd-%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
+ cc->crypt_queue = alloc_workqueue("kcryptd/%s", WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM,
1, devname);
else
- cc->crypt_queue = alloc_workqueue("kcryptd-%s",
- WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM |
- WQ_UNBOUND | WQ_SYSFS,
+ cc->crypt_queue = alloc_workqueue("kcryptd/%s",
+ WQ_CPU_INTENSIVE | WQ_MEM_RECLAIM | WQ_UNBOUND,
num_online_cpus(), devname);
if (!cc->crypt_queue) {
ti->error = "Couldn't create kcryptd queue";
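
The common thread in the dm-crypt hunks is code that may run in softirq/tasklet context as well as process context, and therefore can neither sleep in the allocator nor block on a completion. In atomic context it allocates with GFP_ATOMIC (which may fail) and, whenever it would have to wait, returns BLK_STS_DEV_RESOURCE so the caller re-queues the remaining work onto a workqueue. A condensed sketch of that pattern, with hypothetical types (struct crypt_ctx, do_crypt()) standing in for the driver's real ones:

static blk_status_t convert_step(struct crypt_ctx *ctx)
{
	gfp_t gfp = in_interrupt() ? GFP_ATOMIC : GFP_NOIO;

	if (!ctx->req) {
		ctx->req = mempool_alloc(ctx->pool, gfp);
		if (!ctx->req)		/* GFP_ATOMIC may fail */
			return BLK_STS_DEV_RESOURCE;
	}

	switch (do_crypt(ctx->req)) {	/* hypothetical helper */
	case -EBUSY:			/* backlogged: must wait for restart */
		if (in_interrupt()) {
			if (!try_wait_for_completion(&ctx->restart))
				return BLK_STS_DEV_RESOURCE; /* finish in a workqueue */
		} else {
			wait_for_completion(&ctx->restart);
		}
		reinit_completion(&ctx->restart);
		fallthrough;
	case -EINPROGRESS:
		return BLK_STS_OK;
	default:
		return BLK_STS_IOERR;
	}
}

The crypt_dec_pending() change uses tasklet_trylock()/tasklet_unlock() purely as a probe: if the trylock fails, this function is itself running from the tasklet, so bio_endio() is deferred to a work item to avoid freeing the dm_crypt_io out from under it.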
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 5a7a1b90e671..b64fede032dc 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -257,8 +257,9 @@ struct dm_integrity_c {
bool journal_uptodate;
bool just_formatted;
bool recalculate_flag;
- bool fix_padding;
bool discard;
+ bool fix_padding;
+ bool legacy_recalculate;
struct alg_spec internal_hash_alg;
struct alg_spec journal_crypt_alg;
@@ -386,6 +387,14 @@ static int dm_integrity_failed(struct dm_integrity_c *ic)
return READ_ONCE(ic->failed);
}
+static bool dm_integrity_disable_recalculate(struct dm_integrity_c *ic)
+{
+ if ((ic->internal_hash_alg.key || ic->journal_mac_alg.key) &&
+ !ic->legacy_recalculate)
+ return true;
+ return false;
+}
+
static commit_id_t dm_integrity_commit_id(struct dm_integrity_c *ic, unsigned i,
unsigned j, unsigned char seq)
{
@@ -1379,12 +1388,52 @@ thorough_test:
#undef MAY_BE_HASH
}
-static void dm_integrity_flush_buffers(struct dm_integrity_c *ic)
+struct flush_request {
+ struct dm_io_request io_req;
+ struct dm_io_region io_reg;
+ struct dm_integrity_c *ic;
+ struct completion comp;
+};
+
+static void flush_notify(unsigned long error, void *fr_)
+{
+ struct flush_request *fr = fr_;
+ if (unlikely(error != 0))
+ dm_integrity_io_error(fr->ic, "flushing disk cache", -EIO);
+ complete(&fr->comp);
+}
+
+static void dm_integrity_flush_buffers(struct dm_integrity_c *ic, bool flush_data)
{
int r;
+
+ struct flush_request fr;
+
+ if (!ic->meta_dev)
+ flush_data = false;
+ if (flush_data) {
+ fr.io_req.bi_op = REQ_OP_WRITE,
+ fr.io_req.bi_op_flags = REQ_PREFLUSH | REQ_SYNC,
+ fr.io_req.mem.type = DM_IO_KMEM,
+ fr.io_req.mem.ptr.addr = NULL,
+ fr.io_req.notify.fn = flush_notify,
+ fr.io_req.notify.context = &fr;
+ fr.io_req.client = dm_bufio_get_dm_io_client(ic->bufio),
+ fr.io_reg.bdev = ic->dev->bdev,
+ fr.io_reg.sector = 0,
+ fr.io_reg.count = 0,
+ fr.ic = ic;
+ init_completion(&fr.comp);
+ r = dm_io(&fr.io_req, 1, &fr.io_reg, NULL);
+ BUG_ON(r);
+ }
+
r = dm_bufio_write_dirty_buffers(ic->bufio);
if (unlikely(r))
dm_integrity_io_error(ic, "writing tags", r);
+
+ if (flush_data)
+ wait_for_completion(&fr.comp);
}
static void sleep_on_endio_wait(struct dm_integrity_c *ic)
@@ -2110,7 +2159,7 @@ offload_to_thread:
if (unlikely(dio->op == REQ_OP_DISCARD) && likely(ic->mode != 'D')) {
integrity_metadata(&dio->work);
- dm_integrity_flush_buffers(ic);
+ dm_integrity_flush_buffers(ic, false);
dio->in_flight = (atomic_t)ATOMIC_INIT(1);
dio->completion = NULL;
@@ -2195,7 +2244,7 @@ static void integrity_commit(struct work_struct *w)
flushes = bio_list_get(&ic->flush_bio_list);
if (unlikely(ic->mode != 'J')) {
spin_unlock_irq(&ic->endio_wait.lock);
- dm_integrity_flush_buffers(ic);
+ dm_integrity_flush_buffers(ic, true);
goto release_flush_bios;
}
@@ -2409,7 +2458,7 @@ skip_io:
complete_journal_op(&comp);
wait_for_completion_io(&comp.comp);
- dm_integrity_flush_buffers(ic);
+ dm_integrity_flush_buffers(ic, true);
}
static void integrity_writer(struct work_struct *w)
@@ -2451,7 +2500,7 @@ static void recalc_write_super(struct dm_integrity_c *ic)
{
int r;
- dm_integrity_flush_buffers(ic);
+ dm_integrity_flush_buffers(ic, false);
if (dm_integrity_failed(ic))
return;
@@ -2654,7 +2703,7 @@ static void bitmap_flush_work(struct work_struct *work)
unsigned long limit;
struct bio *bio;
- dm_integrity_flush_buffers(ic);
+ dm_integrity_flush_buffers(ic, false);
range.logical_sector = 0;
range.n_sectors = ic->provided_data_sectors;
@@ -2663,9 +2712,7 @@ static void bitmap_flush_work(struct work_struct *work)
add_new_range_and_wait(ic, &range);
spin_unlock_irq(&ic->endio_wait.lock);
- dm_integrity_flush_buffers(ic);
- if (ic->meta_dev)
- blkdev_issue_flush(ic->dev->bdev, GFP_NOIO);
+ dm_integrity_flush_buffers(ic, true);
limit = ic->provided_data_sectors;
if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
@@ -2934,11 +2981,11 @@ static void dm_integrity_postsuspend(struct dm_target *ti)
if (ic->meta_dev)
queue_work(ic->writer_wq, &ic->writer_work);
drain_workqueue(ic->writer_wq);
- dm_integrity_flush_buffers(ic);
+ dm_integrity_flush_buffers(ic, true);
}
if (ic->mode == 'B') {
- dm_integrity_flush_buffers(ic);
+ dm_integrity_flush_buffers(ic, true);
#if 1
/* set to 0 to test bitmap replay code */
init_journal(ic, 0, ic->journal_sections, 0);
@@ -3102,6 +3149,7 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
arg_count += !!ic->journal_crypt_alg.alg_string;
arg_count += !!ic->journal_mac_alg.alg_string;
arg_count += (ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0;
+ arg_count += ic->legacy_recalculate;
DMEMIT("%s %llu %u %c %u", ic->dev->name, ic->start,
ic->tag_size, ic->mode, arg_count);
if (ic->meta_dev)
@@ -3125,6 +3173,8 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
}
if ((ic->sb->flags & cpu_to_le32(SB_FLAG_FIXED_PADDING)) != 0)
DMEMIT(" fix_padding");
+ if (ic->legacy_recalculate)
+ DMEMIT(" legacy_recalculate");
#define EMIT_ALG(a, n) \
do { \
@@ -3754,7 +3804,7 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
unsigned extra_args;
struct dm_arg_set as;
static const struct dm_arg _args[] = {
- {0, 9, "Invalid number of feature args"},
+ {0, 16, "Invalid number of feature args"},
};
unsigned journal_sectors, interleave_sectors, buffer_sectors, journal_watermark, sync_msec;
bool should_write_sb;
@@ -3902,6 +3952,8 @@ static int dm_integrity_ctr(struct dm_target *ti, unsigned argc, char **argv)
ic->discard = true;
} else if (!strcmp(opt_string, "fix_padding")) {
ic->fix_padding = true;
+ } else if (!strcmp(opt_string, "legacy_recalculate")) {
+ ic->legacy_recalculate = true;
} else {
r = -EINVAL;
ti->error = "Invalid argument";
@@ -4197,6 +4249,20 @@ try_smaller_buffer:
r = -ENOMEM;
goto bad;
}
+ } else {
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING)) {
+ ti->error = "Recalculate can only be specified with internal_hash";
+ r = -EINVAL;
+ goto bad;
+ }
+ }
+
+ if (ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING) &&
+ le64_to_cpu(ic->sb->recalc_sector) < ic->provided_data_sectors &&
+ dm_integrity_disable_recalculate(ic)) {
+ ti->error = "Recalculating with HMAC is disabled for security reasons - if you really need it, use the argument \"legacy_recalculate\"";
+ r = -EOPNOTSUPP;
+ goto bad;
}
ic->bufio = dm_bufio_client_create(ic->meta_dev ? ic->meta_dev->bdev : ic->dev->bdev,
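
The new dm_integrity_flush_buffers() issues the data-device flush asynchronously through the bufio client's dm_io client (hence the dm_bufio_get_dm_io_client() accessor added above), writes the dirty metadata buffers while that flush is in flight, and only then waits, overlapping the two instead of issuing them back to back as the removed blkdev_issue_flush() call did. The completion lives on the stack of the waiter, a common kernel idiom; a minimal generic sketch with hypothetical helpers (start_async_flush(), write_dirty_buffers(), struct example_c):

struct flush_request {
	struct completion comp;
	int error;
};

static void flush_done(unsigned long error, void *context)
{
	struct flush_request *fr = context;

	fr->error = error ? -EIO : 0;
	complete(&fr->comp);	/* wakes the stack frame that owns fr */
}

static int flush_and_write_tags(struct example_c *c)
{
	struct flush_request fr;

	init_completion(&fr.comp);
	start_async_flush(c, flush_done, &fr);	/* hypothetical async submit */

	write_dirty_buffers(c);			/* overlaps with the flush */

	wait_for_completion(&fr.comp);		/* fr must outlive the I/O */
	return fr.error;
}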
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 23c38777e8f6..cab12b2251ba 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3729,10 +3729,10 @@ static void raid_io_hints(struct dm_target *ti, struct queue_limits *limits)
blk_limits_io_opt(limits, chunk_size_bytes * mddev_data_stripes(rs));
/*
- * RAID1 and RAID10 personalities require bio splitting,
- * RAID0/4/5/6 don't and process large discard bios properly.
+ * RAID0 and RAID10 personalities require bio splitting,
+ * RAID1/4/5/6 don't and process large discard bios properly.
*/
- if (rs_is_raid1(rs) || rs_is_raid10(rs)) {
+ if (rs_is_raid0(rs) || rs_is_raid10(rs)) {
limits->discard_granularity = chunk_size_bytes;
limits->max_discard_sectors = rs->md.chunk_sectors;
}
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index 4668b2cd98f4..11890db71f3f 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -141,6 +141,11 @@ struct dm_snapshot {
* for them to be committed.
*/
struct bio_list bios_queued_during_merge;
+
+ /*
+ * Flush data after merge.
+ */
+ struct bio flush_bio;
};
/*
@@ -1121,6 +1126,17 @@ shut:
static void error_bios(struct bio *bio);
+static int flush_data(struct dm_snapshot *s)
+{
+ struct bio *flush_bio = &s->flush_bio;
+
+ bio_reset(flush_bio);
+ bio_set_dev(flush_bio, s->origin->bdev);
+ flush_bio->bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
+
+ return submit_bio_wait(flush_bio);
+}
+
static void merge_callback(int read_err, unsigned long write_err, void *context)
{
struct dm_snapshot *s = context;
@@ -1134,6 +1150,11 @@ static void merge_callback(int read_err, unsigned long write_err, void *context)
goto shut;
}
+ if (flush_data(s) < 0) {
+ DMERR("Flush after merge failed: shutting down merge");
+ goto shut;
+ }
+
if (s->store->type->commit_merge(s->store,
s->num_merging_chunks) < 0) {
DMERR("Write error in exception store: shutting down merge");
@@ -1318,6 +1339,7 @@ static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
s->first_merging_chunk = 0;
s->num_merging_chunks = 0;
bio_list_init(&s->bios_queued_during_merge);
+ bio_init(&s->flush_bio, NULL, 0);
/* Allocate hash table for COW data */
if (init_hash_tables(s)) {
@@ -1504,6 +1526,8 @@ static void snapshot_dtr(struct dm_target *ti)
dm_exception_store_destroy(s->store);
+ bio_uninit(&s->flush_bio);
+
dm_put_device(ti, s->cow);
dm_put_device(ti, s->origin);
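
The snapshot-merge fix inserts a preflush so that data copied by the merge reaches the origin device before commit_merge() drops the corresponding exceptions from the store; without it, a crash could lose data the metadata no longer protects. The flush bio is embedded in struct dm_snapshot and recycled each cycle; its lifecycle, condensed from the hunks above:

/* ctr: one embedded bio; an empty flush needs no biovecs */
bio_init(&s->flush_bio, NULL, 0);

/* each merge cycle: recycle it rather than allocating a new bio */
bio_reset(&s->flush_bio);
bio_set_dev(&s->flush_bio, s->origin->bdev);
s->flush_bio.bi_opf = REQ_OP_WRITE | REQ_PREFLUSH;
r = submit_bio_wait(&s->flush_bio);	/* synchronous: returns after the flush */

/* dtr */
bio_uninit(&s->flush_bio);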
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 188f41287f18..4acf2342f7ad 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -363,14 +363,23 @@ int dm_get_device(struct dm_target *ti, const char *path, fmode_t mode,
{
int r;
dev_t dev;
+ unsigned int major, minor;
+ char dummy;
struct dm_dev_internal *dd;
struct dm_table *t = ti->table;
BUG_ON(!t);
- dev = dm_get_dev_t(path);
- if (!dev)
- return -ENODEV;
+ if (sscanf(path, "%u:%u%c", &major, &minor, &dummy) == 2) {
+ /* Extract the major/minor numbers */
+ dev = MKDEV(major, minor);
+ if (MAJOR(dev) != major || MINOR(dev) != minor)
+ return -EOVERFLOW;
+ } else {
+ dev = dm_get_dev_t(path);
+ if (!dev)
+ return -ENODEV;
+ }
dd = find_device(&t->devices, dev);
if (!dd) {
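
The dm_get_device() change parses "major:minor" strings directly instead of resolving them as paths. Two details carry the weight: the trailing %c makes sscanf() return 3 for input with junk after the minor number, so only a clean "N:M" matches, and re-splitting the constructed dev_t catches values too large for the device-number bit layout. A standalone userspace demo of the same parse (glibc makedev()/major()/minor() standing in for the kernel's MKDEV()/MAJOR()/MINOR()):

#include <stdio.h>
#include <sys/types.h>
#include <sys/sysmacros.h>	/* makedev(), major(), minor() */

static int parse_devno(const char *path, dev_t *out)
{
	unsigned int ma, mi;
	char dummy;

	if (sscanf(path, "%u:%u%c", &ma, &mi, &dummy) != 2)
		return -1;			/* not a bare major:minor */

	*out = makedev(ma, mi);
	if (major(*out) != ma || minor(*out) != mi)
		return -2;			/* -EOVERFLOW in the kernel */
	return 0;
}

int main(void)
{
	dev_t dev;

	printf("%d\n", parse_devno("8:16", &dev));	/* 0 */
	printf("%d\n", parse_devno("8:16x", &dev));	/* -1: trailing junk */
	printf("%d\n", parse_devno("/dev/sdb", &dev));	/* -1: fall back to path lookup */
	return 0;
}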
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index b3c3c8b4cb42..7bac564f3faa 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -562,7 +562,7 @@ static int dm_blk_ioctl(struct block_device *bdev, fmode_t mode,
* subset of the parent bdev; require extra privileges.
*/
if (!capable(CAP_SYS_RAWIO)) {
- DMWARN_LIMIT(
+ DMDEBUG_LIMIT(
"%s: sending ioctl %x to DM device without required privilege.",
current->comm, cmd);
r = -ENOIOCTLCMD;
diff --git a/drivers/md/md.c b/drivers/md/md.c
index ca409428b4fc..04384452a7ab 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -639,8 +639,10 @@ static void md_submit_flush_data(struct work_struct *ws)
* could wait for this and below md_handle_request could wait for those
* bios because of suspend check
*/
+ spin_lock_irq(&mddev->lock);
mddev->prev_flush_start = mddev->start_flush;
mddev->flush_bio = NULL;
+ spin_unlock_irq(&mddev->lock);
wake_up(&mddev->sb_wait);
if (bio->bi_iter.bi_size == 0) {
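
The md.c hunk closes a small window: md_flush_request() decides whether a bio can piggyback on an already-completed flush by examining prev_flush_start and flush_bio together under mddev->lock, so the completion path must publish both updates under the same lock rather than letting a reader observe one without the other. A userspace analogue of the pairing (illustrative, not mddev's real layout or check):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static unsigned long prev_flush_start;
static void *flush_bio;		/* non-NULL while a flush is in flight */

/* writer: publish both fields in one critical section */
static void finish_flush(unsigned long start)
{
	pthread_mutex_lock(&lock);
	prev_flush_start = start;
	flush_bio = NULL;
	pthread_mutex_unlock(&lock);
}

/* reader: the "can we skip this flush?" check sees both updates or neither */
static bool can_skip_flush(unsigned long req_start)
{
	bool skip;

	pthread_mutex_lock(&lock);
	skip = (flush_bio == NULL && req_start <= prev_flush_start);
	pthread_mutex_unlock(&lock);
	return skip;
}

int main(void)
{
	finish_flush(42);
	printf("%d\n", can_skip_flush(41));	/* 1: covered by the previous flush */
	return 0;
}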