summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/movinggc.c2
-rw-r--r--drivers/md/bcache/writeback.c2
-rw-r--r--drivers/md/dm-crypt.c15
-rw-r--r--drivers/md/dm-ebs-target.c7
-rw-r--r--drivers/md/dm-flakey.c2
-rw-r--r--drivers/md/dm-integrity.c65
-rw-r--r--drivers/md/dm-io.c1
-rw-r--r--drivers/md/dm-linear.c5
-rw-r--r--drivers/md/dm-ps-io-affinity.c2
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm-raid1.c5
-rw-r--r--drivers/md/dm-rq.c2
-rw-r--r--drivers/md/dm-stripe.c5
-rw-r--r--drivers/md/dm-table.c29
-rw-r--r--drivers/md/dm-verity-fec.c6
-rw-r--r--drivers/md/dm-verity-target.c12
-rw-r--r--drivers/md/dm.c31
-rw-r--r--drivers/md/md-bitmap.c53
-rw-r--r--drivers/md/md.c73
-rw-r--r--drivers/md/md.h6
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.c54
-rw-r--r--drivers/md/raid0.c2
-rw-r--r--drivers/md/raid1-10.c2
-rw-r--r--drivers/md/raid1.c19
-rw-r--r--drivers/md/raid10.c22
-rw-r--r--drivers/md/raid5-cache.c16
26 files changed, 259 insertions, 181 deletions
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index ef6abf33f926..45ca134cbf02 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -82,7 +82,7 @@ static void moving_init(struct moving_io *io)
bio_init(bio, NULL, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS), 0);
bio_get(bio);
- bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
bio->bi_iter.bi_size = KEY_SIZE(&io->w->key) << 9;
bio->bi_private = &io->cl;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index c1d28e365910..453efbbdc8ee 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -334,7 +334,7 @@ static void dirty_init(struct keybuf_key *w)
bio_init(bio, NULL, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0);
if (!io->dc->writeback_percent)
- bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
bio->bi_iter.bi_size = KEY_SIZE(&w->key) << 9;
bio->bi_private = w;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 78c975d7cd5f..02a2919f4e5a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -1188,7 +1188,7 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio)
tag_len = io->cc->tuple_size * (bio_sectors(bio) >> io->cc->sector_shift);
- bip->bip_iter.bi_sector = io->cc->start + io->sector;
+ bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
ret = bio_integrity_add_page(bio, virt_to_page(io->integrity_metadata),
tag_len, offset_in_page(io->integrity_metadata));
@@ -1720,6 +1720,7 @@ retry:
clone->bi_private = io;
clone->bi_end_io = crypt_endio;
clone->bi_ioprio = io->base_bio->bi_ioprio;
+ clone->bi_iter.bi_sector = cc->start + io->sector;
remaining_size = size;
@@ -1910,7 +1911,6 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
crypt_dec_pending(io);
return 1;
}
- clone->bi_iter.bi_sector = cc->start + io->sector;
crypt_convert_init(cc, &io->ctx, clone, clone, io->sector);
io->saved_bi_iter = clone->bi_iter;
dm_submit_bio_remap(io->base_bio, clone);
@@ -1926,13 +1926,13 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
clone = bio_alloc_clone(cc->dev->bdev, io->base_bio, gfp, &cc->bs);
if (!clone)
return 1;
+
+ clone->bi_iter.bi_sector = cc->start + io->sector;
clone->bi_private = io;
clone->bi_end_io = crypt_endio;
crypt_inc_pending(io);
- clone->bi_iter.bi_sector = cc->start + io->sector;
-
if (dm_crypt_integrity_io_alloc(io, clone)) {
crypt_dec_pending(io);
bio_put(clone);
@@ -2040,8 +2040,6 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
/* crypt_convert should have filled the clone bio */
BUG_ON(io->ctx.iter_out.bi_size);
- clone->bi_iter.bi_sector = cc->start + io->sector;
-
if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) ||
test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) {
dm_submit_bio_remap(io->base_bio, clone);
@@ -2098,7 +2096,7 @@ static void kcryptd_crypt_write_continue(struct work_struct *work)
wait_for_completion(&ctx->restart);
reinit_completion(&ctx->restart);
- r = crypt_convert(cc, &io->ctx, true, false);
+ r = crypt_convert(cc, &io->ctx, false, false);
if (r)
io->error = r;
crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
@@ -2196,7 +2194,7 @@ static void kcryptd_crypt_read_continue(struct work_struct *work)
wait_for_completion(&io->ctx.restart);
reinit_completion(&io->ctx.restart);
- r = crypt_convert(cc, &io->ctx, true, false);
+ r = crypt_convert(cc, &io->ctx, false, false);
if (r)
io->error = r;
@@ -2214,7 +2212,6 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
crypt_inc_pending(io);
if (io->ctx.aead_recheck) {
- io->ctx.cc_sector = io->sector + cc->iv_offset;
r = crypt_convert(cc, &io->ctx,
test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
} else {
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index 18ae45dcbfb2..b19b0142a690 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -390,6 +390,12 @@ static int ebs_map(struct dm_target *ti, struct bio *bio)
return DM_MAPIO_REMAPPED;
}
+static void ebs_postsuspend(struct dm_target *ti)
+{
+ struct ebs_c *ec = ti->private;
+ dm_bufio_client_reset(ec->bufio);
+}
+
static void ebs_status(struct dm_target *ti, status_type_t type,
unsigned int status_flags, char *result, unsigned int maxlen)
{
@@ -447,6 +453,7 @@ static struct target_type ebs_target = {
.ctr = ebs_ctr,
.dtr = ebs_dtr,
.map = ebs_map,
+ .postsuspend = ebs_postsuspend,
.status = ebs_status,
.io_hints = ebs_io_hints,
.prepare_ioctl = ebs_prepare_ioctl,
diff --git a/drivers/md/dm-flakey.c b/drivers/md/dm-flakey.c
index 731467d4ed10..b690905ab89f 100644
--- a/drivers/md/dm-flakey.c
+++ b/drivers/md/dm-flakey.c
@@ -426,7 +426,7 @@ static struct bio *clone_bio(struct dm_target *ti, struct flakey_c *fc, struct b
if (!clone)
return NULL;
- bio_init(clone, fc->dev->bdev, bio->bi_inline_vecs, nr_iovecs, bio->bi_opf);
+ bio_init(clone, fc->dev->bdev, clone->bi_inline_vecs, nr_iovecs, bio->bi_opf);
clone->bi_iter.bi_sector = flakey_map_sector(ti, bio->bi_iter.bi_sector);
clone->bi_private = bio;
diff --git a/drivers/md/dm-integrity.c b/drivers/md/dm-integrity.c
index 555dc06b9422..65ab609ac0cb 100644
--- a/drivers/md/dm-integrity.c
+++ b/drivers/md/dm-integrity.c
@@ -21,6 +21,7 @@
#include <linux/reboot.h>
#include <crypto/hash.h>
#include <crypto/skcipher.h>
+#include <crypto/utils.h>
#include <linux/async_tx.h>
#include <linux/dm-bufio.h>
@@ -516,7 +517,7 @@ static int sb_mac(struct dm_integrity_c *ic, bool wr)
dm_integrity_io_error(ic, "crypto_shash_digest", r);
return r;
}
- if (memcmp(mac, actual_mac, mac_size)) {
+ if (crypto_memneq(mac, actual_mac, mac_size)) {
dm_integrity_io_error(ic, "superblock mac", -EILSEQ);
dm_audit_log_target(DM_MSG_PREFIX, "mac-superblock", ic->ti, 0);
return -EILSEQ;
@@ -859,7 +860,7 @@ static void rw_section_mac(struct dm_integrity_c *ic, unsigned int section, bool
if (likely(wr))
memcpy(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR);
else {
- if (memcmp(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) {
+ if (crypto_memneq(&js->mac, result + (j * JOURNAL_MAC_PER_SECTOR), JOURNAL_MAC_PER_SECTOR)) {
dm_integrity_io_error(ic, "journal mac", -EILSEQ);
dm_audit_log_target(DM_MSG_PREFIX, "mac-journal", ic->ti, 0);
}
@@ -1401,10 +1402,9 @@ static bool find_newer_committed_node(struct dm_integrity_c *ic, struct journal_
static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, sector_t *metadata_block,
unsigned int *metadata_offset, unsigned int total_size, int op)
{
-#define MAY_BE_FILLER 1
-#define MAY_BE_HASH 2
unsigned int hash_offset = 0;
- unsigned int may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
+ unsigned char mismatch_hash = 0;
+ unsigned char mismatch_filler = !ic->discard;
do {
unsigned char *data, *dp;
@@ -1425,7 +1425,7 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
if (op == TAG_READ) {
memcpy(tag, dp, to_copy);
} else if (op == TAG_WRITE) {
- if (memcmp(dp, tag, to_copy)) {
+ if (crypto_memneq(dp, tag, to_copy)) {
memcpy(dp, tag, to_copy);
dm_bufio_mark_partial_buffer_dirty(b, *metadata_offset, *metadata_offset + to_copy);
}
@@ -1433,29 +1433,30 @@ static int dm_integrity_rw_tag(struct dm_integrity_c *ic, unsigned char *tag, se
/* e.g.: op == TAG_CMP */
if (likely(is_power_of_2(ic->tag_size))) {
- if (unlikely(memcmp(dp, tag, to_copy)))
- if (unlikely(!ic->discard) ||
- unlikely(memchr_inv(dp, DISCARD_FILLER, to_copy) != NULL)) {
- goto thorough_test;
- }
+ if (unlikely(crypto_memneq(dp, tag, to_copy)))
+ goto thorough_test;
} else {
unsigned int i, ts;
thorough_test:
ts = total_size;
for (i = 0; i < to_copy; i++, ts--) {
- if (unlikely(dp[i] != tag[i]))
- may_be &= ~MAY_BE_HASH;
- if (likely(dp[i] != DISCARD_FILLER))
- may_be &= ~MAY_BE_FILLER;
+ /*
+ * Warning: the control flow must not be
+ * dependent on match/mismatch of
+ * individual bytes.
+ */
+ mismatch_hash |= dp[i] ^ tag[i];
+ mismatch_filler |= dp[i] ^ DISCARD_FILLER;
hash_offset++;
if (unlikely(hash_offset == ic->tag_size)) {
- if (unlikely(!may_be)) {
+ if (unlikely(mismatch_hash) && unlikely(mismatch_filler)) {
dm_bufio_release(b);
return ts;
}
hash_offset = 0;
- may_be = MAY_BE_HASH | (ic->discard ? MAY_BE_FILLER : 0);
+ mismatch_hash = 0;
+ mismatch_filler = !ic->discard;
}
}
}
@@ -1476,8 +1477,6 @@ thorough_test:
} while (unlikely(total_size));
return 0;
-#undef MAY_BE_FILLER
-#undef MAY_BE_HASH
}
struct flush_request {
@@ -2076,7 +2075,7 @@ retry_kmap:
char checksums_onstack[MAX_T(size_t, HASH_MAX_DIGESTSIZE, MAX_TAG_SIZE)];
integrity_sector_checksum(ic, logical_sector, mem + bv.bv_offset, checksums_onstack);
- if (unlikely(memcmp(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
+ if (unlikely(crypto_memneq(checksums_onstack, journal_entry_tag(ic, je), ic->tag_size))) {
DMERR_LIMIT("Checksum failed when reading from journal, at sector 0x%llx",
logical_sector);
dm_audit_log_bio(DM_MSG_PREFIX, "journal-checksum",
@@ -2595,7 +2594,7 @@ static void dm_integrity_inline_recheck(struct work_struct *w)
bio_put(outgoing_bio);
integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, outgoing_data, digest);
- if (unlikely(memcmp(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
+ if (unlikely(crypto_memneq(digest, dio->integrity_payload, min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
DMERR_LIMIT("%pg: Checksum failed at sector 0x%llx",
ic->dev->bdev, dio->bio_details.bi_iter.bi_sector);
atomic64_inc(&ic->number_of_mismatches);
@@ -2634,7 +2633,7 @@ static int dm_integrity_end_io(struct dm_target *ti, struct bio *bio, blk_status
char *mem = bvec_kmap_local(&bv);
//memset(mem, 0xff, ic->sectors_per_block << SECTOR_SHIFT);
integrity_sector_checksum(ic, dio->bio_details.bi_iter.bi_sector, mem, digest);
- if (unlikely(memcmp(digest, dio->integrity_payload + pos,
+ if (unlikely(crypto_memneq(digest, dio->integrity_payload + pos,
min(crypto_shash_digestsize(ic->internal_hash), ic->tag_size)))) {
kunmap_local(mem);
dm_integrity_free_payload(dio);
@@ -2911,7 +2910,7 @@ static void do_journal_write(struct dm_integrity_c *ic, unsigned int write_start
integrity_sector_checksum(ic, sec + ((l - j) << ic->sb->log2_sectors_per_block),
(char *)access_journal_data(ic, i, l), test_tag);
- if (unlikely(memcmp(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) {
+ if (unlikely(crypto_memneq(test_tag, journal_entry_tag(ic, je2), ic->tag_size))) {
dm_integrity_io_error(ic, "tag mismatch when replaying journal", -EILSEQ);
dm_audit_log_target(DM_MSG_PREFIX, "integrity-replay-journal", ic->ti, 0);
}
@@ -3790,16 +3789,18 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
break;
case STATUSTYPE_TABLE: {
- arg_count = 3;
+ arg_count = 1; /* buffer_sectors */
arg_count += !!ic->meta_dev;
arg_count += ic->sectors_per_block != 1;
arg_count += !!(ic->sb->flags & cpu_to_le32(SB_FLAG_RECALCULATING));
arg_count += ic->reset_recalculate_flag;
arg_count += ic->discard;
- arg_count += ic->mode == 'J';
- arg_count += ic->mode == 'J';
- arg_count += ic->mode == 'B';
- arg_count += ic->mode == 'B';
+ arg_count += ic->mode != 'I'; /* interleave_sectors */
+ arg_count += ic->mode == 'J'; /* journal_sectors */
+ arg_count += ic->mode == 'J'; /* journal_watermark */
+ arg_count += ic->mode == 'J'; /* commit_time */
+ arg_count += ic->mode == 'B'; /* sectors_per_bit */
+ arg_count += ic->mode == 'B'; /* bitmap_flush_interval */
arg_count += !!ic->internal_hash_alg.alg_string;
arg_count += !!ic->journal_crypt_alg.alg_string;
arg_count += !!ic->journal_mac_alg.alg_string;
@@ -3818,14 +3819,15 @@ static void dm_integrity_status(struct dm_target *ti, status_type_t type,
DMEMIT(" reset_recalculate");
if (ic->discard)
DMEMIT(" allow_discards");
- DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
- DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
+ if (ic->mode != 'I')
+ DMEMIT(" interleave_sectors:%u", 1U << ic->sb->log2_interleave_sectors);
DMEMIT(" buffer_sectors:%u", 1U << ic->log2_buffer_sectors);
if (ic->mode == 'J') {
__u64 watermark_percentage = (__u64)(ic->journal_entries - ic->free_sectors_threshold) * 100;
watermark_percentage += ic->journal_entries / 2;
do_div(watermark_percentage, ic->journal_entries);
+ DMEMIT(" journal_sectors:%u", ic->initial_sectors - SB_SECTORS);
DMEMIT(" journal_watermark:%u", (unsigned int)watermark_percentage);
DMEMIT(" commit_time:%u", ic->autocommit_msec);
}
@@ -5081,16 +5083,19 @@ try_smaller_buffer:
ic->recalc_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
if (!ic->recalc_bitmap) {
+ ti->error = "Could not allocate memory for bitmap";
r = -ENOMEM;
goto bad;
}
ic->may_write_bitmap = dm_integrity_alloc_page_list(n_bitmap_pages);
if (!ic->may_write_bitmap) {
+ ti->error = "Could not allocate memory for bitmap";
r = -ENOMEM;
goto bad;
}
ic->bbs = kvmalloc_array(ic->n_bitmap_blocks, sizeof(struct bitmap_block_status), GFP_KERNEL);
if (!ic->bbs) {
+ ti->error = "Could not allocate memory for bitmap";
r = -ENOMEM;
goto bad;
}
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index d7a8e2f40db3..c37668790577 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -379,6 +379,7 @@ static void do_region(const blk_opf_t opf, unsigned int region,
atomic_inc(&io->count);
submit_bio(bio);
+ WARN_ON_ONCE(opf & REQ_ATOMIC && remaining);
} while (remaining);
}
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 49fb0f684193..66318aba4bdb 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -199,9 +199,10 @@ static size_t linear_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
static struct target_type linear_target = {
.name = "linear",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT |
- DM_TARGET_ZONED_HM | DM_TARGET_PASSES_CRYPTO,
+ DM_TARGET_ZONED_HM | DM_TARGET_PASSES_CRYPTO |
+ DM_TARGET_ATOMIC_WRITES,
.report_zones = linear_report_zones,
.module = THIS_MODULE,
.ctr = linear_ctr,
diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
index 461ee6b2044d..716807e511ee 100644
--- a/drivers/md/dm-ps-io-affinity.c
+++ b/drivers/md/dm-ps-io-affinity.c
@@ -116,7 +116,7 @@ static int ioa_create(struct path_selector *ps, unsigned int argc, char **argv)
if (!s)
return -ENOMEM;
- s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *),
+ s->path_map = kcalloc(nr_cpu_ids, sizeof(struct path_info *),
GFP_KERNEL);
if (!s->path_map)
goto free_selector;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1e0d3b9b75d6..6adc55fd90d3 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3196,7 +3196,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (reshape_sectors || rs_is_raid1(rs)) {
/*
* We can only prepare for a reshape here, because the
- * raid set needs to run to provide the repective reshape
+ * raid set needs to run to provide the respective reshape
* check functions via its MD personality instance.
*
* So do the reshape check after md_run() succeeded.
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9511dae5b556..8c6f1f7e6456 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -656,7 +656,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
unsigned int i;
struct dm_io_region io[MAX_NR_MIRRORS], *dest = io;
struct mirror *m;
- blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH);
+ blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH | REQ_ATOMIC);
struct dm_io_request io_req = {
.bi_opf = REQ_OP_WRITE | op_flags,
.mem.type = DM_IO_BIO,
@@ -1483,8 +1483,9 @@ static int mirror_iterate_devices(struct dm_target *ti,
static struct target_type mirror_target = {
.name = "mirror",
- .version = {1, 14, 0},
+ .version = {1, 15, 0},
.module = THIS_MODULE,
+ .features = DM_TARGET_ATOMIC_WRITES,
.ctr = mirror_ctr,
.dtr = mirror_dtr,
.map = mirror_map,
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 499f8cc8a39f..e23076f7ece2 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -547,7 +547,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
md->tag_set->ops = &dm_mq_ops;
md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
md->tag_set->numa_node = md->numa_node_id;
- md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
+ md->tag_set->flags = BLK_MQ_F_STACKING;
md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
md->tag_set->driver_data = md;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4112071de0be..3786ac67cefe 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -465,8 +465,9 @@ static void stripe_io_hints(struct dm_target *ti,
static struct target_type stripe_target = {
.name = "striped",
- .version = {1, 6, 0},
- .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
+ .version = {1, 7, 0},
+ .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT |
+ DM_TARGET_ATOMIC_WRITES,
.module = THIS_MODULE,
.ctr = stripe_ctr,
.dtr = stripe_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bd8b796ae683..0ef5203387b2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1806,6 +1806,32 @@ static bool dm_table_supports_secure_erase(struct dm_table *t)
return true;
}
+static int device_not_atomic_write_capable(struct dm_target *ti,
+ struct dm_dev *dev, sector_t start,
+ sector_t len, void *data)
+{
+ return !bdev_can_atomic_write(dev->bdev);
+}
+
+static bool dm_table_supports_atomic_writes(struct dm_table *t)
+{
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ if (!dm_target_supports_atomic_writes(ti->type))
+ return false;
+
+ if (!ti->type->iterate_devices)
+ return false;
+
+ if (ti->type->iterate_devices(ti,
+ device_not_atomic_write_capable, NULL)) {
+ return false;
+ }
+ }
+ return true;
+}
+
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
@@ -1854,6 +1880,9 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
return r;
}
+ if (dm_table_supports_atomic_writes(t))
+ limits->features |= BLK_FEAT_ATOMIC_WRITES;
+
r = queue_limits_set(q, limits);
if (r)
return r;
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index e61855da6461..0c41949db784 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -122,7 +122,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
par = fec_read_parity(v, rsb, block_offset, &offset,
- par_buf_offset, &buf, bio_prio(bio));
+ par_buf_offset, &buf, bio->bi_ioprio);
if (IS_ERR(par))
return PTR_ERR(par);
@@ -164,7 +164,7 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
dm_bufio_release(buf);
par = fec_read_parity(v, rsb, block_offset, &offset,
- par_buf_offset, &buf, bio_prio(bio));
+ par_buf_offset, &buf, bio->bi_ioprio);
if (IS_ERR(par))
return PTR_ERR(par);
}
@@ -254,7 +254,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
bufio = v->bufio;
}
- bbuf = dm_bufio_read_with_ioprio(bufio, block, &buf, bio_prio(bio));
+ bbuf = dm_bufio_read_with_ioprio(bufio, block, &buf, bio->bi_ioprio);
if (IS_ERR(bbuf)) {
DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
v->data_dev->name,
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 47d595f6a76e..24b167f71c5f 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -321,7 +321,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
}
} else {
data = dm_bufio_read_with_ioprio(v->bufio, hash_block,
- &buf, bio_prio(bio));
+ &buf, bio->bi_ioprio);
}
if (IS_ERR(data))
@@ -789,13 +789,20 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
verity_fec_init_io(io);
- verity_submit_prefetch(v, io, bio_prio(bio));
+ verity_submit_prefetch(v, io, bio->bi_ioprio);
submit_bio_noacct(bio);
return DM_MAPIO_SUBMITTED;
}
+static void verity_postsuspend(struct dm_target *ti)
+{
+ struct dm_verity *v = ti->private;
+ flush_workqueue(v->verify_wq);
+ dm_bufio_client_reset(v->bufio);
+}
+
/*
* Status: V (valid) or C (corruption found)
*/
@@ -1766,6 +1773,7 @@ static struct target_type verity_target = {
.ctr = verity_ctr,
.dtr = verity_dtr,
.map = verity_map,
+ .postsuspend = verity_postsuspend,
.status = verity_status,
.prepare_ioctl = verity_prepare_ioctl,
.iterate_devices = verity_iterate_devices,
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 12ecf07a3841..4d1e42891d24 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1479,12 +1479,12 @@ static void setup_split_accounting(struct clone_info *ci, unsigned int len)
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
struct dm_target *ti, unsigned int num_bios,
- unsigned *len, gfp_t gfp_flag)
+ unsigned *len)
{
struct bio *bio;
- int try = (gfp_flag & GFP_NOWAIT) ? 0 : 1;
+ int try;
- for (; try < 2; try++) {
+ for (try = 0; try < 2; try++) {
int bio_nr;
if (try && num_bios > 1)
@@ -1508,8 +1508,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
}
static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
- unsigned int num_bios, unsigned int *len,
- gfp_t gfp_flag)
+ unsigned int num_bios, unsigned int *len)
{
struct bio_list blist = BIO_EMPTY_LIST;
struct bio *clone;
@@ -1526,7 +1525,7 @@ static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_targe
* Using alloc_multiple_bios(), even if num_bios is 1, to consistently
* support allocating using GFP_NOWAIT with GFP_NOIO fallback.
*/
- alloc_multiple_bios(&blist, ci, ti, num_bios, len, gfp_flag);
+ alloc_multiple_bios(&blist, ci, ti, num_bios, len);
while ((clone = bio_list_pop(&blist))) {
if (num_bios > 1)
dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
@@ -1564,7 +1563,7 @@ static void __send_empty_flush(struct clone_info *ci)
atomic_add(ti->num_flush_bios, &ci->io->io_count);
bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
- NULL, GFP_NOWAIT);
+ NULL);
atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
}
} else {
@@ -1612,7 +1611,7 @@ static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
__max_io_len(ti, ci->sector, max_granularity, max_sectors));
atomic_add(num_bios, &ci->io->io_count);
- bios = __send_duplicate_bios(ci, ti, num_bios, &len, GFP_NOIO);
+ bios = __send_duplicate_bios(ci, ti, num_bios, &len);
/*
* alloc_io() takes one extra reference for submission, so the
* reference won't reach 0 without the following (+1) subtraction
@@ -1746,6 +1745,9 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED);
len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
+ if (ci->bio->bi_opf & REQ_ATOMIC && len != ci->sector_count)
+ return BLK_STS_IOERR;
+
setup_split_accounting(ci, len);
if (unlikely(ci->bio->bi_opf & REQ_NOWAIT)) {
@@ -1849,7 +1851,7 @@ static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
* not go crazy with the clone allocation.
*/
alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32),
- NULL, GFP_NOIO);
+ NULL);
}
/* Get a clone and change it to a regular reset operation. */
@@ -1881,7 +1883,7 @@ static void __send_zone_reset_all_native(struct clone_info *ci,
unsigned int bios;
atomic_add(1, &ci->io->io_count);
- bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO);
+ bios = __send_duplicate_bios(ci, ti, 1, NULL);
atomic_sub(1 - bios, &ci->io->io_count);
ci->sector_count = 0;
@@ -1969,6 +1971,15 @@ static void dm_split_and_process_bio(struct mapped_device *md,
/* Only support nowait for normal IO */
if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) {
+ /*
+ * Don't support NOWAIT for FLUSH because it may allocate
+ * multiple bios and there's no easy way how to undo the
+ * allocations.
+ */
+ if (bio->bi_opf & REQ_PREFLUSH) {
+ bio_wouldblock_error(bio);
+ return;
+ }
io = alloc_io(md, bio, GFP_NOWAIT);
if (unlikely(!io)) {
/* Unable to do anything without dm_io. */
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index 2e3087556adb..27409d05f053 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -426,8 +426,8 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
struct block_device *bdev;
struct mddev *mddev = bitmap->mddev;
struct bitmap_storage *store = &bitmap->storage;
- unsigned int bitmap_limit = (bitmap->storage.file_pages - pg_index) <<
- PAGE_SHIFT;
+ unsigned long num_pages = bitmap->storage.file_pages;
+ unsigned int bitmap_limit = (num_pages - pg_index % num_pages) << PAGE_SHIFT;
loff_t sboff, offset = mddev->bitmap_info.offset;
sector_t ps = pg_index * PAGE_SIZE / SECTOR_SIZE;
unsigned int size = PAGE_SIZE;
@@ -436,7 +436,7 @@ static int __write_sb_page(struct md_rdev *rdev, struct bitmap *bitmap,
bdev = (rdev->meta_bdev) ? rdev->meta_bdev : rdev->bdev;
/* we compare length (page numbers), not page offset. */
- if ((pg_index - store->sb_index) == store->file_pages - 1) {
+ if ((pg_index - store->sb_index) == num_pages - 1) {
unsigned int last_page_size = store->bytes & (PAGE_SIZE - 1);
if (last_page_size == 0)
@@ -682,7 +682,7 @@ static void bitmap_update_sb(void *data)
return;
if (!bitmap->storage.sb_page) /* no superblock */
return;
- sb = kmap_atomic(bitmap->storage.sb_page);
+ sb = kmap_local_page(bitmap->storage.sb_page);
sb->events = cpu_to_le64(bitmap->mddev->events);
if (bitmap->mddev->events < bitmap->events_cleared)
/* rocking back to read-only */
@@ -702,7 +702,7 @@ static void bitmap_update_sb(void *data)
sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
bitmap_info.space);
- kunmap_atomic(sb);
+ kunmap_local(sb);
if (bitmap->storage.file)
write_file_page(bitmap, bitmap->storage.sb_page, 1);
@@ -717,7 +717,7 @@ static void bitmap_print_sb(struct bitmap *bitmap)
if (!bitmap || !bitmap->storage.sb_page)
return;
- sb = kmap_atomic(bitmap->storage.sb_page);
+ sb = kmap_local_page(bitmap->storage.sb_page);
pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic));
pr_debug(" version: %u\n", le32_to_cpu(sb->version));
@@ -736,7 +736,7 @@ static void bitmap_print_sb(struct bitmap *bitmap)
pr_debug(" sync size: %llu KB\n",
(unsigned long long)le64_to_cpu(sb->sync_size)/2);
pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
- kunmap_atomic(sb);
+ kunmap_local(sb);
}
/*
@@ -760,7 +760,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
return -ENOMEM;
bitmap->storage.sb_index = 0;
- sb = kmap_atomic(bitmap->storage.sb_page);
+ sb = kmap_local_page(bitmap->storage.sb_page);
sb->magic = cpu_to_le32(BITMAP_MAGIC);
sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
@@ -768,7 +768,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
chunksize = bitmap->mddev->bitmap_info.chunksize;
BUG_ON(!chunksize);
if (!is_power_of_2(chunksize)) {
- kunmap_atomic(sb);
+ kunmap_local(sb);
pr_warn("bitmap chunksize not a power of 2\n");
return -EINVAL;
}
@@ -803,7 +803,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
bitmap->mddev->bitmap_info.nodes = 0;
- kunmap_atomic(sb);
+ kunmap_local(sb);
return 0;
}
@@ -865,7 +865,7 @@ re_read:
return err;
err = -EINVAL;
- sb = kmap_atomic(sb_page);
+ sb = kmap_local_page(sb_page);
chunksize = le32_to_cpu(sb->chunksize);
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
@@ -932,7 +932,7 @@ re_read:
err = 0;
out:
- kunmap_atomic(sb);
+ kunmap_local(sb);
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
/* Assigning chunksize is required for "re_read" */
bitmap->mddev->bitmap_info.chunksize = chunksize;
@@ -1161,12 +1161,12 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
bit = file_page_offset(&bitmap->storage, chunk);
/* set the bit */
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_page(page);
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
set_bit(bit, kaddr);
else
set_bit_le(bit, kaddr);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
pr_debug("set file bit %lu page %lu\n", bit, index);
/* record page number so it gets flushed to disk when unplug occurs */
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
@@ -1190,12 +1190,12 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
if (!page)
return;
bit = file_page_offset(&bitmap->storage, chunk);
- paddr = kmap_atomic(page);
+ paddr = kmap_local_page(page);
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
clear_bit(bit, paddr);
else
clear_bit_le(bit, paddr);
- kunmap_atomic(paddr);
+ kunmap_local(paddr);
if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING);
bitmap->allclean = 0;
@@ -1214,12 +1214,12 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
if (!page)
return -EINVAL;
bit = file_page_offset(&bitmap->storage, chunk);
- paddr = kmap_atomic(page);
+ paddr = kmap_local_page(page);
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
set = test_bit(bit, paddr);
else
set = test_bit_le(bit, paddr);
- kunmap_atomic(paddr);
+ kunmap_local(paddr);
return set;
}
@@ -1388,9 +1388,9 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
* If the bitmap is out of date, dirty the whole page
* and write it out
*/
- paddr = kmap_atomic(page);
+ paddr = kmap_local_page(page);
memset(paddr + offset, 0xff, PAGE_SIZE - offset);
- kunmap_atomic(paddr);
+ kunmap_local(paddr);
filemap_write_page(bitmap, i, true);
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
@@ -1406,12 +1406,12 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
void *paddr;
bool was_set;
- paddr = kmap_atomic(page);
+ paddr = kmap_local_page(page);
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
was_set = test_bit(bit, paddr);
else
was_set = test_bit_le(bit, paddr);
- kunmap_atomic(paddr);
+ kunmap_local(paddr);
if (was_set) {
/* if the disk bit is set, set the memory bit */
@@ -1546,10 +1546,10 @@ static void bitmap_daemon_work(struct mddev *mddev)
bitmap_super_t *sb;
bitmap->need_sync = 0;
if (bitmap->storage.filemap) {
- sb = kmap_atomic(bitmap->storage.sb_page);
+ sb = kmap_local_page(bitmap->storage.sb_page);
sb->events_cleared =
cpu_to_le64(bitmap->events_cleared);
- kunmap_atomic(sb);
+ kunmap_local(sb);
set_page_attr(bitmap, 0,
BITMAP_PAGE_NEEDWRITE);
}
@@ -2355,9 +2355,8 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
if (!bitmap)
return -ENOENT;
- if (bitmap->mddev->bitmap_info.external)
- return -ENOENT;
- if (!bitmap->storage.sb_page) /* no superblock */
+ if (!bitmap->mddev->bitmap_info.external &&
+ !bitmap->storage.sb_page)
return -EINVAL;
sb = kmap_local_page(bitmap->storage.sb_page);
stats->sync_size = le64_to_cpu(sb->sync_size);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 465ca2af1e6e..ef859ccb0366 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -294,7 +294,7 @@ void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev)
static struct ctl_table_header *raid_table_header;
-static struct ctl_table raid_table[] = {
+static const struct ctl_table raid_table[] = {
{
.procname = "speed_limit_min",
.data = &sysctl_speed_limit_min,
@@ -629,6 +629,12 @@ static void __mddev_put(struct mddev *mddev)
queue_work(md_misc_wq, &mddev->del_work);
}
+static void mddev_put_locked(struct mddev *mddev)
+{
+ if (atomic_dec_and_test(&mddev->active))
+ __mddev_put(mddev);
+}
+
void mddev_put(struct mddev *mddev)
{
if (!atomic_dec_and_lock(&mddev->active, &all_mddevs_lock))
@@ -1748,7 +1754,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
count <<= sb->bblog_shift;
if (bb + 1 == 0)
break;
- if (badblocks_set(&rdev->badblocks, sector, count, 1))
+ if (!badblocks_set(&rdev->badblocks, sector, count, 1))
return -EINVAL;
}
} else if (sb->bblog_offset != 0)
@@ -8461,9 +8467,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs))
status_unused(seq);
- if (atomic_dec_and_test(&mddev->active))
- __mddev_put(mddev);
-
+ mddev_put_locked(mddev);
return 0;
}
@@ -9460,6 +9464,13 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
return true;
}
+ /* Check if resync is in progress. */
+ if (mddev->recovery_cp < MaxSector) {
+ set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
+ clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
+ return true;
+ }
+
/*
* Remove any failed drives, then add spares if possible. Spares are
* also removed and re-added, to allow the personality to fail the
@@ -9476,13 +9487,6 @@ static bool md_choose_sync_action(struct mddev *mddev, int *spares)
return true;
}
- /* Check if recovery is in progress. */
- if (mddev->recovery_cp < MaxSector) {
- set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
- clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
- return true;
- }
-
/* Delay to choose resync/check/repair in md_do_sync(). */
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery))
return true;
@@ -9846,7 +9850,6 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new)
{
struct mddev *mddev = rdev->mddev;
- int rv;
/*
* Recording new badblocks for faulty rdev will force unnecessary
@@ -9862,44 +9865,46 @@ int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
s += rdev->new_data_offset;
else
s += rdev->data_offset;
- rv = badblocks_set(&rdev->badblocks, s, sectors, 0);
- if (rv == 0) {
- /* Make sure they get written out promptly */
- if (test_bit(ExternalBbl, &rdev->flags))
- sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
- sysfs_notify_dirent_safe(rdev->sysfs_state);
- set_mask_bits(&mddev->sb_flags, 0,
- BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
- md_wakeup_thread(rdev->mddev->thread);
- return 1;
- } else
+
+ if (!badblocks_set(&rdev->badblocks, s, sectors, 0))
return 0;
+
+ /* Make sure they get written out promptly */
+ if (test_bit(ExternalBbl, &rdev->flags))
+ sysfs_notify_dirent_safe(rdev->sysfs_unack_badblocks);
+ sysfs_notify_dirent_safe(rdev->sysfs_state);
+ set_mask_bits(&mddev->sb_flags, 0,
+ BIT(MD_SB_CHANGE_CLEAN) | BIT(MD_SB_CHANGE_PENDING));
+ md_wakeup_thread(rdev->mddev->thread);
+ return 1;
}
EXPORT_SYMBOL_GPL(rdev_set_badblocks);
int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors,
int is_new)
{
- int rv;
if (is_new)
s += rdev->new_data_offset;
else
s += rdev->data_offset;
- rv = badblocks_clear(&rdev->badblocks, s, sectors);
- if ((rv == 0) && test_bit(ExternalBbl, &rdev->flags))
+
+ if (!badblocks_clear(&rdev->badblocks, s, sectors))
+ return 0;
+
+ if (test_bit(ExternalBbl, &rdev->flags))
sysfs_notify_dirent_safe(rdev->sysfs_badblocks);
- return rv;
+ return 1;
}
EXPORT_SYMBOL_GPL(rdev_clear_badblocks);
static int md_notify_reboot(struct notifier_block *this,
unsigned long code, void *x)
{
- struct mddev *mddev, *n;
+ struct mddev *mddev;
int need_delay = 0;
spin_lock(&all_mddevs_lock);
- list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
if (!mddev_get(mddev))
continue;
spin_unlock(&all_mddevs_lock);
@@ -9911,8 +9916,8 @@ static int md_notify_reboot(struct notifier_block *this,
mddev_unlock(mddev);
}
need_delay = 1;
- mddev_put(mddev);
spin_lock(&all_mddevs_lock);
+ mddev_put_locked(mddev);
}
spin_unlock(&all_mddevs_lock);
@@ -10245,7 +10250,7 @@ void md_autostart_arrays(int part)
static __exit void md_exit(void)
{
- struct mddev *mddev, *n;
+ struct mddev *mddev;
int delay = 1;
unregister_blkdev(MD_MAJOR,"md");
@@ -10266,7 +10271,7 @@ static __exit void md_exit(void)
remove_proc_entry("mdstat", NULL);
spin_lock(&all_mddevs_lock);
- list_for_each_entry_safe(mddev, n, &all_mddevs, all_mddevs) {
+ list_for_each_entry(mddev, &all_mddevs, all_mddevs) {
if (!mddev_get(mddev))
continue;
spin_unlock(&all_mddevs_lock);
@@ -10278,8 +10283,8 @@ static __exit void md_exit(void)
* the mddev for destruction by a workqueue, and the
* destroy_workqueue() below will wait for that to complete.
*/
- mddev_put(mddev);
spin_lock(&all_mddevs_lock);
+ mddev_put_locked(mddev);
}
spin_unlock(&all_mddevs_lock);
diff --git a/drivers/md/md.h b/drivers/md/md.h
index def808064ad8..cc31c795369d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -266,8 +266,8 @@ enum flag_bits {
Nonrot, /* non-rotational device (SSD) */
};
-static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors,
- sector_t *first_bad, int *bad_sectors)
+static inline int is_badblock(struct md_rdev *rdev, sector_t s, sector_t sectors,
+ sector_t *first_bad, sector_t *bad_sectors)
{
if (unlikely(rdev->badblocks.count)) {
int rv = badblocks_check(&rdev->badblocks, rdev->data_offset + s,
@@ -284,7 +284,7 @@ static inline int rdev_has_badblock(struct md_rdev *rdev, sector_t s,
int sectors)
{
sector_t first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
return is_badblock(rdev, s, sectors, &first_bad, &bad_sectors);
}
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index c7ba4e6cbbc7..98c745d90f48 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -13,6 +13,7 @@
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/hash.h>
+#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>
@@ -77,7 +78,7 @@ static void prefetch_issue(struct prefetch_set *p, struct dm_block_manager *bm)
/*----------------------------------------------------------------*/
struct shadow_info {
- struct hlist_node hlist;
+ struct rb_node node;
dm_block_t where;
};
@@ -95,7 +96,7 @@ struct dm_transaction_manager {
struct dm_space_map *sm;
spinlock_t lock;
- struct hlist_head buckets[DM_HASH_SIZE];
+ struct rb_root buckets[DM_HASH_SIZE];
struct prefetch_set prefetches;
};
@@ -106,14 +107,22 @@ static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
{
int r = 0;
unsigned int bucket = dm_hash_block(b, DM_HASH_MASK);
- struct shadow_info *si;
+ struct rb_node **node;
spin_lock(&tm->lock);
- hlist_for_each_entry(si, tm->buckets + bucket, hlist)
- if (si->where == b) {
+ node = &tm->buckets[bucket].rb_node;
+ while (*node) {
+ struct shadow_info *si =
+ rb_entry(*node, struct shadow_info, node);
+ if (b == si->where) {
r = 1;
break;
}
+ if (b < si->where)
+ node = &si->node.rb_left;
+ else
+ node = &si->node.rb_right;
+ }
spin_unlock(&tm->lock);
return r;
@@ -130,30 +139,41 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
si = kmalloc(sizeof(*si), GFP_NOIO);
if (si) {
+ struct rb_node **node, *parent;
si->where = b;
bucket = dm_hash_block(b, DM_HASH_MASK);
+
spin_lock(&tm->lock);
- hlist_add_head(&si->hlist, tm->buckets + bucket);
+ node = &tm->buckets[bucket].rb_node;
+ parent = NULL;
+ while (*node) {
+ struct shadow_info *si =
+ rb_entry(*node, struct shadow_info, node);
+ parent = *node;
+ if (b < si->where)
+ node = &si->node.rb_left;
+ else
+ node = &si->node.rb_right;
+ }
+ rb_link_node(&si->node, parent, node);
+ rb_insert_color(&si->node, &tm->buckets[bucket]);
spin_unlock(&tm->lock);
}
}
static void wipe_shadow_table(struct dm_transaction_manager *tm)
{
- struct shadow_info *si;
- struct hlist_node *tmp;
- struct hlist_head *bucket;
- int i;
+ unsigned int i;
spin_lock(&tm->lock);
for (i = 0; i < DM_HASH_SIZE; i++) {
- bucket = tm->buckets + i;
- hlist_for_each_entry_safe(si, tmp, bucket, hlist)
+ while (!RB_EMPTY_ROOT(&tm->buckets[i])) {
+ struct shadow_info *si =
+ rb_entry(tm->buckets[i].rb_node, struct shadow_info, node);
+ rb_erase(&si->node, &tm->buckets[i]);
kfree(si);
-
- INIT_HLIST_HEAD(bucket);
+ }
}
-
spin_unlock(&tm->lock);
}
@@ -162,7 +182,7 @@ static void wipe_shadow_table(struct dm_transaction_manager *tm)
static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
struct dm_space_map *sm)
{
- int i;
+ unsigned int i;
struct dm_transaction_manager *tm;
tm = kmalloc(sizeof(*tm), GFP_KERNEL);
@@ -176,7 +196,7 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
spin_lock_init(&tm->lock);
for (i = 0; i < DM_HASH_SIZE; i++)
- INIT_HLIST_HEAD(tm->buckets + i);
+ tm->buckets[i] = RB_ROOT;
prefetch_init(&tm->prefetches);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e8802309ed60..70bcc3cdf2cd 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -384,7 +384,7 @@ static int raid0_set_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * mddev->raid_disks;
- lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err)
return err;
diff --git a/drivers/md/raid1-10.c b/drivers/md/raid1-10.c
index 4378d3250bd7..62b980b12f93 100644
--- a/drivers/md/raid1-10.c
+++ b/drivers/md/raid1-10.c
@@ -247,7 +247,7 @@ static inline int raid1_check_read_range(struct md_rdev *rdev,
sector_t this_sector, int *len)
{
sector_t first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
/* no bad block overlap */
if (!is_badblock(rdev, this_sector, *len, &first_bad, &bad_sectors))
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3c75a69376f4..15829ab192d2 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -45,6 +45,7 @@
static void allow_barrier(struct r1conf *conf, sector_t sector_nr);
static void lower_barrier(struct r1conf *conf, sector_t sector_nr);
+static void raid1_free(struct mddev *mddev, void *priv);
#define RAID_1_10_NAME "raid1"
#include "raid1-10.c"
@@ -1315,8 +1316,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
struct r1conf *conf = mddev->private;
struct raid1_info *mirror;
struct bio *read_bio;
- const enum req_op op = bio_op(bio);
- const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
int rdisk, error;
bool r1bio_existed = !!r1_bio;
@@ -1404,7 +1403,6 @@ static void raid1_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r1_bio->sector +
mirror->rdev->data_offset;
read_bio->bi_end_io = raid1_end_read_request;
- read_bio->bi_opf = op | do_sync;
if (test_bit(FailFast, &mirror->rdev->flags) &&
test_bit(R1BIO_FailFast, &r1_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
@@ -1537,7 +1535,7 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
atomic_inc(&rdev->nr_pending);
if (test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
int is_bad;
is_bad = is_badblock(rdev, r1_bio->sector, max_sectors,
@@ -1653,8 +1651,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
mbio->bi_iter.bi_sector = (r1_bio->sector + rdev->data_offset);
mbio->bi_end_io = raid1_end_write_request;
- mbio->bi_opf = bio_op(bio) |
- (bio->bi_opf & (REQ_SYNC | REQ_FUA | REQ_ATOMIC));
if (test_bit(FailFast, &rdev->flags) &&
!test_bit(WriteMostly, &rdev->flags) &&
conf->raid_disks - mddev->degraded > 1)
@@ -2886,7 +2882,7 @@ static sector_t raid1_sync_request(struct mddev *mddev, sector_t sector_nr,
} else {
/* may need to read from here */
sector_t first_bad = MaxSector;
- int bad_sectors;
+ sector_t bad_sectors;
if (is_badblock(rdev, sector_nr, good_sectors,
&first_bad, &bad_sectors)) {
@@ -3217,7 +3213,7 @@ static int raid1_set_limits(struct mddev *mddev)
md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
- lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err)
return err;
@@ -3256,8 +3252,11 @@ static int raid1_run(struct mddev *mddev)
if (!mddev_is_dm(mddev)) {
ret = raid1_set_limits(mddev);
- if (ret)
+ if (ret) {
+ if (!mddev->private)
+ raid1_free(mddev, conf);
return ret;
+ }
}
mddev->degraded = 0;
@@ -3271,6 +3270,8 @@ static int raid1_run(struct mddev *mddev)
*/
if (conf->raid_disks - mddev->degraded < 1) {
md_unregister_thread(mddev, &conf->thread);
+ if (!mddev->private)
+ raid1_free(mddev, conf);
return -EINVAL;
}
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 8b736f30ef92..76a75925b713 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -747,7 +747,7 @@ static struct md_rdev *read_balance(struct r10conf *conf,
for (slot = 0; slot < conf->copies ; slot++) {
sector_t first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
sector_t dev_sector;
unsigned int pending;
bool nonrot;
@@ -1146,8 +1146,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
{
struct r10conf *conf = mddev->private;
struct bio *read_bio;
- const enum req_op op = bio_op(bio);
- const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
int max_sectors;
struct md_rdev *rdev;
char b[BDEVNAME_SIZE];
@@ -1228,7 +1226,6 @@ static void raid10_read_request(struct mddev *mddev, struct bio *bio,
read_bio->bi_iter.bi_sector = r10_bio->devs[slot].addr +
choose_data_offset(r10_bio, rdev);
read_bio->bi_end_io = raid10_end_read_request;
- read_bio->bi_opf = op | do_sync;
if (test_bit(FailFast, &rdev->flags) &&
test_bit(R10BIO_FailFast, &r10_bio->state))
read_bio->bi_opf |= MD_FAILFAST;
@@ -1247,10 +1244,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
struct bio *bio, bool replacement,
int n_copy)
{
- const enum req_op op = bio_op(bio);
- const blk_opf_t do_sync = bio->bi_opf & REQ_SYNC;
- const blk_opf_t do_fua = bio->bi_opf & REQ_FUA;
- const blk_opf_t do_atomic = bio->bi_opf & REQ_ATOMIC;
unsigned long flags;
struct r10conf *conf = mddev->private;
struct md_rdev *rdev;
@@ -1269,7 +1262,6 @@ static void raid10_write_one_disk(struct mddev *mddev, struct r10bio *r10_bio,
mbio->bi_iter.bi_sector = (r10_bio->devs[n_copy].addr +
choose_data_offset(r10_bio, rdev));
mbio->bi_end_io = raid10_end_write_request;
- mbio->bi_opf = op | do_sync | do_fua | do_atomic;
if (!replacement && test_bit(FailFast,
&conf->mirrors[devnum].rdev->flags)
&& enough(conf, devnum))
@@ -1438,7 +1430,7 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
sector_t dev_sector = r10_bio->devs[i].addr;
- int bad_sectors;
+ sector_t bad_sectors;
int is_bad;
is_bad = is_badblock(rdev, dev_sector, max_sectors,
@@ -1631,11 +1623,10 @@ static int raid10_handle_discard(struct mddev *mddev, struct bio *bio)
if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
return -EAGAIN;
- if (WARN_ON_ONCE(bio->bi_opf & REQ_NOWAIT)) {
+ if (!wait_barrier(conf, bio->bi_opf & REQ_NOWAIT)) {
bio_wouldblock_error(bio);
return 0;
}
- wait_barrier(conf, false);
/*
* Check reshape again to avoid reshape happens after checking
@@ -1743,6 +1734,7 @@ retry_discard:
* The discard bio returns only first r10bio finishes
*/
if (first_copy) {
+ md_account_bio(mddev, &bio);
r10_bio->master_bio = bio;
set_bit(R10BIO_Discard, &r10_bio->state);
first_copy = false;
@@ -3413,7 +3405,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
sector_t from_addr, to_addr;
struct md_rdev *rdev = conf->mirrors[d].rdev;
sector_t sector, first_bad;
- int bad_sectors;
+ sector_t bad_sectors;
if (!rdev ||
!test_bit(In_sync, &rdev->flags))
continue;
@@ -3609,7 +3601,7 @@ static sector_t raid10_sync_request(struct mddev *mddev, sector_t sector_nr,
for (i = 0; i < conf->copies; i++) {
int d = r10_bio->devs[i].devnum;
sector_t first_bad, sector;
- int bad_sectors;
+ sector_t bad_sectors;
struct md_rdev *rdev;
if (r10_bio->devs[i].repl_bio)
@@ -4018,7 +4010,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = 0;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
- lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err)
return err;
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index 011246e16a99..e530271cb86b 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -1019,10 +1019,10 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
/* checksum is already calculated in last run */
if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
continue;
- addr = kmap_atomic(sh->dev[i].page);
+ addr = kmap_local_page(sh->dev[i].page);
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
addr, PAGE_SIZE);
- kunmap_atomic(addr);
+ kunmap_local(addr);
}
parity_pages = 1 + !!(sh->qd_idx >= 0);
data_pages = write_disks - parity_pages;
@@ -1975,9 +1975,9 @@ r5l_recovery_verify_data_checksum(struct r5l_log *log,
u32 checksum;
r5l_recovery_read_page(log, ctx, page, log_offset);
- addr = kmap_atomic(page);
+ addr = kmap_local_page(page);
checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
- kunmap_atomic(addr);
+ kunmap_local(addr);
return (le32_to_cpu(log_checksum) == checksum) ? 0 : -EINVAL;
}
@@ -2377,11 +2377,11 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
payload->size = cpu_to_le32(BLOCK_SECTORS);
payload->location = cpu_to_le64(
raid5_compute_blocknr(sh, i, 0));
- addr = kmap_atomic(dev->page);
+ addr = kmap_local_page(dev->page);
payload->checksum[0] = cpu_to_le32(
crc32c_le(log->uuid_checksum, addr,
PAGE_SIZE));
- kunmap_atomic(addr);
+ kunmap_local(addr);
sync_page_io(log->rdev, write_pos, PAGE_SIZE,
dev->page, REQ_OP_WRITE, false);
write_pos = r5l_ring_add(log, write_pos,
@@ -2884,10 +2884,10 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
continue;
- addr = kmap_atomic(sh->dev[i].page);
+ addr = kmap_local_page(sh->dev[i].page);
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
addr, PAGE_SIZE);
- kunmap_atomic(addr);
+ kunmap_local(addr);
pages++;
}
WARN_ON(pages == 0);