summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
authorDmitry Torokhov <dmitry.torokhov@gmail.com>2025-02-26 03:03:25 +0300
committerDmitry Torokhov <dmitry.torokhov@gmail.com>2025-02-26 03:03:25 +0300
commit0b119045b79a672bc6d8f18641c60fc8ce1b4585 (patch)
tree69c63ecfec55b9576c34dc742e0c38f46f8a317a /drivers/md
parent7f7573bd4f37d4edc168c5b5def0bc2a1951c657 (diff)
parentd082ecbc71e9e0bf49883ee4afd435a77a5101b6 (diff)
downloadlinux-0b119045b79a672bc6d8f18641c60fc8ce1b4585.tar.xz
Merge tag 'v6.14-rc4' into next
Sync up with the mainline.
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/Kconfig13
-rw-r--r--drivers/md/Makefile2
-rw-r--r--drivers/md/bcache/movinggc.c2
-rw-r--r--drivers/md/bcache/writeback.c2
-rw-r--r--drivers/md/dm-crypt.c42
-rw-r--r--drivers/md/dm-ebs-target.c2
-rw-r--r--drivers/md/dm-io.c1
-rw-r--r--drivers/md/dm-linear.c5
-rw-r--r--drivers/md/dm-ps-io-affinity.c2
-rw-r--r--drivers/md/dm-raid.c2
-rw-r--r--drivers/md/dm-raid1.c5
-rw-r--r--drivers/md/dm-rq.c2
-rw-r--r--drivers/md/dm-stripe.c5
-rw-r--r--drivers/md/dm-table.c29
-rw-r--r--drivers/md/dm-thin.c5
-rw-r--r--drivers/md/dm-verity-fec.c61
-rw-r--r--drivers/md/dm-verity-target.c4
-rw-r--r--drivers/md/dm.c31
-rw-r--r--drivers/md/md-autodetect.c8
-rw-r--r--drivers/md/md-bitmap.c121
-rw-r--r--drivers/md/md-bitmap.h7
-rw-r--r--drivers/md/md-linear.c352
-rw-r--r--drivers/md/md.c38
-rw-r--r--drivers/md/md.h5
-rw-r--r--drivers/md/persistent-data/dm-array.c19
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.c54
-rw-r--r--drivers/md/raid0.c6
-rw-r--r--drivers/md/raid1.c40
-rw-r--r--drivers/md/raid1.h1
-rw-r--r--drivers/md/raid10.c32
-rw-r--r--drivers/md/raid10.h1
-rw-r--r--drivers/md/raid5-cache.c20
-rw-r--r--drivers/md/raid5.c111
-rw-r--r--drivers/md/raid5.h4
34 files changed, 734 insertions, 300 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index 1e9db8e4acdf..0b1870a09e1f 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -61,6 +61,19 @@ config MD_BITMAP_FILE
various kernel APIs and can only work with files on a file system not
actually sitting on the MD device.
+config MD_LINEAR
+ tristate "Linear (append) mode"
+ depends on BLK_DEV_MD
+ help
+ If you say Y here, then your multiple devices driver will be able to
+ use the so-called linear mode, i.e. it will combine the hard disk
+ partitions by simply appending one to the other.
+
+ To compile this as a module, choose M here: the module
+ will be called linear.
+
+ If unsure, say Y.
+
config MD_RAID0
tristate "RAID-0 (striping) mode"
depends on BLK_DEV_MD
diff --git a/drivers/md/Makefile b/drivers/md/Makefile
index 476a214e4bdc..87bdfc9fe14c 100644
--- a/drivers/md/Makefile
+++ b/drivers/md/Makefile
@@ -29,12 +29,14 @@ dm-zoned-y += dm-zoned-target.o dm-zoned-metadata.o dm-zoned-reclaim.o
md-mod-y += md.o md-bitmap.o
raid456-y += raid5.o raid5-cache.o raid5-ppl.o
+linear-y += md-linear.o
# Note: link order is important. All raid personalities
# and must come before md.o, as they each initialise
# themselves, and md.o may use the personalities when it
# auto-initialised.
+obj-$(CONFIG_MD_LINEAR) += linear.o
obj-$(CONFIG_MD_RAID0) += raid0.o
obj-$(CONFIG_MD_RAID1) += raid1.o
obj-$(CONFIG_MD_RAID10) += raid10.o
diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c
index ef6abf33f926..45ca134cbf02 100644
--- a/drivers/md/bcache/movinggc.c
+++ b/drivers/md/bcache/movinggc.c
@@ -82,7 +82,7 @@ static void moving_init(struct moving_io *io)
bio_init(bio, NULL, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&io->w->key), PAGE_SECTORS), 0);
bio_get(bio);
- bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
bio->bi_iter.bi_size = KEY_SIZE(&io->w->key) << 9;
bio->bi_private = &io->cl;
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index c1d28e365910..453efbbdc8ee 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -334,7 +334,7 @@ static void dirty_init(struct keybuf_key *w)
bio_init(bio, NULL, bio->bi_inline_vecs,
DIV_ROUND_UP(KEY_SIZE(&w->key), PAGE_SECTORS), 0);
if (!io->dc->writeback_percent)
- bio_set_prio(bio, IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0));
+ bio->bi_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_IDLE, 0);
bio->bi_iter.bi_size = KEY_SIZE(&w->key) << 9;
bio->bi_private = w;
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 1ae2c71bb383..02a2919f4e5a 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -59,6 +59,7 @@ struct convert_context {
struct bio *bio_out;
struct bvec_iter iter_out;
atomic_t cc_pending;
+ unsigned int tag_offset;
u64 cc_sector;
union {
struct skcipher_request *req;
@@ -1187,7 +1188,7 @@ static int dm_crypt_integrity_io_alloc(struct dm_crypt_io *io, struct bio *bio)
tag_len = io->cc->tuple_size * (bio_sectors(bio) >> io->cc->sector_shift);
- bip->bip_iter.bi_sector = io->cc->start + io->sector;
+ bip->bip_iter.bi_sector = bio->bi_iter.bi_sector;
ret = bio_integrity_add_page(bio, virt_to_page(io->integrity_metadata),
tag_len, offset_in_page(io->integrity_metadata));
@@ -1256,6 +1257,7 @@ static void crypt_convert_init(struct crypt_config *cc,
if (bio_out)
ctx->iter_out = bio_out->bi_iter;
ctx->cc_sector = sector + cc->iv_offset;
+ ctx->tag_offset = 0;
init_completion(&ctx->restart);
}
@@ -1588,7 +1590,6 @@ static void crypt_free_req(struct crypt_config *cc, void *req, struct bio *base_
static blk_status_t crypt_convert(struct crypt_config *cc,
struct convert_context *ctx, bool atomic, bool reset_pending)
{
- unsigned int tag_offset = 0;
unsigned int sector_step = cc->sector_size >> SECTOR_SHIFT;
int r;
@@ -1611,9 +1612,9 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
atomic_inc(&ctx->cc_pending);
if (crypt_integrity_aead(cc))
- r = crypt_convert_block_aead(cc, ctx, ctx->r.req_aead, tag_offset);
+ r = crypt_convert_block_aead(cc, ctx, ctx->r.req_aead, ctx->tag_offset);
else
- r = crypt_convert_block_skcipher(cc, ctx, ctx->r.req, tag_offset);
+ r = crypt_convert_block_skcipher(cc, ctx, ctx->r.req, ctx->tag_offset);
switch (r) {
/*
@@ -1633,8 +1634,8 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
* exit and continue processing in a workqueue
*/
ctx->r.req = NULL;
+ ctx->tag_offset++;
ctx->cc_sector += sector_step;
- tag_offset++;
return BLK_STS_DEV_RESOURCE;
}
} else {
@@ -1648,8 +1649,8 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
*/
case -EINPROGRESS:
ctx->r.req = NULL;
+ ctx->tag_offset++;
ctx->cc_sector += sector_step;
- tag_offset++;
continue;
/*
* The request was already processed (synchronously).
@@ -1657,7 +1658,7 @@ static blk_status_t crypt_convert(struct crypt_config *cc,
case 0:
atomic_dec(&ctx->cc_pending);
ctx->cc_sector += sector_step;
- tag_offset++;
+ ctx->tag_offset++;
if (!atomic)
cond_resched();
continue;
@@ -1719,6 +1720,7 @@ retry:
clone->bi_private = io;
clone->bi_end_io = crypt_endio;
clone->bi_ioprio = io->base_bio->bi_ioprio;
+ clone->bi_iter.bi_sector = cc->start + io->sector;
remaining_size = size;
@@ -1909,7 +1911,6 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
crypt_dec_pending(io);
return 1;
}
- clone->bi_iter.bi_sector = cc->start + io->sector;
crypt_convert_init(cc, &io->ctx, clone, clone, io->sector);
io->saved_bi_iter = clone->bi_iter;
dm_submit_bio_remap(io->base_bio, clone);
@@ -1925,13 +1926,13 @@ static int kcryptd_io_read(struct dm_crypt_io *io, gfp_t gfp)
clone = bio_alloc_clone(cc->dev->bdev, io->base_bio, gfp, &cc->bs);
if (!clone)
return 1;
+
+ clone->bi_iter.bi_sector = cc->start + io->sector;
clone->bi_private = io;
clone->bi_end_io = crypt_endio;
crypt_inc_pending(io);
- clone->bi_iter.bi_sector = cc->start + io->sector;
-
if (dm_crypt_integrity_io_alloc(io, clone)) {
crypt_dec_pending(io);
bio_put(clone);
@@ -2039,8 +2040,6 @@ static void kcryptd_crypt_write_io_submit(struct dm_crypt_io *io, int async)
/* crypt_convert should have filled the clone bio */
BUG_ON(io->ctx.iter_out.bi_size);
- clone->bi_iter.bi_sector = cc->start + io->sector;
-
if ((likely(!async) && test_bit(DM_CRYPT_NO_OFFLOAD, &cc->flags)) ||
test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags)) {
dm_submit_bio_remap(io->base_bio, clone);
@@ -2092,13 +2091,12 @@ static void kcryptd_crypt_write_continue(struct work_struct *work)
struct crypt_config *cc = io->cc;
struct convert_context *ctx = &io->ctx;
int crypt_finished;
- sector_t sector = io->sector;
blk_status_t r;
wait_for_completion(&ctx->restart);
reinit_completion(&ctx->restart);
- r = crypt_convert(cc, &io->ctx, true, false);
+ r = crypt_convert(cc, &io->ctx, false, false);
if (r)
io->error = r;
crypt_finished = atomic_dec_and_test(&ctx->cc_pending);
@@ -2109,10 +2107,8 @@ static void kcryptd_crypt_write_continue(struct work_struct *work)
}
/* Encryption was already finished, submit io now */
- if (crypt_finished) {
+ if (crypt_finished)
kcryptd_crypt_write_io_submit(io, 0);
- io->sector = sector;
- }
crypt_dec_pending(io);
}
@@ -2123,14 +2119,13 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
struct convert_context *ctx = &io->ctx;
struct bio *clone;
int crypt_finished;
- sector_t sector = io->sector;
blk_status_t r;
/*
* Prevent io from disappearing until this function completes.
*/
crypt_inc_pending(io);
- crypt_convert_init(cc, ctx, NULL, io->base_bio, sector);
+ crypt_convert_init(cc, ctx, NULL, io->base_bio, io->sector);
clone = crypt_alloc_buffer(io, io->base_bio->bi_iter.bi_size);
if (unlikely(!clone)) {
@@ -2147,8 +2142,6 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
io->ctx.iter_in = clone->bi_iter;
}
- sector += bio_sectors(clone);
-
crypt_inc_pending(io);
r = crypt_convert(cc, ctx,
test_bit(DM_CRYPT_NO_WRITE_WORKQUEUE, &cc->flags), true);
@@ -2172,10 +2165,8 @@ static void kcryptd_crypt_write_convert(struct dm_crypt_io *io)
}
/* Encryption was already finished, submit io now */
- if (crypt_finished) {
+ if (crypt_finished)
kcryptd_crypt_write_io_submit(io, 0);
- io->sector = sector;
- }
dec:
crypt_dec_pending(io);
@@ -2203,7 +2194,7 @@ static void kcryptd_crypt_read_continue(struct work_struct *work)
wait_for_completion(&io->ctx.restart);
reinit_completion(&io->ctx.restart);
- r = crypt_convert(cc, &io->ctx, true, false);
+ r = crypt_convert(cc, &io->ctx, false, false);
if (r)
io->error = r;
@@ -2221,7 +2212,6 @@ static void kcryptd_crypt_read_convert(struct dm_crypt_io *io)
crypt_inc_pending(io);
if (io->ctx.aead_recheck) {
- io->ctx.cc_sector = io->sector + cc->iv_offset;
r = crypt_convert(cc, &io->ctx,
test_bit(DM_CRYPT_NO_READ_WORKQUEUE, &cc->flags), true);
} else {
diff --git a/drivers/md/dm-ebs-target.c b/drivers/md/dm-ebs-target.c
index ec5db1478b2f..18ae45dcbfb2 100644
--- a/drivers/md/dm-ebs-target.c
+++ b/drivers/md/dm-ebs-target.c
@@ -442,7 +442,7 @@ static int ebs_iterate_devices(struct dm_target *ti,
static struct target_type ebs_target = {
.name = "ebs",
.version = {1, 0, 1},
- .features = DM_TARGET_PASSES_INTEGRITY,
+ .features = 0,
.module = THIS_MODULE,
.ctr = ebs_ctr,
.dtr = ebs_dtr,
diff --git a/drivers/md/dm-io.c b/drivers/md/dm-io.c
index d7a8e2f40db3..c37668790577 100644
--- a/drivers/md/dm-io.c
+++ b/drivers/md/dm-io.c
@@ -379,6 +379,7 @@ static void do_region(const blk_opf_t opf, unsigned int region,
atomic_inc(&io->count);
submit_bio(bio);
+ WARN_ON_ONCE(opf & REQ_ATOMIC && remaining);
} while (remaining);
}
diff --git a/drivers/md/dm-linear.c b/drivers/md/dm-linear.c
index 49fb0f684193..66318aba4bdb 100644
--- a/drivers/md/dm-linear.c
+++ b/drivers/md/dm-linear.c
@@ -199,9 +199,10 @@ static size_t linear_dax_recovery_write(struct dm_target *ti, pgoff_t pgoff,
static struct target_type linear_target = {
.name = "linear",
- .version = {1, 4, 0},
+ .version = {1, 5, 0},
.features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT |
- DM_TARGET_ZONED_HM | DM_TARGET_PASSES_CRYPTO,
+ DM_TARGET_ZONED_HM | DM_TARGET_PASSES_CRYPTO |
+ DM_TARGET_ATOMIC_WRITES,
.report_zones = linear_report_zones,
.module = THIS_MODULE,
.ctr = linear_ctr,
diff --git a/drivers/md/dm-ps-io-affinity.c b/drivers/md/dm-ps-io-affinity.c
index 461ee6b2044d..716807e511ee 100644
--- a/drivers/md/dm-ps-io-affinity.c
+++ b/drivers/md/dm-ps-io-affinity.c
@@ -116,7 +116,7 @@ static int ioa_create(struct path_selector *ps, unsigned int argc, char **argv)
if (!s)
return -ENOMEM;
- s->path_map = kzalloc(nr_cpu_ids * sizeof(struct path_info *),
+ s->path_map = kcalloc(nr_cpu_ids, sizeof(struct path_info *),
GFP_KERNEL);
if (!s->path_map)
goto free_selector;
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1e0d3b9b75d6..6adc55fd90d3 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -3196,7 +3196,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv)
if (reshape_sectors || rs_is_raid1(rs)) {
/*
* We can only prepare for a reshape here, because the
- * raid set needs to run to provide the repective reshape
+ * raid set needs to run to provide the respective reshape
* check functions via its MD personality instance.
*
* So do the reshape check after md_run() succeeded.
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9511dae5b556..8c6f1f7e6456 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -656,7 +656,7 @@ static void do_write(struct mirror_set *ms, struct bio *bio)
unsigned int i;
struct dm_io_region io[MAX_NR_MIRRORS], *dest = io;
struct mirror *m;
- blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH);
+ blk_opf_t op_flags = bio->bi_opf & (REQ_FUA | REQ_PREFLUSH | REQ_ATOMIC);
struct dm_io_request io_req = {
.bi_opf = REQ_OP_WRITE | op_flags,
.mem.type = DM_IO_BIO,
@@ -1483,8 +1483,9 @@ static int mirror_iterate_devices(struct dm_target *ti,
static struct target_type mirror_target = {
.name = "mirror",
- .version = {1, 14, 0},
+ .version = {1, 15, 0},
.module = THIS_MODULE,
+ .features = DM_TARGET_ATOMIC_WRITES,
.ctr = mirror_ctr,
.dtr = mirror_dtr,
.map = mirror_map,
diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c
index 499f8cc8a39f..e23076f7ece2 100644
--- a/drivers/md/dm-rq.c
+++ b/drivers/md/dm-rq.c
@@ -547,7 +547,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t)
md->tag_set->ops = &dm_mq_ops;
md->tag_set->queue_depth = dm_get_blk_mq_queue_depth();
md->tag_set->numa_node = md->numa_node_id;
- md->tag_set->flags = BLK_MQ_F_SHOULD_MERGE | BLK_MQ_F_STACKING;
+ md->tag_set->flags = BLK_MQ_F_STACKING;
md->tag_set->nr_hw_queues = dm_get_blk_mq_nr_hw_queues();
md->tag_set->driver_data = md;
diff --git a/drivers/md/dm-stripe.c b/drivers/md/dm-stripe.c
index 4112071de0be..3786ac67cefe 100644
--- a/drivers/md/dm-stripe.c
+++ b/drivers/md/dm-stripe.c
@@ -465,8 +465,9 @@ static void stripe_io_hints(struct dm_target *ti,
static struct target_type stripe_target = {
.name = "striped",
- .version = {1, 6, 0},
- .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT,
+ .version = {1, 7, 0},
+ .features = DM_TARGET_PASSES_INTEGRITY | DM_TARGET_NOWAIT |
+ DM_TARGET_ATOMIC_WRITES,
.module = THIS_MODULE,
.ctr = stripe_ctr,
.dtr = stripe_dtr,
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index bd8b796ae683..0ef5203387b2 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -1806,6 +1806,32 @@ static bool dm_table_supports_secure_erase(struct dm_table *t)
return true;
}
+static int device_not_atomic_write_capable(struct dm_target *ti,
+ struct dm_dev *dev, sector_t start,
+ sector_t len, void *data)
+{
+ return !bdev_can_atomic_write(dev->bdev);
+}
+
+static bool dm_table_supports_atomic_writes(struct dm_table *t)
+{
+ for (unsigned int i = 0; i < t->num_targets; i++) {
+ struct dm_target *ti = dm_table_get_target(t, i);
+
+ if (!dm_target_supports_atomic_writes(ti->type))
+ return false;
+
+ if (!ti->type->iterate_devices)
+ return false;
+
+ if (ti->type->iterate_devices(ti,
+ device_not_atomic_write_capable, NULL)) {
+ return false;
+ }
+ }
+ return true;
+}
+
int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
struct queue_limits *limits)
{
@@ -1854,6 +1880,9 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
return r;
}
+ if (dm_table_supports_atomic_writes(t))
+ limits->features |= BLK_FEAT_ATOMIC_WRITES;
+
r = queue_limits_set(q, limits);
if (r)
return r;
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index bf0f9dddd146..05cf4e3f2bbe 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -2332,10 +2332,9 @@ static struct thin_c *get_first_thin(struct pool *pool)
struct thin_c *tc = NULL;
rcu_read_lock();
- if (!list_empty(&pool->active_thins)) {
- tc = list_entry_rcu(pool->active_thins.next, struct thin_c, list);
+ tc = list_first_or_null_rcu(&pool->active_thins, struct thin_c, list);
+ if (tc)
thin_get(tc);
- }
rcu_read_unlock();
return tc;
diff --git a/drivers/md/dm-verity-fec.c b/drivers/md/dm-verity-fec.c
index 62b1a44b8dd2..0c41949db784 100644
--- a/drivers/md/dm-verity-fec.c
+++ b/drivers/md/dm-verity-fec.c
@@ -40,35 +40,23 @@ static inline u64 fec_interleave(struct dm_verity *v, u64 offset)
}
/*
- * Decode an RS block using Reed-Solomon.
- */
-static int fec_decode_rs8(struct dm_verity *v, struct dm_verity_fec_io *fio,
- u8 *data, u8 *fec, int neras)
-{
- int i;
- uint16_t par[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
-
- for (i = 0; i < v->fec->roots; i++)
- par[i] = fec[i];
-
- return decode_rs8(fio->rs, data, par, v->fec->rsn, NULL, neras,
- fio->erasures, 0, NULL);
-}
-
-/*
* Read error-correcting codes for the requested RS block. Returns a pointer
* to the data block. Caller is responsible for releasing buf.
*/
static u8 *fec_read_parity(struct dm_verity *v, u64 rsb, int index,
- unsigned int *offset, struct dm_buffer **buf,
- unsigned short ioprio)
+ unsigned int *offset, unsigned int par_buf_offset,
+ struct dm_buffer **buf, unsigned short ioprio)
{
u64 position, block, rem;
u8 *res;
+ /* We have already part of parity bytes read, skip to the next block */
+ if (par_buf_offset)
+ index++;
+
position = (index + rsb) * v->fec->roots;
block = div64_u64_rem(position, v->fec->io_size, &rem);
- *offset = (unsigned int)rem;
+ *offset = par_buf_offset ? 0 : (unsigned int)rem;
res = dm_bufio_read_with_ioprio(v->fec->bufio, block, buf, ioprio);
if (IS_ERR(res)) {
@@ -128,11 +116,13 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
{
int r, corrected = 0, res;
struct dm_buffer *buf;
- unsigned int n, i, offset;
+ unsigned int n, i, j, offset, par_buf_offset = 0;
+ uint16_t par_buf[DM_VERITY_FEC_RSM - DM_VERITY_FEC_MIN_RSN];
u8 *par, *block;
struct bio *bio = dm_bio_from_per_bio_data(io, v->ti->per_io_data_size);
- par = fec_read_parity(v, rsb, block_offset, &offset, &buf, bio_prio(bio));
+ par = fec_read_parity(v, rsb, block_offset, &offset,
+ par_buf_offset, &buf, bio->bi_ioprio);
if (IS_ERR(par))
return PTR_ERR(par);
@@ -142,7 +132,11 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
*/
fec_for_each_buffer_rs_block(fio, n, i) {
block = fec_buffer_rs_block(v, fio, n, i);
- res = fec_decode_rs8(v, fio, block, &par[offset], neras);
+ for (j = 0; j < v->fec->roots - par_buf_offset; j++)
+ par_buf[par_buf_offset + j] = par[offset + j];
+ /* Decode an RS block using Reed-Solomon */
+ res = decode_rs8(fio->rs, block, par_buf, v->fec->rsn,
+ NULL, neras, fio->erasures, 0, NULL);
if (res < 0) {
r = res;
goto error;
@@ -155,12 +149,22 @@ static int fec_decode_bufs(struct dm_verity *v, struct dm_verity_io *io,
if (block_offset >= 1 << v->data_dev_block_bits)
goto done;
- /* read the next block when we run out of parity bytes */
- offset += v->fec->roots;
+ /* Read the next block when we run out of parity bytes */
+ offset += (v->fec->roots - par_buf_offset);
+ /* Check if parity bytes are split between blocks */
+ if (offset < v->fec->io_size && (offset + v->fec->roots) > v->fec->io_size) {
+ par_buf_offset = v->fec->io_size - offset;
+ for (j = 0; j < par_buf_offset; j++)
+ par_buf[j] = par[offset + j];
+ offset += par_buf_offset;
+ } else
+ par_buf_offset = 0;
+
if (offset >= v->fec->io_size) {
dm_bufio_release(buf);
- par = fec_read_parity(v, rsb, block_offset, &offset, &buf, bio_prio(bio));
+ par = fec_read_parity(v, rsb, block_offset, &offset,
+ par_buf_offset, &buf, bio->bi_ioprio);
if (IS_ERR(par))
return PTR_ERR(par);
}
@@ -250,7 +254,7 @@ static int fec_read_bufs(struct dm_verity *v, struct dm_verity_io *io,
bufio = v->bufio;
}
- bbuf = dm_bufio_read_with_ioprio(bufio, block, &buf, bio_prio(bio));
+ bbuf = dm_bufio_read_with_ioprio(bufio, block, &buf, bio->bi_ioprio);
if (IS_ERR(bbuf)) {
DMWARN_LIMIT("%s: FEC %llu: read failed (%llu): %ld",
v->data_dev->name,
@@ -724,10 +728,7 @@ int verity_fec_ctr(struct dm_verity *v)
return -E2BIG;
}
- if ((f->roots << SECTOR_SHIFT) & ((1 << v->data_dev_block_bits) - 1))
- f->io_size = 1 << v->data_dev_block_bits;
- else
- f->io_size = v->fec->roots << SECTOR_SHIFT;
+ f->io_size = 1 << v->data_dev_block_bits;
f->bufio = dm_bufio_client_create(f->dev->bdev,
f->io_size,
diff --git a/drivers/md/dm-verity-target.c b/drivers/md/dm-verity-target.c
index 47d595f6a76e..e86c1431b108 100644
--- a/drivers/md/dm-verity-target.c
+++ b/drivers/md/dm-verity-target.c
@@ -321,7 +321,7 @@ static int verity_verify_level(struct dm_verity *v, struct dm_verity_io *io,
}
} else {
data = dm_bufio_read_with_ioprio(v->bufio, hash_block,
- &buf, bio_prio(bio));
+ &buf, bio->bi_ioprio);
}
if (IS_ERR(data))
@@ -789,7 +789,7 @@ static int verity_map(struct dm_target *ti, struct bio *bio)
verity_fec_init_io(io);
- verity_submit_prefetch(v, io, bio_prio(bio));
+ verity_submit_prefetch(v, io, bio->bi_ioprio);
submit_bio_noacct(bio);
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 12ecf07a3841..4d1e42891d24 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1479,12 +1479,12 @@ static void setup_split_accounting(struct clone_info *ci, unsigned int len)
static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
struct dm_target *ti, unsigned int num_bios,
- unsigned *len, gfp_t gfp_flag)
+ unsigned *len)
{
struct bio *bio;
- int try = (gfp_flag & GFP_NOWAIT) ? 0 : 1;
+ int try;
- for (; try < 2; try++) {
+ for (try = 0; try < 2; try++) {
int bio_nr;
if (try && num_bios > 1)
@@ -1508,8 +1508,7 @@ static void alloc_multiple_bios(struct bio_list *blist, struct clone_info *ci,
}
static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_target *ti,
- unsigned int num_bios, unsigned int *len,
- gfp_t gfp_flag)
+ unsigned int num_bios, unsigned int *len)
{
struct bio_list blist = BIO_EMPTY_LIST;
struct bio *clone;
@@ -1526,7 +1525,7 @@ static unsigned int __send_duplicate_bios(struct clone_info *ci, struct dm_targe
* Using alloc_multiple_bios(), even if num_bios is 1, to consistently
* support allocating using GFP_NOWAIT with GFP_NOIO fallback.
*/
- alloc_multiple_bios(&blist, ci, ti, num_bios, len, gfp_flag);
+ alloc_multiple_bios(&blist, ci, ti, num_bios, len);
while ((clone = bio_list_pop(&blist))) {
if (num_bios > 1)
dm_tio_set_flag(clone_to_tio(clone), DM_TIO_IS_DUPLICATE_BIO);
@@ -1564,7 +1563,7 @@ static void __send_empty_flush(struct clone_info *ci)
atomic_add(ti->num_flush_bios, &ci->io->io_count);
bios = __send_duplicate_bios(ci, ti, ti->num_flush_bios,
- NULL, GFP_NOWAIT);
+ NULL);
atomic_sub(ti->num_flush_bios - bios, &ci->io->io_count);
}
} else {
@@ -1612,7 +1611,7 @@ static void __send_abnormal_io(struct clone_info *ci, struct dm_target *ti,
__max_io_len(ti, ci->sector, max_granularity, max_sectors));
atomic_add(num_bios, &ci->io->io_count);
- bios = __send_duplicate_bios(ci, ti, num_bios, &len, GFP_NOIO);
+ bios = __send_duplicate_bios(ci, ti, num_bios, &len);
/*
* alloc_io() takes one extra reference for submission, so the
* reference won't reach 0 without the following (+1) subtraction
@@ -1746,6 +1745,9 @@ static blk_status_t __split_and_process_bio(struct clone_info *ci)
ci->submit_as_polled = !!(ci->bio->bi_opf & REQ_POLLED);
len = min_t(sector_t, max_io_len(ti, ci->sector), ci->sector_count);
+ if (ci->bio->bi_opf & REQ_ATOMIC && len != ci->sector_count)
+ return BLK_STS_IOERR;
+
setup_split_accounting(ci, len);
if (unlikely(ci->bio->bi_opf & REQ_NOWAIT)) {
@@ -1849,7 +1851,7 @@ static blk_status_t __send_zone_reset_all_emulated(struct clone_info *ci,
* not go crazy with the clone allocation.
*/
alloc_multiple_bios(&blist, ci, ti, min(nr_reset, 32),
- NULL, GFP_NOIO);
+ NULL);
}
/* Get a clone and change it to a regular reset operation. */
@@ -1881,7 +1883,7 @@ static void __send_zone_reset_all_native(struct clone_info *ci,
unsigned int bios;
atomic_add(1, &ci->io->io_count);
- bios = __send_duplicate_bios(ci, ti, 1, NULL, GFP_NOIO);
+ bios = __send_duplicate_bios(ci, ti, 1, NULL);
atomic_sub(1 - bios, &ci->io->io_count);
ci->sector_count = 0;
@@ -1969,6 +1971,15 @@ static void dm_split_and_process_bio(struct mapped_device *md,
/* Only support nowait for normal IO */
if (unlikely(bio->bi_opf & REQ_NOWAIT) && !is_abnormal) {
+ /*
+ * Don't support NOWAIT for FLUSH because it may allocate
+ * multiple bios and there's no easy way how to undo the
+ * allocations.
+ */
+ if (bio->bi_opf & REQ_PREFLUSH) {
+ bio_wouldblock_error(bio);
+ return;
+ }
io = alloc_io(md, bio, GFP_NOWAIT);
if (unlikely(!io)) {
/* Unable to do anything without dm_io. */
diff --git a/drivers/md/md-autodetect.c b/drivers/md/md-autodetect.c
index b2a00f213c2c..4b80165afd23 100644
--- a/drivers/md/md-autodetect.c
+++ b/drivers/md/md-autodetect.c
@@ -49,6 +49,7 @@ static int md_setup_ents __initdata;
* instead of just one. -- KTK
* 18May2000: Added support for persistent-superblock arrays:
* md=n,0,factor,fault,device-list uses RAID0 for device n
+ * md=n,-1,factor,fault,device-list uses LINEAR for device n
* md=n,device-list reads a RAID superblock from the devices
* elements in device-list are read by name_to_kdev_t so can be
* a hex number or something like /dev/hda1 /dev/sdb
@@ -87,7 +88,7 @@ static int __init md_setup(char *str)
md_setup_ents++;
switch (get_option(&str, &level)) { /* RAID level */
case 2: /* could be 0 or -1.. */
- if (level == 0) {
+ if (level == 0 || level == LEVEL_LINEAR) {
if (get_option(&str, &factor) != 2 || /* Chunk Size */
get_option(&str, &fault) != 2) {
printk(KERN_WARNING "md: Too few arguments supplied to md=.\n");
@@ -95,7 +96,10 @@ static int __init md_setup(char *str)
}
md_setup_args[ent].level = level;
md_setup_args[ent].chunk = 1 << (factor+12);
- pername = "raid0";
+ if (level == LEVEL_LINEAR)
+ pername = "linear";
+ else
+ pername = "raid0";
break;
}
fallthrough;
diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c
index c3a42dd66ce5..23c09d22fcdb 100644
--- a/drivers/md/md-bitmap.c
+++ b/drivers/md/md-bitmap.c
@@ -682,7 +682,7 @@ static void bitmap_update_sb(void *data)
return;
if (!bitmap->storage.sb_page) /* no superblock */
return;
- sb = kmap_atomic(bitmap->storage.sb_page);
+ sb = kmap_local_page(bitmap->storage.sb_page);
sb->events = cpu_to_le64(bitmap->mddev->events);
if (bitmap->mddev->events < bitmap->events_cleared)
/* rocking back to read-only */
@@ -702,7 +702,7 @@ static void bitmap_update_sb(void *data)
sb->nodes = cpu_to_le32(bitmap->mddev->bitmap_info.nodes);
sb->sectors_reserved = cpu_to_le32(bitmap->mddev->
bitmap_info.space);
- kunmap_atomic(sb);
+ kunmap_local(sb);
if (bitmap->storage.file)
write_file_page(bitmap, bitmap->storage.sb_page, 1);
@@ -717,7 +717,7 @@ static void bitmap_print_sb(struct bitmap *bitmap)
if (!bitmap || !bitmap->storage.sb_page)
return;
- sb = kmap_atomic(bitmap->storage.sb_page);
+ sb = kmap_local_page(bitmap->storage.sb_page);
pr_debug("%s: bitmap file superblock:\n", bmname(bitmap));
pr_debug(" magic: %08x\n", le32_to_cpu(sb->magic));
pr_debug(" version: %u\n", le32_to_cpu(sb->version));
@@ -736,7 +736,7 @@ static void bitmap_print_sb(struct bitmap *bitmap)
pr_debug(" sync size: %llu KB\n",
(unsigned long long)le64_to_cpu(sb->sync_size)/2);
pr_debug("max write behind: %u\n", le32_to_cpu(sb->write_behind));
- kunmap_atomic(sb);
+ kunmap_local(sb);
}
/*
@@ -760,7 +760,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
return -ENOMEM;
bitmap->storage.sb_index = 0;
- sb = kmap_atomic(bitmap->storage.sb_page);
+ sb = kmap_local_page(bitmap->storage.sb_page);
sb->magic = cpu_to_le32(BITMAP_MAGIC);
sb->version = cpu_to_le32(BITMAP_MAJOR_HI);
@@ -768,7 +768,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
chunksize = bitmap->mddev->bitmap_info.chunksize;
BUG_ON(!chunksize);
if (!is_power_of_2(chunksize)) {
- kunmap_atomic(sb);
+ kunmap_local(sb);
pr_warn("bitmap chunksize not a power of 2\n");
return -EINVAL;
}
@@ -803,7 +803,7 @@ static int md_bitmap_new_disk_sb(struct bitmap *bitmap)
sb->events_cleared = cpu_to_le64(bitmap->mddev->events);
bitmap->mddev->bitmap_info.nodes = 0;
- kunmap_atomic(sb);
+ kunmap_local(sb);
return 0;
}
@@ -865,7 +865,7 @@ re_read:
return err;
err = -EINVAL;
- sb = kmap_atomic(sb_page);
+ sb = kmap_local_page(sb_page);
chunksize = le32_to_cpu(sb->chunksize);
daemon_sleep = le32_to_cpu(sb->daemon_sleep) * HZ;
@@ -932,7 +932,7 @@ re_read:
err = 0;
out:
- kunmap_atomic(sb);
+ kunmap_local(sb);
if (err == 0 && nodes && (bitmap->cluster_slot < 0)) {
/* Assigning chunksize is required for "re_read" */
bitmap->mddev->bitmap_info.chunksize = chunksize;
@@ -1161,12 +1161,12 @@ static void md_bitmap_file_set_bit(struct bitmap *bitmap, sector_t block)
bit = file_page_offset(&bitmap->storage, chunk);
/* set the bit */
- kaddr = kmap_atomic(page);
+ kaddr = kmap_local_page(page);
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
set_bit(bit, kaddr);
else
set_bit_le(bit, kaddr);
- kunmap_atomic(kaddr);
+ kunmap_local(kaddr);
pr_debug("set file bit %lu page %lu\n", bit, index);
/* record page number so it gets flushed to disk when unplug occurs */
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_DIRTY);
@@ -1190,12 +1190,12 @@ static void md_bitmap_file_clear_bit(struct bitmap *bitmap, sector_t block)
if (!page)
return;
bit = file_page_offset(&bitmap->storage, chunk);
- paddr = kmap_atomic(page);
+ paddr = kmap_local_page(page);
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
clear_bit(bit, paddr);
else
clear_bit_le(bit, paddr);
- kunmap_atomic(paddr);
+ kunmap_local(paddr);
if (!test_page_attr(bitmap, index - node_offset, BITMAP_PAGE_NEEDWRITE)) {
set_page_attr(bitmap, index - node_offset, BITMAP_PAGE_PENDING);
bitmap->allclean = 0;
@@ -1214,12 +1214,12 @@ static int md_bitmap_file_test_bit(struct bitmap *bitmap, sector_t block)
if (!page)
return -EINVAL;
bit = file_page_offset(&bitmap->storage, chunk);
- paddr = kmap_atomic(page);
+ paddr = kmap_local_page(page);
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
set = test_bit(bit, paddr);
else
set = test_bit_le(bit, paddr);
- kunmap_atomic(paddr);
+ kunmap_local(paddr);
return set;
}
@@ -1388,9 +1388,9 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
* If the bitmap is out of date, dirty the whole page
* and write it out
*/
- paddr = kmap_atomic(page);
+ paddr = kmap_local_page(page);
memset(paddr + offset, 0xff, PAGE_SIZE - offset);
- kunmap_atomic(paddr);
+ kunmap_local(paddr);
filemap_write_page(bitmap, i, true);
if (test_bit(BITMAP_WRITE_ERROR, &bitmap->flags)) {
@@ -1406,12 +1406,12 @@ static int md_bitmap_init_from_disk(struct bitmap *bitmap, sector_t start)
void *paddr;
bool was_set;
- paddr = kmap_atomic(page);
+ paddr = kmap_local_page(page);
if (test_bit(BITMAP_HOSTENDIAN, &bitmap->flags))
was_set = test_bit(bit, paddr);
else
was_set = test_bit_le(bit, paddr);
- kunmap_atomic(paddr);
+ kunmap_local(paddr);
if (was_set) {
/* if the disk bit is set, set the memory bit */
@@ -1546,10 +1546,10 @@ static void bitmap_daemon_work(struct mddev *mddev)
bitmap_super_t *sb;
bitmap->need_sync = 0;
if (bitmap->storage.filemap) {
- sb = kmap_atomic(bitmap->storage.sb_page);
+ sb = kmap_local_page(bitmap->storage.sb_page);
sb->events_cleared =
cpu_to_le64(bitmap->events_cleared);
- kunmap_atomic(sb);
+ kunmap_local(sb);
set_page_attr(bitmap, 0,
BITMAP_PAGE_NEEDWRITE);
}
@@ -1671,24 +1671,13 @@ __acquires(bitmap->lock)
}
static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
- unsigned long sectors, bool behind)
+ unsigned long sectors)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return 0;
- if (behind) {
- int bw;
- atomic_inc(&bitmap->behind_writes);
- bw = atomic_read(&bitmap->behind_writes);
- if (bw > bitmap->behind_writes_used)
- bitmap->behind_writes_used = bw;
-
- pr_debug("inc write-behind count %d/%lu\n",
- bw, bitmap->mddev->bitmap_info.max_write_behind);
- }
-
while (sectors) {
sector_t blocks;
bitmap_counter_t *bmc;
@@ -1737,21 +1726,13 @@ static int bitmap_startwrite(struct mddev *mddev, sector_t offset,
}
static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
- unsigned long sectors, bool success, bool behind)
+ unsigned long sectors)
{
struct bitmap *bitmap = mddev->bitmap;
if (!bitmap)
return;
- if (behind) {
- if (atomic_dec_and_test(&bitmap->behind_writes))
- wake_up(&bitmap->behind_wait);
- pr_debug("dec write-behind count %d/%lu\n",
- atomic_read(&bitmap->behind_writes),
- bitmap->mddev->bitmap_info.max_write_behind);
- }
-
while (sectors) {
sector_t blocks;
unsigned long flags;
@@ -1764,15 +1745,16 @@ static void bitmap_endwrite(struct mddev *mddev, sector_t offset,
return;
}
- if (success && !bitmap->mddev->degraded &&
- bitmap->events_cleared < bitmap->mddev->events) {
- bitmap->events_cleared = bitmap->mddev->events;
- bitmap->need_sync = 1;
- sysfs_notify_dirent_safe(bitmap->sysfs_can_clear);
- }
-
- if (!success && !NEEDED(*bmc))
+ if (!bitmap->mddev->degraded) {
+ if (bitmap->events_cleared < bitmap->mddev->events) {
+ bitmap->events_cleared = bitmap->mddev->events;
+ bitmap->need_sync = 1;
+ sysfs_notify_dirent_safe(
+ bitmap->sysfs_can_clear);
+ }
+ } else if (!NEEDED(*bmc)) {
*bmc |= NEEDED_MASK;
+ }
if (COUNTER(*bmc) == COUNTER_MAX)
wake_up(&bitmap->overflow_wait);
@@ -2062,6 +2044,37 @@ static void md_bitmap_free(void *data)
kfree(bitmap);
}
+static void bitmap_start_behind_write(struct mddev *mddev)
+{
+ struct bitmap *bitmap = mddev->bitmap;
+ int bw;
+
+ if (!bitmap)
+ return;
+
+ atomic_inc(&bitmap->behind_writes);
+ bw = atomic_read(&bitmap->behind_writes);
+ if (bw > bitmap->behind_writes_used)
+ bitmap->behind_writes_used = bw;
+
+ pr_debug("inc write-behind count %d/%lu\n",
+ bw, bitmap->mddev->bitmap_info.max_write_behind);
+}
+
+static void bitmap_end_behind_write(struct mddev *mddev)
+{
+ struct bitmap *bitmap = mddev->bitmap;
+
+ if (!bitmap)
+ return;
+
+ if (atomic_dec_and_test(&bitmap->behind_writes))
+ wake_up(&bitmap->behind_wait);
+ pr_debug("dec write-behind count %d/%lu\n",
+ atomic_read(&bitmap->behind_writes),
+ bitmap->mddev->bitmap_info.max_write_behind);
+}
+
static void bitmap_wait_behind_writes(struct mddev *mddev)
{
struct bitmap *bitmap = mddev->bitmap;
@@ -2342,7 +2355,10 @@ static int bitmap_get_stats(void *data, struct md_bitmap_stats *stats)
if (!bitmap)
return -ENOENT;
-
+ if (bitmap->mddev->bitmap_info.external)
+ return -ENOENT;
+ if (!bitmap->storage.sb_page) /* no superblock */
+ return -EINVAL;
sb = kmap_local_page(bitmap->storage.sb_page);
stats->sync_size = le64_to_cpu(sb->sync_size);
kunmap_local(sb);
@@ -2981,6 +2997,9 @@ static struct bitmap_operations bitmap_ops = {
.dirty_bits = bitmap_dirty_bits,
.unplug = bitmap_unplug,
.daemon_work = bitmap_daemon_work,
+
+ .start_behind_write = bitmap_start_behind_write,
+ .end_behind_write = bitmap_end_behind_write,
.wait_behind_writes = bitmap_wait_behind_writes,
.startwrite = bitmap_startwrite,
diff --git a/drivers/md/md-bitmap.h b/drivers/md/md-bitmap.h
index 662e6fc141a7..31c93019c76b 100644
--- a/drivers/md/md-bitmap.h
+++ b/drivers/md/md-bitmap.h
@@ -84,12 +84,15 @@ struct bitmap_operations {
unsigned long e);
void (*unplug)(struct mddev *mddev, bool sync);
void (*daemon_work)(struct mddev *mddev);
+
+ void (*start_behind_write)(struct mddev *mddev);
+ void (*end_behind_write)(struct mddev *mddev);
void (*wait_behind_writes)(struct mddev *mddev);
int (*startwrite)(struct mddev *mddev, sector_t offset,
- unsigned long sectors, bool behind);
+ unsigned long sectors);
void (*endwrite)(struct mddev *mddev, sector_t offset,
- unsigned long sectors, bool success, bool behind);
+ unsigned long sectors);
bool (*start_sync)(struct mddev *mddev, sector_t offset,
sector_t *blocks, bool degraded);
void (*end_sync)(struct mddev *mddev, sector_t offset, sector_t *blocks);
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
new file mode 100644
index 000000000000..369aed044b40
--- /dev/null
+++ b/drivers/md/md-linear.c
@@ -0,0 +1,352 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * linear.c : Multiple Devices driver for Linux Copyright (C) 1994-96 Marc
+ * ZYNGIER <zyngier@ufr-info-p7.ibp.fr> or <maz@gloups.fdn.fr>
+ */
+
+#include <linux/blkdev.h>
+#include <linux/raid/md_u.h>
+#include <linux/seq_file.h>
+#include <linux/module.h>
+#include <linux/slab.h>
+#include <trace/events/block.h>
+#include "md.h"
+
+struct dev_info {
+ struct md_rdev *rdev;
+ sector_t end_sector;
+};
+
+struct linear_conf {
+ struct rcu_head rcu;
+ sector_t array_sectors;
+ /* a copy of mddev->raid_disks */
+ int raid_disks;
+ struct dev_info disks[] __counted_by(raid_disks);
+};
+
+/*
+ * find which device holds a particular offset
+ */
+static inline struct dev_info *which_dev(struct mddev *mddev, sector_t sector)
+{
+ int lo, mid, hi;
+ struct linear_conf *conf;
+
+ lo = 0;
+ hi = mddev->raid_disks - 1;
+ conf = mddev->private;
+
+ /*
+ * Binary Search
+ */
+
+ while (hi > lo) {
+
+ mid = (hi + lo) / 2;
+ if (sector < conf->disks[mid].end_sector)
+ hi = mid;
+ else
+ lo = mid + 1;
+ }
+
+ return conf->disks + lo;
+}
+
+static sector_t linear_size(struct mddev *mddev, sector_t sectors, int raid_disks)
+{
+ struct linear_conf *conf;
+ sector_t array_sectors;
+
+ conf = mddev->private;
+ WARN_ONCE(sectors || raid_disks,
+ "%s does not support generic reshape\n", __func__);
+ array_sectors = conf->array_sectors;
+
+ return array_sectors;
+}
+
+static int linear_set_limits(struct mddev *mddev)
+{
+ struct queue_limits lim;
+ int err;
+
+ md_init_stacking_limits(&lim);
+ lim.max_hw_sectors = mddev->chunk_sectors;
+ lim.max_write_zeroes_sectors = mddev->chunk_sectors;
+ lim.io_min = mddev->chunk_sectors << 9;
+ err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
+ if (err)
+ return err;
+
+ return queue_limits_set(mddev->gendisk->queue, &lim);
+}
+
+static struct linear_conf *linear_conf(struct mddev *mddev, int raid_disks)
+{
+ struct linear_conf *conf;
+ struct md_rdev *rdev;
+ int ret = -EINVAL;
+ int cnt;
+ int i;
+
+ conf = kzalloc(struct_size(conf, disks, raid_disks), GFP_KERNEL);
+ if (!conf)
+ return ERR_PTR(-ENOMEM);
+
+ /*
+ * conf->raid_disks is copy of mddev->raid_disks. The reason to
+ * keep a copy of mddev->raid_disks in struct linear_conf is,
+ * mddev->raid_disks may not be consistent with pointers number of
+ * conf->disks[] when it is updated in linear_add() and used to
+ * iterate old conf->disks[] earray in linear_congested().
+ * Here conf->raid_disks is always consitent with number of
+ * pointers in conf->disks[] array, and mddev->private is updated
+ * with rcu_assign_pointer() in linear_addr(), such race can be
+ * avoided.
+ */
+ conf->raid_disks = raid_disks;
+
+ cnt = 0;
+ conf->array_sectors = 0;
+
+ rdev_for_each(rdev, mddev) {
+ int j = rdev->raid_disk;
+ struct dev_info *disk = conf->disks + j;
+ sector_t sectors;
+
+ if (j < 0 || j >= raid_disks || disk->rdev) {
+ pr_warn("md/linear:%s: disk numbering problem. Aborting!\n",
+ mdname(mddev));
+ goto out;
+ }
+
+ disk->rdev = rdev;
+ if (mddev->chunk_sectors) {
+ sectors = rdev->sectors;
+ sector_div(sectors, mddev->chunk_sectors);
+ rdev->sectors = sectors * mddev->chunk_sectors;
+ }
+
+ conf->array_sectors += rdev->sectors;
+ cnt++;
+ }
+ if (cnt != raid_disks) {
+ pr_warn("md/linear:%s: not enough drives present. Aborting!\n",
+ mdname(mddev));
+ goto out;
+ }
+
+ /*
+ * Here we calculate the device offsets.
+ */
+ conf->disks[0].end_sector = conf->disks[0].rdev->sectors;
+
+ for (i = 1; i < raid_disks; i++)
+ conf->disks[i].end_sector =
+ conf->disks[i-1].end_sector +
+ conf->disks[i].rdev->sectors;
+
+ if (!mddev_is_dm(mddev)) {
+ ret = linear_set_limits(mddev);
+ if (ret)
+ goto out;
+ }
+
+ return conf;
+
+out:
+ kfree(conf);
+ return ERR_PTR(ret);
+}
+
+static int linear_run(struct mddev *mddev)
+{
+ struct linear_conf *conf;
+ int ret;
+
+ if (md_check_no_bitmap(mddev))
+ return -EINVAL;
+
+ conf = linear_conf(mddev, mddev->raid_disks);
+ if (IS_ERR(conf))
+ return PTR_ERR(conf);
+
+ mddev->private = conf;
+ md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
+
+ ret = md_integrity_register(mddev);
+ if (ret) {
+ kfree(conf);
+ mddev->private = NULL;
+ }
+ return ret;
+}
+
+static int linear_add(struct mddev *mddev, struct md_rdev *rdev)
+{
+ /* Adding a drive to a linear array allows the array to grow.
+ * It is permitted if the new drive has a matching superblock
+ * already on it, with raid_disk equal to raid_disks.
+ * It is achieved by creating a new linear_private_data structure
+ * and swapping it in in-place of the current one.
+ * The current one is never freed until the array is stopped.
+ * This avoids races.
+ */
+ struct linear_conf *newconf, *oldconf;
+
+ if (rdev->saved_raid_disk != mddev->raid_disks)
+ return -EINVAL;
+
+ rdev->raid_disk = rdev->saved_raid_disk;
+ rdev->saved_raid_disk = -1;
+
+ newconf = linear_conf(mddev, mddev->raid_disks + 1);
+ if (IS_ERR(newconf))
+ return PTR_ERR(newconf);
+
+ /* newconf->raid_disks already keeps a copy of * the increased
+ * value of mddev->raid_disks, WARN_ONCE() is just used to make
+ * sure of this. It is possible that oldconf is still referenced
+ * in linear_congested(), therefore kfree_rcu() is used to free
+ * oldconf until no one uses it anymore.
+ */
+ oldconf = rcu_dereference_protected(mddev->private,
+ lockdep_is_held(&mddev->reconfig_mutex));
+ mddev->raid_disks++;
+ WARN_ONCE(mddev->raid_disks != newconf->raid_disks,
+ "copied raid_disks doesn't match mddev->raid_disks");
+ rcu_assign_pointer(mddev->private, newconf);
+ md_set_array_sectors(mddev, linear_size(mddev, 0, 0));
+ set_capacity_and_notify(mddev->gendisk, mddev->array_sectors);
+ kfree_rcu(oldconf, rcu);
+ return 0;
+}
+
+static void linear_free(struct mddev *mddev, void *priv)
+{
+ struct linear_conf *conf = priv;
+
+ kfree(conf);
+}
+
+static bool linear_make_request(struct mddev *mddev, struct bio *bio)
+{
+ struct dev_info *tmp_dev;
+ sector_t start_sector, end_sector, data_offset;
+ sector_t bio_sector = bio->bi_iter.bi_sector;
+
+ if (unlikely(bio->bi_opf & REQ_PREFLUSH)
+ && md_flush_request(mddev, bio))
+ return true;
+
+ tmp_dev = which_dev(mddev, bio_sector);
+ start_sector = tmp_dev->end_sector - tmp_dev->rdev->sectors;
+ end_sector = tmp_dev->end_sector;
+ data_offset = tmp_dev->rdev->data_offset;
+
+ if (unlikely(bio_sector >= end_sector ||
+ bio_sector < start_sector))
+ goto out_of_bounds;
+
+ if (unlikely(is_rdev_broken(tmp_dev->rdev))) {
+ md_error(mddev, tmp_dev->rdev);
+ bio_io_error(bio);
+ return true;
+ }
+
+ if (unlikely(bio_end_sector(bio) > end_sector)) {
+ /* This bio crosses a device boundary, so we have to split it */
+ struct bio *split = bio_split(bio, end_sector - bio_sector,
+ GFP_NOIO, &mddev->bio_set);
+
+ if (IS_ERR(split)) {
+ bio->bi_status = errno_to_blk_status(PTR_ERR(split));
+ bio_endio(bio);
+ return true;
+ }
+
+ bio_chain(split, bio);
+ submit_bio_noacct(bio);
+ bio = split;
+ }
+
+ md_account_bio(mddev, &bio);
+ bio_set_dev(bio, tmp_dev->rdev->bdev);
+ bio->bi_iter.bi_sector = bio->bi_iter.bi_sector -
+ start_sector + data_offset;
+
+ if (unlikely((bio_op(bio) == REQ_OP_DISCARD) &&
+ !bdev_max_discard_sectors(bio->bi_bdev))) {
+ /* Just ignore it */
+ bio_endio(bio);
+ } else {
+ if (mddev->gendisk)
+ trace_block_bio_remap(bio, disk_devt(mddev->gendisk),
+ bio_sector);
+ mddev_check_write_zeroes(mddev, bio);
+ submit_bio_noacct(bio);
+ }
+ return true;
+
+out_of_bounds:
+ pr_err("md/linear:%s: make_request: Sector %llu out of bounds on dev %pg: %llu sectors, offset %llu\n",
+ mdname(mddev),
+ (unsigned long long)bio->bi_iter.bi_sector,
+ tmp_dev->rdev->bdev,
+ (unsigned long long)tmp_dev->rdev->sectors,
+ (unsigned long long)start_sector);
+ bio_io_error(bio);
+ return true;
+}
+
+static void linear_status(struct seq_file *seq, struct mddev *mddev)
+{
+ seq_printf(seq, " %dk rounding", mddev->chunk_sectors / 2);
+}
+
+static void linear_error(struct mddev *mddev, struct md_rdev *rdev)
+{
+ if (!test_and_set_bit(MD_BROKEN, &mddev->flags)) {
+ char *md_name = mdname(mddev);
+
+ pr_crit("md/linear%s: Disk failure on %pg detected, failing array.\n",
+ md_name, rdev->bdev);
+ }
+}
+
+static void linear_quiesce(struct mddev *mddev, int state)
+{
+}
+
+static struct md_personality linear_personality = {
+ .name = "linear",
+ .level = LEVEL_LINEAR,
+ .owner = THIS_MODULE,
+ .make_request = linear_make_request,
+ .run = linear_run,
+ .free = linear_free,
+ .status = linear_status,
+ .hot_add_disk = linear_add,
+ .size = linear_size,
+ .quiesce = linear_quiesce,
+ .error_handler = linear_error,
+};
+
+static int __init linear_init(void)
+{
+ return register_md_personality(&linear_personality);
+}
+
+static void linear_exit(void)
+{
+ unregister_md_personality(&linear_personality);
+}
+
+module_init(linear_init);
+module_exit(linear_exit);
+MODULE_LICENSE("GPL");
+MODULE_DESCRIPTION("Linear device concatenation personality for MD (deprecated)");
+MODULE_ALIAS("md-personality-1"); /* LINEAR - deprecated*/
+MODULE_ALIAS("md-linear");
+MODULE_ALIAS("md-level--1");
diff --git a/drivers/md/md.c b/drivers/md/md.c
index aebe12b0ee27..30b3dbbce2d2 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -294,7 +294,7 @@ void mddev_destroy_serial_pool(struct mddev *mddev, struct md_rdev *rdev)
static struct ctl_table_header *raid_table_header;
-static struct ctl_table raid_table[] = {
+static const struct ctl_table raid_table[] = {
{
.procname = "speed_limit_min",
.data = &sysctl_speed_limit_min,
@@ -8124,7 +8124,7 @@ void md_error(struct mddev *mddev, struct md_rdev *rdev)
return;
mddev->pers->error_handler(mddev, rdev);
- if (mddev->pers->level == 0)
+ if (mddev->pers->level == 0 || mddev->pers->level == LEVEL_LINEAR)
return;
if (mddev->degraded && !test_bit(MD_BROKEN, &mddev->flags))
@@ -8376,6 +8376,10 @@ static int md_seq_show(struct seq_file *seq, void *v)
return 0;
spin_unlock(&all_mddevs_lock);
+
+ /* prevent bitmap to be freed after checking */
+ mutex_lock(&mddev->bitmap_info.mutex);
+
spin_lock(&mddev->lock);
if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
seq_printf(seq, "%s : ", mdname(mddev));
@@ -8451,6 +8455,7 @@ static int md_seq_show(struct seq_file *seq, void *v)
seq_printf(seq, "\n");
}
spin_unlock(&mddev->lock);
+ mutex_unlock(&mddev->bitmap_info.mutex);
spin_lock(&all_mddevs_lock);
if (mddev == list_last_entry(&all_mddevs, struct mddev, all_mddevs))
@@ -8745,12 +8750,32 @@ void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev,
}
EXPORT_SYMBOL_GPL(md_submit_discard_bio);
+static void md_bitmap_start(struct mddev *mddev,
+ struct md_io_clone *md_io_clone)
+{
+ if (mddev->pers->bitmap_sector)
+ mddev->pers->bitmap_sector(mddev, &md_io_clone->offset,
+ &md_io_clone->sectors);
+
+ mddev->bitmap_ops->startwrite(mddev, md_io_clone->offset,
+ md_io_clone->sectors);
+}
+
+static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone)
+{
+ mddev->bitmap_ops->endwrite(mddev, md_io_clone->offset,
+ md_io_clone->sectors);
+}
+
static void md_end_clone_io(struct bio *bio)
{
struct md_io_clone *md_io_clone = bio->bi_private;
struct bio *orig_bio = md_io_clone->orig_bio;
struct mddev *mddev = md_io_clone->mddev;
+ if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
+ md_bitmap_end(mddev, md_io_clone);
+
if (bio->bi_status && !orig_bio->bi_status)
orig_bio->bi_status = bio->bi_status;
@@ -8775,6 +8800,12 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio)
if (blk_queue_io_stat(bdev->bd_disk->queue))
md_io_clone->start_time = bio_start_io_acct(*bio);
+ if (bio_data_dir(*bio) == WRITE && mddev->bitmap) {
+ md_io_clone->offset = (*bio)->bi_iter.bi_sector;
+ md_io_clone->sectors = bio_sectors(*bio);
+ md_bitmap_start(mddev, md_io_clone);
+ }
+
clone->bi_end_io = md_end_clone_io;
clone->bi_private = md_io_clone;
*bio = clone;
@@ -8793,6 +8824,9 @@ void md_free_cloned_bio(struct bio *bio)
struct bio *orig_bio = md_io_clone->orig_bio;
struct mddev *mddev = md_io_clone->mddev;
+ if (bio_data_dir(orig_bio) == WRITE && mddev->bitmap)
+ md_bitmap_end(mddev, md_io_clone);
+
if (bio->bi_status && !orig_bio->bi_status)
orig_bio->bi_status = bio->bi_status;
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 4ba93af36126..def808064ad8 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -746,6 +746,9 @@ struct md_personality
void *(*takeover) (struct mddev *mddev);
/* Changes the consistency policy of an active array. */
int (*change_consistency_policy)(struct mddev *mddev, const char *buf);
+ /* convert io ranges from array to bitmap */
+ void (*bitmap_sector)(struct mddev *mddev, sector_t *offset,
+ unsigned long *sectors);
};
struct md_sysfs_entry {
@@ -828,6 +831,8 @@ struct md_io_clone {
struct mddev *mddev;
struct bio *orig_bio;
unsigned long start_time;
+ sector_t offset;
+ unsigned long sectors;
struct bio bio_clone;
};
diff --git a/drivers/md/persistent-data/dm-array.c b/drivers/md/persistent-data/dm-array.c
index 157c9bd2fed7..8f8792e55806 100644
--- a/drivers/md/persistent-data/dm-array.c
+++ b/drivers/md/persistent-data/dm-array.c
@@ -917,23 +917,27 @@ static int load_ablock(struct dm_array_cursor *c)
if (c->block)
unlock_ablock(c->info, c->block);
- c->block = NULL;
- c->ab = NULL;
c->index = 0;
r = dm_btree_cursor_get_value(&c->cursor, &key, &value_le);
if (r) {
DMERR("dm_btree_cursor_get_value failed");
- dm_btree_cursor_end(&c->cursor);
+ goto out;
} else {
r = get_ablock(c->info, le64_to_cpu(value_le), &c->block, &c->ab);
if (r) {
DMERR("get_ablock failed");
- dm_btree_cursor_end(&c->cursor);
+ goto out;
}
}
+ return 0;
+
+out:
+ dm_btree_cursor_end(&c->cursor);
+ c->block = NULL;
+ c->ab = NULL;
return r;
}
@@ -956,10 +960,10 @@ EXPORT_SYMBOL_GPL(dm_array_cursor_begin);
void dm_array_cursor_end(struct dm_array_cursor *c)
{
- if (c->block) {
+ if (c->block)
unlock_ablock(c->info, c->block);
- dm_btree_cursor_end(&c->cursor);
- }
+
+ dm_btree_cursor_end(&c->cursor);
}
EXPORT_SYMBOL_GPL(dm_array_cursor_end);
@@ -999,6 +1003,7 @@ int dm_array_cursor_skip(struct dm_array_cursor *c, uint32_t count)
}
count -= remaining;
+ c->index += (remaining - 1);
r = dm_array_cursor_next(c);
} while (!r);
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index c7ba4e6cbbc7..98c745d90f48 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -13,6 +13,7 @@
#include <linux/export.h>
#include <linux/mutex.h>
#include <linux/hash.h>
+#include <linux/rbtree.h>
#include <linux/slab.h>
#include <linux/device-mapper.h>
@@ -77,7 +78,7 @@ static void prefetch_issue(struct prefetch_set *p, struct dm_block_manager *bm)
/*----------------------------------------------------------------*/
struct shadow_info {
- struct hlist_node hlist;
+ struct rb_node node;
dm_block_t where;
};
@@ -95,7 +96,7 @@ struct dm_transaction_manager {
struct dm_space_map *sm;
spinlock_t lock;
- struct hlist_head buckets[DM_HASH_SIZE];
+ struct rb_root buckets[DM_HASH_SIZE];
struct prefetch_set prefetches;
};
@@ -106,14 +107,22 @@ static int is_shadow(struct dm_transaction_manager *tm, dm_block_t b)
{
int r = 0;
unsigned int bucket = dm_hash_block(b, DM_HASH_MASK);
- struct shadow_info *si;
+ struct rb_node **node;
spin_lock(&tm->lock);
- hlist_for_each_entry(si, tm->buckets + bucket, hlist)
- if (si->where == b) {
+ node = &tm->buckets[bucket].rb_node;
+ while (*node) {
+ struct shadow_info *si =
+ rb_entry(*node, struct shadow_info, node);
+ if (b == si->where) {
r = 1;
break;
}
+ if (b < si->where)
+ node = &si->node.rb_left;
+ else
+ node = &si->node.rb_right;
+ }
spin_unlock(&tm->lock);
return r;
@@ -130,30 +139,41 @@ static void insert_shadow(struct dm_transaction_manager *tm, dm_block_t b)
si = kmalloc(sizeof(*si), GFP_NOIO);
if (si) {
+ struct rb_node **node, *parent;
si->where = b;
bucket = dm_hash_block(b, DM_HASH_MASK);
+
spin_lock(&tm->lock);
- hlist_add_head(&si->hlist, tm->buckets + bucket);
+ node = &tm->buckets[bucket].rb_node;
+ parent = NULL;
+ while (*node) {
+ struct shadow_info *si =
+ rb_entry(*node, struct shadow_info, node);
+ parent = *node;
+ if (b < si->where)
+ node = &si->node.rb_left;
+ else
+ node = &si->node.rb_right;
+ }
+ rb_link_node(&si->node, parent, node);
+ rb_insert_color(&si->node, &tm->buckets[bucket]);
spin_unlock(&tm->lock);
}
}
static void wipe_shadow_table(struct dm_transaction_manager *tm)
{
- struct shadow_info *si;
- struct hlist_node *tmp;
- struct hlist_head *bucket;
- int i;
+ unsigned int i;
spin_lock(&tm->lock);
for (i = 0; i < DM_HASH_SIZE; i++) {
- bucket = tm->buckets + i;
- hlist_for_each_entry_safe(si, tmp, bucket, hlist)
+ while (!RB_EMPTY_ROOT(&tm->buckets[i])) {
+ struct shadow_info *si =
+ rb_entry(tm->buckets[i].rb_node, struct shadow_info, node);
+ rb_erase(&si->node, &tm->buckets[i]);
kfree(si);
-
- INIT_HLIST_HEAD(bucket);
+ }
}
-
spin_unlock(&tm->lock);
}
@@ -162,7 +182,7 @@ static void wipe_shadow_table(struct dm_transaction_manager *tm)
static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
struct dm_space_map *sm)
{
- int i;
+ unsigned int i;
struct dm_transaction_manager *tm;
tm = kmalloc(sizeof(*tm), GFP_KERNEL);
@@ -176,7 +196,7 @@ static struct dm_transaction_manager *dm_tm_create(struct dm_block_manager *bm,
spin_lock_init(&tm->lock);
for (i = 0; i < DM_HASH_SIZE; i++)
- INIT_HLIST_HEAD(tm->buckets + i);
+ tm->buckets[i] = RB_ROOT;
prefetch_init(&tm->prefetches);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 7049ec7fb8eb..70bcc3cdf2cd 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -384,12 +384,10 @@ static int raid0_set_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * mddev->raid_disks;
- lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
- if (err) {
- queue_limits_cancel_update(mddev->gendisk->queue);
+ if (err)
return err;
- }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 519c56f0ee3d..10ea3af40991 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -420,10 +420,8 @@ static void close_write(struct r1bio *r1_bio)
r1_bio->behind_master_bio = NULL;
}
- /* clear the bitmap if all writes complete successfully */
- mddev->bitmap_ops->endwrite(mddev, r1_bio->sector, r1_bio->sectors,
- !test_bit(R1BIO_Degraded, &r1_bio->state),
- test_bit(R1BIO_BehindIO, &r1_bio->state));
+ if (test_bit(R1BIO_BehindIO, &r1_bio->state))
+ mddev->bitmap_ops->end_behind_write(mddev);
md_write_end(mddev);
}
@@ -480,8 +478,6 @@ static void raid1_end_write_request(struct bio *bio)
if (!test_bit(Faulty, &rdev->flags))
set_bit(R1BIO_WriteError, &r1_bio->state);
else {
- /* Fail the request */
- set_bit(R1BIO_Degraded, &r1_bio->state);
/* Finished with this branch */
r1_bio->bios[mirror] = NULL;
to_put = bio;
@@ -1535,11 +1531,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
write_behind = true;
r1_bio->bios[i] = NULL;
- if (!rdev || test_bit(Faulty, &rdev->flags)) {
- if (i < conf->raid_disks)
- set_bit(R1BIO_Degraded, &r1_bio->state);
+ if (!rdev || test_bit(Faulty, &rdev->flags))
continue;
- }
atomic_inc(&rdev->nr_pending);
if (test_bit(WriteErrorSeen, &rdev->flags)) {
@@ -1558,16 +1551,6 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
*/
max_sectors = bad_sectors;
rdev_dec_pending(rdev, mddev);
- /* We don't set R1BIO_Degraded as that
- * only applies if the disk is
- * missing, so it might be re-added,
- * and we want to know to recover this
- * chunk.
- * In this case the device is here,
- * and the fact that this chunk is not
- * in-sync is recorded in the bad
- * block log
- */
continue;
}
if (is_bad) {
@@ -1645,9 +1628,8 @@ static void raid1_write_request(struct mddev *mddev, struct bio *bio,
stats.behind_writes < max_write_behind)
alloc_behind_master_bio(r1_bio, bio);
- mddev->bitmap_ops->startwrite(
- mddev, r1_bio->sector, r1_bio->sectors,
- test_bit(R1BIO_BehindIO, &r1_bio->state));
+ if (test_bit(R1BIO_BehindIO, &r1_bio->state))
+ mddev->bitmap_ops->start_behind_write(mddev);
first_clone = 0;
}
@@ -2614,12 +2596,10 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
* errors.
*/
fail = true;
- if (!narrow_write_error(r1_bio, m)) {
+ if (!narrow_write_error(r1_bio, m))
md_error(conf->mddev,
conf->mirrors[m].rdev);
/* an I/O failed, we can't clear the bitmap */
- set_bit(R1BIO_Degraded, &r1_bio->state);
- }
rdev_dec_pending(conf->mirrors[m].rdev,
conf->mddev);
}
@@ -2710,8 +2690,6 @@ static void raid1d(struct md_thread *thread)
list_del(&r1_bio->retry_list);
idx = sector_to_idx(r1_bio->sector);
atomic_dec(&conf->nr_queued[idx]);
- if (mddev->degraded)
- set_bit(R1BIO_Degraded, &r1_bio->state);
if (test_bit(R1BIO_WriteError, &r1_bio->state))
close_write(r1_bio);
raid_end_bio_io(r1_bio);
@@ -3239,12 +3217,10 @@ static int raid1_set_limits(struct mddev *mddev)
md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
- lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
- if (err) {
- queue_limits_cancel_update(mddev->gendisk->queue);
+ if (err)
return err;
- }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
diff --git a/drivers/md/raid1.h b/drivers/md/raid1.h
index 5300cbaa58a4..33f318fcc268 100644
--- a/drivers/md/raid1.h
+++ b/drivers/md/raid1.h
@@ -188,7 +188,6 @@ struct r1bio {
enum r1bio_state {
R1BIO_Uptodate,
R1BIO_IsSync,
- R1BIO_Degraded,
R1BIO_BehindIO,
/* Set ReadError on bios that experience a readerror so that
* raid1d knows what to do with them.
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 7d7a8a2524dc..15b9ae5bf84d 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -428,10 +428,6 @@ static void close_write(struct r10bio *r10_bio)
{
struct mddev *mddev = r10_bio->mddev;
- /* clear the bitmap if all writes complete successfully */
- mddev->bitmap_ops->endwrite(mddev, r10_bio->sector, r10_bio->sectors,
- !test_bit(R10BIO_Degraded, &r10_bio->state),
- false);
md_write_end(mddev);
}
@@ -501,7 +497,6 @@ static void raid10_end_write_request(struct bio *bio)
set_bit(R10BIO_WriteError, &r10_bio->state);
else {
/* Fail the request */
- set_bit(R10BIO_Degraded, &r10_bio->state);
r10_bio->devs[slot].bio = NULL;
to_put = bio;
dec_rdev = 1;
@@ -1438,10 +1433,8 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
r10_bio->devs[i].bio = NULL;
r10_bio->devs[i].repl_bio = NULL;
- if (!rdev && !rrdev) {
- set_bit(R10BIO_Degraded, &r10_bio->state);
+ if (!rdev && !rrdev)
continue;
- }
if (rdev && test_bit(WriteErrorSeen, &rdev->flags)) {
sector_t first_bad;
sector_t dev_sector = r10_bio->devs[i].addr;
@@ -1458,14 +1451,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
* to other devices yet
*/
max_sectors = bad_sectors;
- /* We don't set R10BIO_Degraded as that
- * only applies if the disk is missing,
- * so it might be re-added, and we want to
- * know to recover this chunk.
- * In this case the device is here, and the
- * fact that this chunk is not in-sync is
- * recorded in the bad block log.
- */
continue;
}
if (is_bad) {
@@ -1519,8 +1504,6 @@ static void raid10_write_request(struct mddev *mddev, struct bio *bio,
md_account_bio(mddev, &bio);
r10_bio->master_bio = bio;
atomic_set(&r10_bio->remaining, 1);
- mddev->bitmap_ops->startwrite(mddev, r10_bio->sector, r10_bio->sectors,
- false);
for (i = 0; i < conf->copies; i++) {
if (r10_bio->devs[i].bio)
@@ -2966,11 +2949,8 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev_dec_pending(rdev, conf->mddev);
} else if (bio != NULL && bio->bi_status) {
fail = true;
- if (!narrow_write_error(r10_bio, m)) {
+ if (!narrow_write_error(r10_bio, m))
md_error(conf->mddev, rdev);
- set_bit(R10BIO_Degraded,
- &r10_bio->state);
- }
rdev_dec_pending(rdev, conf->mddev);
}
bio = r10_bio->devs[m].repl_bio;
@@ -3029,8 +3009,6 @@ static void raid10d(struct md_thread *thread)
r10_bio = list_first_entry(&tmp, struct r10bio,
retry_list);
list_del(&r10_bio->retry_list);
- if (mddev->degraded)
- set_bit(R10BIO_Degraded, &r10_bio->state);
if (test_bit(R10BIO_WriteError,
&r10_bio->state))
@@ -4040,12 +4018,10 @@ static int raid10_set_queue_limits(struct mddev *mddev)
lim.max_write_zeroes_sectors = 0;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
- lim.features |= BLK_FEAT_ATOMIC_WRITES_STACKED;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
- if (err) {
- queue_limits_cancel_update(mddev->gendisk->queue);
+ if (err)
return err;
- }
return queue_limits_set(mddev->gendisk->queue, &lim);
}
diff --git a/drivers/md/raid10.h b/drivers/md/raid10.h
index 2e75e88d0802..3f16ad6904a9 100644
--- a/drivers/md/raid10.h
+++ b/drivers/md/raid10.h
@@ -161,7 +161,6 @@ enum r10bio_state {
R10BIO_IsSync,
R10BIO_IsRecover,
R10BIO_IsReshape,
- R10BIO_Degraded,
/* Set ReadError on bios that experience a read error
* so that raid10d knows what to do with them.
*/
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index b4f7b79fd187..e530271cb86b 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -313,10 +313,6 @@ void r5c_handle_cached_data_endio(struct r5conf *conf,
if (sh->dev[i].written) {
set_bit(R5_UPTODATE, &sh->dev[i].flags);
r5c_return_dev_pending_writes(conf, &sh->dev[i]);
- conf->mddev->bitmap_ops->endwrite(conf->mddev,
- sh->sector, RAID5_STRIPE_SECTORS(conf),
- !test_bit(STRIPE_DEGRADED, &sh->state),
- false);
}
}
}
@@ -1023,10 +1019,10 @@ int r5l_write_stripe(struct r5l_log *log, struct stripe_head *sh)
/* checksum is already calculated in last run */
if (test_bit(STRIPE_LOG_TRAPPED, &sh->state))
continue;
- addr = kmap_atomic(sh->dev[i].page);
+ addr = kmap_local_page(sh->dev[i].page);
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
addr, PAGE_SIZE);
- kunmap_atomic(addr);
+ kunmap_local(addr);
}
parity_pages = 1 + !!(sh->qd_idx >= 0);
data_pages = write_disks - parity_pages;
@@ -1979,9 +1975,9 @@ r5l_recovery_verify_data_checksum(struct r5l_log *log,
u32 checksum;
r5l_recovery_read_page(log, ctx, page, log_offset);
- addr = kmap_atomic(page);
+ addr = kmap_local_page(page);
checksum = crc32c_le(log->uuid_checksum, addr, PAGE_SIZE);
- kunmap_atomic(addr);
+ kunmap_local(addr);
return (le32_to_cpu(log_checksum) == checksum) ? 0 : -EINVAL;
}
@@ -2381,11 +2377,11 @@ r5c_recovery_rewrite_data_only_stripes(struct r5l_log *log,
payload->size = cpu_to_le32(BLOCK_SECTORS);
payload->location = cpu_to_le64(
raid5_compute_blocknr(sh, i, 0));
- addr = kmap_atomic(dev->page);
+ addr = kmap_local_page(dev->page);
payload->checksum[0] = cpu_to_le32(
crc32c_le(log->uuid_checksum, addr,
PAGE_SIZE));
- kunmap_atomic(addr);
+ kunmap_local(addr);
sync_page_io(log->rdev, write_pos, PAGE_SIZE,
dev->page, REQ_OP_WRITE, false);
write_pos = r5l_ring_add(log, write_pos,
@@ -2888,10 +2884,10 @@ int r5c_cache_data(struct r5l_log *log, struct stripe_head *sh)
if (!test_bit(R5_Wantwrite, &sh->dev[i].flags))
continue;
- addr = kmap_atomic(sh->dev[i].page);
+ addr = kmap_local_page(sh->dev[i].page);
sh->dev[i].log_checksum = crc32c_le(log->uuid_checksum,
addr, PAGE_SIZE);
- kunmap_atomic(addr);
+ kunmap_local(addr);
pages++;
}
WARN_ON(pages == 0);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index f09e7677ee9f..5c79429acc64 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -906,8 +906,7 @@ static bool stripe_can_batch(struct stripe_head *sh)
if (raid5_has_log(conf) || raid5_has_ppl(conf))
return false;
return test_bit(STRIPE_BATCH_READY, &sh->state) &&
- !test_bit(STRIPE_BITMAP_PENDING, &sh->state) &&
- is_full_stripe_write(sh);
+ is_full_stripe_write(sh);
}
/* we only do back search */
@@ -1345,8 +1344,6 @@ again:
submit_bio_noacct(rbi);
}
if (!rdev && !rrdev) {
- if (op_is_write(op))
- set_bit(STRIPE_DEGRADED, &sh->state);
pr_debug("skip op %d on disc %d for sector %llu\n",
bi->bi_opf, i, (unsigned long long)sh->sector);
clear_bit(R5_LOCKED, &sh->dev[i].flags);
@@ -2884,7 +2881,6 @@ static void raid5_end_write_request(struct bio *bi)
set_bit(R5_MadeGoodRepl, &sh->dev[i].flags);
} else {
if (bi->bi_status) {
- set_bit(STRIPE_DEGRADED, &sh->state);
set_bit(WriteErrorSeen, &rdev->flags);
set_bit(R5_WriteError, &sh->dev[i].flags);
if (!test_and_set_bit(WantReplacement, &rdev->flags))
@@ -3548,29 +3544,9 @@ static void __add_stripe_bio(struct stripe_head *sh, struct bio *bi,
(*bip)->bi_iter.bi_sector, sh->sector, dd_idx,
sh->dev[dd_idx].sector);
- if (conf->mddev->bitmap && firstwrite) {
- /* Cannot hold spinlock over bitmap_startwrite,
- * but must ensure this isn't added to a batch until
- * we have added to the bitmap and set bm_seq.
- * So set STRIPE_BITMAP_PENDING to prevent
- * batching.
- * If multiple __add_stripe_bio() calls race here they
- * much all set STRIPE_BITMAP_PENDING. So only the first one
- * to complete "bitmap_startwrite" gets to set
- * STRIPE_BIT_DELAY. This is important as once a stripe
- * is added to a batch, STRIPE_BIT_DELAY cannot be changed
- * any more.
- */
- set_bit(STRIPE_BITMAP_PENDING, &sh->state);
- spin_unlock_irq(&sh->stripe_lock);
- conf->mddev->bitmap_ops->startwrite(conf->mddev, sh->sector,
- RAID5_STRIPE_SECTORS(conf), false);
- spin_lock_irq(&sh->stripe_lock);
- clear_bit(STRIPE_BITMAP_PENDING, &sh->state);
- if (!sh->batch_head) {
- sh->bm_seq = conf->seq_flush+1;
- set_bit(STRIPE_BIT_DELAY, &sh->state);
- }
+ if (conf->mddev->bitmap && firstwrite && !sh->batch_head) {
+ sh->bm_seq = conf->seq_flush+1;
+ set_bit(STRIPE_BIT_DELAY, &sh->state);
}
}
@@ -3621,7 +3597,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
BUG_ON(sh->batch_head);
for (i = disks; i--; ) {
struct bio *bi;
- int bitmap_end = 0;
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
struct md_rdev *rdev = conf->disks[i].rdev;
@@ -3646,8 +3621,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].towrite = NULL;
sh->overwrite_disks = 0;
spin_unlock_irq(&sh->stripe_lock);
- if (bi)
- bitmap_end = 1;
log_stripe_write_finished(sh);
@@ -3662,11 +3635,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
bio_io_error(bi);
bi = nextbi;
}
- if (bitmap_end)
- conf->mddev->bitmap_ops->endwrite(conf->mddev,
- sh->sector, RAID5_STRIPE_SECTORS(conf),
- false, false);
- bitmap_end = 0;
/* and fail all 'written' */
bi = sh->dev[i].written;
sh->dev[i].written = NULL;
@@ -3675,7 +3643,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
sh->dev[i].page = sh->dev[i].orig_page;
}
- if (bi) bitmap_end = 1;
while (bi && bi->bi_iter.bi_sector <
sh->dev[i].sector + RAID5_STRIPE_SECTORS(conf)) {
struct bio *bi2 = r5_next_bio(conf, bi, sh->dev[i].sector);
@@ -3709,10 +3676,6 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
bi = nextbi;
}
}
- if (bitmap_end)
- conf->mddev->bitmap_ops->endwrite(conf->mddev,
- sh->sector, RAID5_STRIPE_SECTORS(conf),
- false, false);
/* If we were in the middle of a write the parity block might
* still be locked - so just clear all R5_LOCKED flags
*/
@@ -4061,10 +4024,7 @@ returnbi:
bio_endio(wbi);
wbi = wbi2;
}
- conf->mddev->bitmap_ops->endwrite(conf->mddev,
- sh->sector, RAID5_STRIPE_SECTORS(conf),
- !test_bit(STRIPE_DEGRADED, &sh->state),
- false);
+
if (head_sh->batch_head) {
sh = list_first_entry(&sh->batch_list,
struct stripe_head,
@@ -4341,7 +4301,6 @@ static void handle_parity_checks5(struct r5conf *conf, struct stripe_head *sh,
s->locked++;
set_bit(R5_Wantwrite, &dev->flags);
- clear_bit(STRIPE_DEGRADED, &sh->state);
set_bit(STRIPE_INSYNC, &sh->state);
break;
case check_state_run:
@@ -4498,7 +4457,6 @@ static void handle_parity_checks6(struct r5conf *conf, struct stripe_head *sh,
clear_bit(R5_Wantwrite, &dev->flags);
s->locked--;
}
- clear_bit(STRIPE_DEGRADED, &sh->state);
set_bit(STRIPE_INSYNC, &sh->state);
break;
@@ -4891,8 +4849,7 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
(1 << STRIPE_COMPUTE_RUN) |
(1 << STRIPE_DISCARD) |
(1 << STRIPE_BATCH_READY) |
- (1 << STRIPE_BATCH_ERR) |
- (1 << STRIPE_BITMAP_PENDING)),
+ (1 << STRIPE_BATCH_ERR)),
"stripe state: %lx\n", sh->state);
WARN_ONCE(head_sh->state & ((1 << STRIPE_DISCARD) |
(1 << STRIPE_REPLACED)),
@@ -4900,7 +4857,6 @@ static void break_stripe_batch_list(struct stripe_head *head_sh,
set_mask_bits(&sh->state, ~(STRIPE_EXPAND_SYNC_FLAGS |
(1 << STRIPE_PREREAD_ACTIVE) |
- (1 << STRIPE_DEGRADED) |
(1 << STRIPE_ON_UNPLUG_LIST)),
head_sh->state & (1 << STRIPE_INSYNC));
@@ -5784,10 +5740,6 @@ static void make_discard_request(struct mddev *mddev, struct bio *bi)
}
spin_unlock_irq(&sh->stripe_lock);
if (conf->mddev->bitmap) {
- for (d = 0; d < conf->raid_disks - conf->max_degraded;
- d++)
- mddev->bitmap_ops->startwrite(mddev, sh->sector,
- RAID5_STRIPE_SECTORS(conf), false);
sh->bm_seq = conf->seq_flush + 1;
set_bit(STRIPE_BIT_DELAY, &sh->state);
}
@@ -5928,6 +5880,54 @@ static enum reshape_loc get_reshape_loc(struct mddev *mddev,
return LOC_BEHIND_RESHAPE;
}
+static void raid5_bitmap_sector(struct mddev *mddev, sector_t *offset,
+ unsigned long *sectors)
+{
+ struct r5conf *conf = mddev->private;
+ sector_t start = *offset;
+ sector_t end = start + *sectors;
+ sector_t prev_start = start;
+ sector_t prev_end = end;
+ int sectors_per_chunk;
+ enum reshape_loc loc;
+ int dd_idx;
+
+ sectors_per_chunk = conf->chunk_sectors *
+ (conf->raid_disks - conf->max_degraded);
+ start = round_down(start, sectors_per_chunk);
+ end = round_up(end, sectors_per_chunk);
+
+ start = raid5_compute_sector(conf, start, 0, &dd_idx, NULL);
+ end = raid5_compute_sector(conf, end, 0, &dd_idx, NULL);
+
+ /*
+ * For LOC_INSIDE_RESHAPE, this IO will wait for reshape to make
+ * progress, hence it's the same as LOC_BEHIND_RESHAPE.
+ */
+ loc = get_reshape_loc(mddev, conf, prev_start);
+ if (likely(loc != LOC_AHEAD_OF_RESHAPE)) {
+ *offset = start;
+ *sectors = end - start;
+ return;
+ }
+
+ sectors_per_chunk = conf->prev_chunk_sectors *
+ (conf->previous_raid_disks - conf->max_degraded);
+ prev_start = round_down(prev_start, sectors_per_chunk);
+ prev_end = round_down(prev_end, sectors_per_chunk);
+
+ prev_start = raid5_compute_sector(conf, prev_start, 1, &dd_idx, NULL);
+ prev_end = raid5_compute_sector(conf, prev_end, 1, &dd_idx, NULL);
+
+ /*
+ * for LOC_AHEAD_OF_RESHAPE, reshape can make progress before this IO
+ * is handled in make_stripe_request(), we can't know this here hence
+ * we set bits for both.
+ */
+ *offset = min(start, prev_start);
+ *sectors = max(end, prev_end) - *offset;
+}
+
static enum stripe_result make_stripe_request(struct mddev *mddev,
struct r5conf *conf, struct stripe_request_ctx *ctx,
sector_t logical_sector, struct bio *bi)
@@ -8976,6 +8976,7 @@ static struct md_personality raid6_personality =
.takeover = raid6_takeover,
.change_consistency_policy = raid5_change_consistency_policy,
.prepare_suspend = raid5_prepare_suspend,
+ .bitmap_sector = raid5_bitmap_sector,
};
static struct md_personality raid5_personality =
{
@@ -9001,6 +9002,7 @@ static struct md_personality raid5_personality =
.takeover = raid5_takeover,
.change_consistency_policy = raid5_change_consistency_policy,
.prepare_suspend = raid5_prepare_suspend,
+ .bitmap_sector = raid5_bitmap_sector,
};
static struct md_personality raid4_personality =
@@ -9027,6 +9029,7 @@ static struct md_personality raid4_personality =
.takeover = raid4_takeover,
.change_consistency_policy = raid5_change_consistency_policy,
.prepare_suspend = raid5_prepare_suspend,
+ .bitmap_sector = raid5_bitmap_sector,
};
static int __init raid5_init(void)
diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h
index d174e586698f..eafc6e9ed6ee 100644
--- a/drivers/md/raid5.h
+++ b/drivers/md/raid5.h
@@ -358,7 +358,6 @@ enum {
STRIPE_REPLACED,
STRIPE_PREREAD_ACTIVE,
STRIPE_DELAYED,
- STRIPE_DEGRADED,
STRIPE_BIT_DELAY,
STRIPE_EXPANDING,
STRIPE_EXPAND_SOURCE,
@@ -372,9 +371,6 @@ enum {
STRIPE_ON_RELEASE_LIST,
STRIPE_BATCH_READY,
STRIPE_BATCH_ERR,
- STRIPE_BITMAP_PENDING, /* Being added to bitmap, don't add
- * to batch yet.
- */
STRIPE_LOG_TRAPPED, /* trapped into log (see raid5-cache.c)
* this bit is used in two scenarios:
*