author     Jens Axboe <axboe@kernel.dk>    2025-11-11 16:58:11 +0300
committer  Jens Axboe <axboe@kernel.dk>    2025-11-11 16:58:11 +0300
commit     3d076988aaaee6b0c8f4db953fd4fe8194ec370f (patch)
tree       4c7a3b0625a674b41462dc5c871fcfab020402a9
parent     4cda40dce95a5b4ec0620a84f322472730d01f7a (diff)
parent     62ed1b58224636185fa689db81224b8c8af46473 (diff)
Merge tag 'md-6.19-20251111' of gitolite.kernel.org:pub/scm/linux/kernel/git/mdraid/linux into for-6.19/block
Pull MD changes from Yu:

"- Change maintainer's email address (Yu Kuai)
 - Data can be lost if array is created with different lbs devices, fix
   this problem and record lbs of the array in metadata (Li Nan)
 - Fix rcu protection for md_thread (Yun Zhou)
 - Fix mddev kobject lifetime regression (Xiao Ni)
 - Enable atomic writes for md-linear (John Garry)
 - Some cleanups (Chen Ni, Huiwen He, Wu Guanghao)"

* tag 'md-6.19-20251111' of gitolite.kernel.org:pub/scm/linux/kernel/git/mdraid/linux:
  md: allow configuring logical block size
  md: add check_new_feature module parameter
  md/raid0: Move queue limit setup before r0conf initialization
  md: init bioset in mddev_init
  md: delete md_redundancy_group when array is becoming inactive
  md: prevent adding disks with larger logical_block_size to active arrays
  md/raid5: remove redundant __GFP_NOWARN
  md: avoid repeated calls to del_gendisk
  md/md-llbitmap: Remove unneeded semicolon
  md/md-linear: Enable atomic writes
  Factor out code into md_should_do_recovery()
  md: fix rcu protection in md_wakeup_thread
  md: delete mddev kobj before deleting gendisk kobj
  MAINTAINERS: Update Yu Kuai's E-mail address
-rw-r--r--   Documentation/admin-guide/md.rst     10
-rw-r--r--   MAINTAINERS                           4
-rw-r--r--   drivers/md/md-linear.c                2
-rw-r--r--   drivers/md/md-llbitmap.c              2
-rw-r--r--   drivers/md/md.c                     247
-rw-r--r--   drivers/md/md.h                      10
-rw-r--r--   drivers/md/raid0.c                   17
-rw-r--r--   drivers/md/raid1.c                    1
-rw-r--r--   drivers/md/raid10.c                   1
-rw-r--r--   drivers/md/raid5-cache.c              2
-rw-r--r--   drivers/md/raid5.c                    1
-rw-r--r--   include/uapi/linux/raid/md_p.h        3
12 files changed, 225 insertions, 75 deletions
diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst
index deed823eab01..dc7eab191caa 100644
--- a/Documentation/admin-guide/md.rst
+++ b/Documentation/admin-guide/md.rst
@@ -238,6 +238,16 @@ All md devices contain:
the number of devices in a raid4/5/6, or to support external
metadata formats which mandate such clipping.
+ logical_block_size
+ Configure the array's logical block size in bytes. This attribute
+ is only supported for 1.x metadata. Write the value before starting
+ the array. The final array LBS is the maximum of this value and the
+ LBS of all member devices. Note that the LBS cannot exceed PAGE_SIZE
+ until RAID gains folio support.
+ WARNING: Arrays created on a new kernel cannot be assembled on an old
+ kernel due to the padding check. Set the module parameter
+ 'check_new_feature' to false to bypass the check, but data loss may occur.
+
reshape_position
This is either ``none`` or a sector number within the devices of
the array where ``reshape`` is up to. If this is set, the three
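The attribute documented above stacks with the limits of the member devices: the effective array LBS ends up as the maximum of the configured value and the logical block size of every member. A minimal sketch of that derivation, assuming the semantics described in the hunk above (effective_array_lbs() and the 512-byte fallback for an unconfigured attribute are illustrative assumptions, not code from this patch):

/*
 * Illustration only, not the md code: the effective array LBS is the
 * maximum of the value written to the sysfs attribute and the LBS of
 * every member device.
 */
#include <stddef.h>

static unsigned int effective_array_lbs(unsigned int configured_lbs,
					const unsigned int *member_lbs,
					size_t nr_members)
{
	/* assumption: an unconfigured attribute falls back to the block
	 * layer's 512-byte default before stacking */
	unsigned int lbs = configured_lbs ? configured_lbs : 512;
	size_t i;

	for (i = 0; i < nr_members; i++)
		if (member_lbs[i] > lbs)
			lbs = member_lbs[i];

	return lbs;
}

For example, configuring 4096 on members that are all 512-byte devices yields 4096, while configuring 512 on an array containing a 4096-byte member still yields 4096.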
diff --git a/MAINTAINERS b/MAINTAINERS
index 545a4776795e..ce7f67e09910 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4300,7 +4300,7 @@ F: Documentation/filesystems/befs.rst
F: fs/befs/
BFQ I/O SCHEDULER
-M: Yu Kuai <yukuai3@huawei.com>
+M: Yu Kuai <yukuai@fnnas.com>
L: linux-block@vger.kernel.org
S: Odd Fixes
F: Documentation/block/bfq-iosched.rst
@@ -23842,7 +23842,7 @@ F: include/linux/property.h
SOFTWARE RAID (Multiple Disks) SUPPORT
M: Song Liu <song@kernel.org>
-M: Yu Kuai <yukuai3@huawei.com>
+M: Yu Kuai <yukuai@fnnas.com>
L: linux-raid@vger.kernel.org
S: Supported
Q: https://patchwork.kernel.org/project/linux-raid/list/
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 7033d982d377..8d7b82c4a723 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -72,9 +72,11 @@ static int linear_set_limits(struct mddev *mddev)
md_init_stacking_limits(&lim);
lim.max_hw_sectors = mddev->chunk_sectors;
+ lim.logical_block_size = mddev->logical_block_size;
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.max_hw_wzeroes_unmap_sectors = mddev->chunk_sectors;
lim.io_min = mddev->chunk_sectors << 9;
+ lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err)
return err;
diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 1eb434306162..9c1ade19b774 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -378,7 +378,7 @@ static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap,
case BitClean:
pctl->state[pos] = BitDirty;
break;
- };
+ }
}
}
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 41c476b40c7a..7b5c5967568f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -99,7 +99,7 @@ static int remove_and_add_spares(struct mddev *mddev,
struct md_rdev *this);
static void mddev_detach(struct mddev *mddev);
static void export_rdev(struct md_rdev *rdev, struct mddev *mddev);
-static void md_wakeup_thread_directly(struct md_thread __rcu *thread);
+static void md_wakeup_thread_directly(struct md_thread __rcu **thread);
/*
* Default number of read corrections we'll attempt on an rdev
@@ -339,6 +339,7 @@ static int start_readonly;
*/
static bool create_on_open = true;
static bool legacy_async_del_gendisk = true;
+static bool check_new_feature = true;
/*
* We have a system wide 'event count' that is incremented
@@ -730,6 +731,8 @@ static void mddev_clear_bitmap_ops(struct mddev *mddev)
int mddev_init(struct mddev *mddev)
{
+ int err = 0;
+
if (!IS_ENABLED(CONFIG_MD_BITMAP))
mddev->bitmap_id = ID_BITMAP_NONE;
else
@@ -741,10 +744,23 @@ int mddev_init(struct mddev *mddev)
if (percpu_ref_init(&mddev->writes_pending, no_op,
PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
- percpu_ref_exit(&mddev->active_io);
- return -ENOMEM;
+ err = -ENOMEM;
+ goto exit_active_io;
}
+ err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (err)
+ goto exit_writes_pending;
+
+ err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+ if (err)
+ goto exit_bio_set;
+
+ err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
+ offsetof(struct md_io_clone, bio_clone), 0);
+ if (err)
+ goto exit_sync_set;
+
/* We want to start with the refcount at zero */
percpu_ref_put(&mddev->writes_pending);
@@ -773,11 +789,24 @@ int mddev_init(struct mddev *mddev)
INIT_WORK(&mddev->del_work, mddev_delayed_delete);
return 0;
+
+exit_sync_set:
+ bioset_exit(&mddev->sync_set);
+exit_bio_set:
+ bioset_exit(&mddev->bio_set);
+exit_writes_pending:
+ percpu_ref_exit(&mddev->writes_pending);
+exit_active_io:
+ percpu_ref_exit(&mddev->active_io);
+ return err;
}
EXPORT_SYMBOL_GPL(mddev_init);
void mddev_destroy(struct mddev *mddev)
{
+ bioset_exit(&mddev->bio_set);
+ bioset_exit(&mddev->sync_set);
+ bioset_exit(&mddev->io_clone_set);
percpu_ref_exit(&mddev->active_io);
percpu_ref_exit(&mddev->writes_pending);
}
@@ -941,8 +970,11 @@ void mddev_unlock(struct mddev *mddev)
* do_md_stop. dm raid only uses md_stop to stop. So dm raid
* doesn't need to check MD_DELETED when getting reconfig lock
*/
- if (test_bit(MD_DELETED, &mddev->flags))
+ if (test_bit(MD_DELETED, &mddev->flags) &&
+ !test_and_set_bit(MD_DO_DELETE, &mddev->flags)) {
+ kobject_del(&mddev->kobj);
del_gendisk(mddev->gendisk);
+ }
}
}
EXPORT_SYMBOL_GPL(mddev_unlock);
@@ -1820,9 +1852,13 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
}
if (sb->pad0 ||
sb->pad3[0] ||
- memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
- /* Some padding is non-zero, might be a new feature */
- return -EINVAL;
+ memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1]))) {
+ pr_warn("Some padding is non-zero on %pg, might be a new feature\n",
+ rdev->bdev);
+ if (check_new_feature)
+ return -EINVAL;
+ pr_warn("check_new_feature is disabled, data corruption possible\n");
+ }
rdev->preferred_minor = 0xffff;
rdev->data_offset = le64_to_cpu(sb->data_offset);
@@ -1963,6 +1999,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc
mddev->layout = le32_to_cpu(sb->layout);
mddev->raid_disks = le32_to_cpu(sb->raid_disks);
mddev->dev_sectors = le64_to_cpu(sb->size);
+ mddev->logical_block_size = le32_to_cpu(sb->logical_block_size);
mddev->events = ev1;
mddev->bitmap_info.offset = 0;
mddev->bitmap_info.space = 0;
@@ -2172,6 +2209,7 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
sb->level = cpu_to_le32(mddev->level);
sb->layout = cpu_to_le32(mddev->layout);
+ sb->logical_block_size = cpu_to_le32(mddev->logical_block_size);
if (test_bit(FailFast, &rdev->flags))
sb->devflags |= FailFast1;
else
@@ -5134,7 +5172,7 @@ static void stop_sync_thread(struct mddev *mddev, bool locked)
* Thread might be blocked waiting for metadata update which will now
* never happen
*/
- md_wakeup_thread_directly(mddev->sync_thread);
+ md_wakeup_thread_directly(&mddev->sync_thread);
if (work_pending(&mddev->sync_work))
flush_work(&mddev->sync_work);
@@ -5900,6 +5938,68 @@ static struct md_sysfs_entry md_serialize_policy =
__ATTR(serialize_policy, S_IRUGO | S_IWUSR, serialize_policy_show,
serialize_policy_store);
+static int mddev_set_logical_block_size(struct mddev *mddev,
+ unsigned int lbs)
+{
+ int err = 0;
+ struct queue_limits lim;
+
+ if (queue_logical_block_size(mddev->gendisk->queue) >= lbs) {
+ pr_err("%s: Cannot set LBS smaller than mddev LBS %u\n",
+ mdname(mddev), lbs);
+ return -EINVAL;
+ }
+
+ lim = queue_limits_start_update(mddev->gendisk->queue);
+ lim.logical_block_size = lbs;
+ pr_info("%s: logical_block_size is changed, data may be lost\n",
+ mdname(mddev));
+ err = queue_limits_commit_update(mddev->gendisk->queue, &lim);
+ if (err)
+ return err;
+
+ mddev->logical_block_size = lbs;
+ /* New lbs will be written to superblock after array is running */
+ set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
+ return 0;
+}
+
+static ssize_t
+lbs_show(struct mddev *mddev, char *page)
+{
+ return sprintf(page, "%u\n", mddev->logical_block_size);
+}
+
+static ssize_t
+lbs_store(struct mddev *mddev, const char *buf, size_t len)
+{
+ unsigned int lbs;
+ int err = -EBUSY;
+
+ /* Only 1.x meta supports configurable LBS */
+ if (mddev->major_version == 0)
+ return -EINVAL;
+
+ if (mddev->pers)
+ return -EBUSY;
+
+ err = kstrtouint(buf, 10, &lbs);
+ if (err < 0)
+ return -EINVAL;
+
+ err = mddev_lock(mddev);
+ if (err)
+ goto unlock;
+
+ err = mddev_set_logical_block_size(mddev, lbs);
+
+unlock:
+ mddev_unlock(mddev);
+ return err ?: len;
+}
+
+static struct md_sysfs_entry md_logical_block_size =
+__ATTR(logical_block_size, 0644, lbs_show, lbs_store);
static struct attribute *md_default_attrs[] = {
&md_level.attr,
@@ -5922,6 +6022,7 @@ static struct attribute *md_default_attrs[] = {
&md_consistency_policy.attr,
&md_fail_last_dev.attr,
&md_serialize_policy.attr,
+ &md_logical_block_size.attr,
NULL,
};
@@ -6052,6 +6153,17 @@ int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim,
return -EINVAL;
}
+ /*
+ * Until RAID gains folio support, the logical_block_size
+ * must not exceed the page size.
+ */
+ if (lim->logical_block_size > PAGE_SIZE) {
+ pr_err("%s: logical_block_size must not larger than PAGE_SIZE\n",
+ mdname(mddev));
+ return -EINVAL;
+ }
+ mddev->logical_block_size = lim->logical_block_size;
+
return 0;
}
EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);
@@ -6064,6 +6176,13 @@ int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev)
if (mddev_is_dm(mddev))
return 0;
+ if (queue_logical_block_size(rdev->bdev->bd_disk->queue) >
+ queue_logical_block_size(mddev->gendisk->queue)) {
+ pr_err("%s: incompatible logical_block_size, can not add\n",
+ mdname(mddev));
+ return -EINVAL;
+ }
+
lim = queue_limits_start_update(mddev->gendisk->queue);
queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset,
mddev->gendisk->disk_name);
@@ -6384,29 +6503,9 @@ int md_run(struct mddev *mddev)
nowait = nowait && bdev_nowait(rdev->bdev);
}
- if (!bioset_initialized(&mddev->bio_set)) {
- err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (err)
- return err;
- }
- if (!bioset_initialized(&mddev->sync_set)) {
- err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
- if (err)
- goto exit_bio_set;
- }
-
- if (!bioset_initialized(&mddev->io_clone_set)) {
- err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
- offsetof(struct md_io_clone, bio_clone), 0);
- if (err)
- goto exit_sync_set;
- }
-
pers = get_pers(mddev->level, mddev->clevel);
- if (!pers) {
- err = -EINVAL;
- goto abort;
- }
+ if (!pers)
+ return -EINVAL;
if (mddev->level != pers->head.id) {
mddev->level = pers->head.id;
mddev->new_level = pers->head.id;
@@ -6417,8 +6516,7 @@ int md_run(struct mddev *mddev)
pers->start_reshape == NULL) {
/* This personality cannot handle reshaping... */
put_pers(pers);
- err = -EINVAL;
- goto abort;
+ return -EINVAL;
}
if (pers->sync_request) {
@@ -6545,12 +6643,6 @@ bitmap_abort:
mddev->private = NULL;
put_pers(pers);
md_bitmap_destroy(mddev);
-abort:
- bioset_exit(&mddev->io_clone_set);
-exit_sync_set:
- bioset_exit(&mddev->sync_set);
-exit_bio_set:
- bioset_exit(&mddev->bio_set);
return err;
}
EXPORT_SYMBOL_GPL(md_run);
@@ -6683,6 +6775,7 @@ static void md_clean(struct mddev *mddev)
mddev->chunk_sectors = 0;
mddev->ctime = mddev->utime = 0;
mddev->layout = 0;
+ mddev->logical_block_size = 0;
mddev->max_disks = 0;
mddev->events = 0;
mddev->can_decrease_events = 0;
@@ -6775,10 +6868,6 @@ static void __md_stop(struct mddev *mddev)
mddev->private = NULL;
put_pers(pers);
clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
- bioset_exit(&mddev->bio_set);
- bioset_exit(&mddev->sync_set);
- bioset_exit(&mddev->io_clone_set);
}
void md_stop(struct mddev *mddev)
@@ -6869,6 +6958,10 @@ static int do_md_stop(struct mddev *mddev, int mode)
if (!md_is_rdwr(mddev))
set_disk_ro(disk, 0);
+ if (mode == 2 && mddev->pers->sync_request &&
+ mddev->to_remove == NULL)
+ mddev->to_remove = &md_redundancy_group;
+
__md_stop_writes(mddev);
__md_stop(mddev);
@@ -8373,22 +8466,21 @@ static int md_thread(void *arg)
return 0;
}
-static void md_wakeup_thread_directly(struct md_thread __rcu *thread)
+static void md_wakeup_thread_directly(struct md_thread __rcu **thread)
{
struct md_thread *t;
rcu_read_lock();
- t = rcu_dereference(thread);
+ t = rcu_dereference(*thread);
if (t)
wake_up_process(t->tsk);
rcu_read_unlock();
}
-void md_wakeup_thread(struct md_thread __rcu *thread)
+void __md_wakeup_thread(struct md_thread __rcu *thread)
{
struct md_thread *t;
- rcu_read_lock();
t = rcu_dereference(thread);
if (t) {
pr_debug("md: waking up MD thread %s.\n", t->tsk->comm);
@@ -8396,9 +8488,8 @@ void md_wakeup_thread(struct md_thread __rcu *thread)
if (wq_has_sleeper(&t->wqueue))
wake_up(&t->wqueue);
}
- rcu_read_unlock();
}
-EXPORT_SYMBOL(md_wakeup_thread);
+EXPORT_SYMBOL(__md_wakeup_thread);
struct md_thread *md_register_thread(void (*run) (struct md_thread *),
struct mddev *mddev, const char *name)
@@ -9978,6 +10069,52 @@ static void unregister_sync_thread(struct mddev *mddev)
md_reap_sync_thread(mddev);
}
+static bool md_should_do_recovery(struct mddev *mddev)
+{
+ /*
+ * As long as one of the following flags is set,
+ * recovery either needs to run or needs to be cleaned up.
+ */
+ if (test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
+ test_bit(MD_RECOVERY_DONE, &mddev->recovery))
+ return true;
+
+ /*
+ * If no flags are set and it is in read-only status,
+ * there is nothing to do.
+ */
+ if (!md_is_rdwr(mddev))
+ return false;
+
+ /*
+ * MD_SB_CHANGE_PENDING indicates that the array is switching from clean to
+ * active, and no action is needed for now.
+ * All other MD_SB_* flags require a superblock update.
+ */
+ if (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING))
+ return true;
+
+ /*
+ * If the array is not using external metadata and there has been no data
+ * written for some time, then the array's status needs to be set to
+ * in_sync.
+ */
+ if (mddev->external == 0 && mddev->safemode == 1)
+ return true;
+
+ /*
+ * When the system is about to restart or the process receives a signal,
+ * the array needs to be synchronized as soon as possible.
+ * Once the data synchronization completes, the array status needs to be
+ * changed to in_sync.
+ */
+ if (mddev->safemode == 2 && !mddev->in_sync &&
+ mddev->resync_offset == MaxSector)
+ return true;
+
+ return false;
+}
+
/*
* This routine is regularly called by all per-raid-array threads to
* deal with generic issues like resync and super-block update.
@@ -10014,18 +10151,7 @@ void md_check_recovery(struct mddev *mddev)
flush_signals(current);
}
- if (!md_is_rdwr(mddev) &&
- !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
- !test_bit(MD_RECOVERY_DONE, &mddev->recovery))
- return;
- if ( ! (
- (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
- test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
- test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
- (mddev->external == 0 && mddev->safemode == 1) ||
- (mddev->safemode == 2
- && !mddev->in_sync && mddev->resync_offset == MaxSector)
- ))
+ if (!md_should_do_recovery(mddev))
return;
if (mddev_trylock(mddev)) {
@@ -10697,6 +10823,7 @@ module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
module_param(legacy_async_del_gendisk, bool, 0600);
+module_param(check_new_feature, bool, 0600);
MODULE_LICENSE("GPL");
MODULE_DESCRIPTION("MD RAID framework");
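Taken together, the md.c hunks above impose several separate constraints on a value written to logical_block_size. A consolidated sketch, with validate_requested_lbs() as a hypothetical helper (the power-of-two requirement is assumed from the block layer's limit validation rather than stated in this patch):

/*
 * Illustration only: the constraints the hunks above apply to a value
 * written to the logical_block_size attribute before the array starts.
 */
#include <stdbool.h>

static bool validate_requested_lbs(unsigned int lbs,
				   unsigned int current_queue_lbs,
				   unsigned int page_size)
{
	/* mddev_set_logical_block_size(): must be strictly larger than the
	 * LBS already stacked from the member devices */
	if (lbs <= current_queue_lbs)
		return false;

	/* mddev_stack_rdev_limits(): capped at PAGE_SIZE until RAID gains
	 * folio support */
	if (lbs > page_size)
		return false;

	/* assumption: the block layer additionally requires a power-of-two
	 * value when the queue limits are committed */
	if (lbs & (lbs - 1))
		return false;

	return true;
}

Member devices added later are checked the other way around: mddev_stack_new_rdev() above rejects an rdev whose own logical block size exceeds the array's.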
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1979c2d4fe89..6985f2829bbd 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -354,6 +354,7 @@ enum mddev_flags {
MD_HAS_MULTIPLE_PPLS,
MD_NOT_READY,
MD_BROKEN,
+ MD_DO_DELETE,
MD_DELETED,
};
@@ -432,6 +433,7 @@ struct mddev {
sector_t array_sectors; /* exported array size */
int external_size; /* size managed
* externally */
+ unsigned int logical_block_size;
__u64 events;
/* If the last 'event' was simply a clean->dirty transition, and
* we didn't write it to the spares, then it is safe and simple
@@ -882,6 +884,12 @@ struct md_io_clone {
#define THREAD_WAKEUP 0
+#define md_wakeup_thread(thread) do { \
+ rcu_read_lock(); \
+ __md_wakeup_thread(thread); \
+ rcu_read_unlock(); \
+} while (0)
+
static inline void safe_put_page(struct page *p)
{
if (p) put_page(p);
@@ -895,7 +903,7 @@ extern struct md_thread *md_register_thread(
struct mddev *mddev,
const char *name);
extern void md_unregister_thread(struct mddev *mddev, struct md_thread __rcu **threadp);
-extern void md_wakeup_thread(struct md_thread __rcu *thread);
+extern void __md_wakeup_thread(struct md_thread __rcu *thread);
extern void md_check_recovery(struct mddev *mddev);
extern void md_reap_sync_thread(struct mddev *mddev);
extern enum sync_action md_sync_action(struct mddev *mddev);
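The md_wakeup_thread() wrapper introduced above exists so that the __rcu pointer expression is evaluated while the RCU read lock is held, rather than being loaded by the caller first and only then dereferenced under RCU. A hypothetical kernel-style caller (example_wakeup() is not part of the patch) shows the expansion:

/*
 * Illustration only; compiles in the md driver context, not standalone.
 */
#include <linux/rcupdate.h>
#include "md.h"

static void example_wakeup(struct mddev *mddev)
{
	md_wakeup_thread(mddev->thread);
	/*
	 * expands to:
	 *	rcu_read_lock();
	 *	__md_wakeup_thread(mddev->thread);
	 *	rcu_read_unlock();
	 * so the load of the __rcu pointer and its dereference in
	 * __md_wakeup_thread() share one read-side critical section.
	 */
}

md_wakeup_thread_directly() gets the same treatment by taking a pointer to the __rcu pointer, which is why stop_sync_thread() now passes &mddev->sync_thread.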
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e443e478645a..47aee1b1d4d1 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -68,7 +68,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
struct strip_zone *zone;
int cnt;
struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
- unsigned blksize = 512;
+ unsigned int blksize = queue_logical_block_size(mddev->gendisk->queue);
*private_conf = ERR_PTR(-ENOMEM);
if (!conf)
@@ -84,9 +84,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
sector_div(sectors, mddev->chunk_sectors);
rdev1->sectors = sectors * mddev->chunk_sectors;
- blksize = max(blksize, queue_logical_block_size(
- rdev1->bdev->bd_disk->queue));
-
rdev_for_each(rdev2, mddev) {
pr_debug("md/raid0:%s: comparing %pg(%llu)"
" with %pg(%llu)\n",
@@ -383,6 +380,7 @@ static int raid0_set_limits(struct mddev *mddev)
lim.max_hw_sectors = mddev->chunk_sectors;
lim.max_write_zeroes_sectors = mddev->chunk_sectors;
lim.max_hw_wzeroes_unmap_sectors = mddev->chunk_sectors;
+ lim.logical_block_size = mddev->logical_block_size;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * mddev->raid_disks;
lim.chunk_sectors = mddev->chunk_sectors;
@@ -405,6 +403,12 @@ static int raid0_run(struct mddev *mddev)
if (md_check_no_bitmap(mddev))
return -EINVAL;
+ if (!mddev_is_dm(mddev)) {
+ ret = raid0_set_limits(mddev);
+ if (ret)
+ return ret;
+ }
+
/* if private is not null, we are here after takeover */
if (mddev->private == NULL) {
ret = create_strip_zones(mddev, &conf);
@@ -413,11 +417,6 @@ static int raid0_run(struct mddev *mddev)
mddev->private = conf;
}
conf = mddev->private;
- if (!mddev_is_dm(mddev)) {
- ret = raid0_set_limits(mddev);
- if (ret)
- return ret;
- }
/* calculate array device size */
md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 592a40233004..57d50465eed1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3213,6 +3213,7 @@ static int raid1_set_limits(struct mddev *mddev)
md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
lim.max_hw_wzeroes_unmap_sectors = 0;
+ lim.logical_block_size = mddev->logical_block_size;
lim.features |= BLK_FEAT_ATOMIC_WRITES;
err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
if (err)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 14dcd5142eb4..84be4cc7e873 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4000,6 +4000,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
md_init_stacking_limits(&lim);
lim.max_write_zeroes_sectors = 0;
lim.max_hw_wzeroes_unmap_sectors = 0;
+ lim.logical_block_size = mddev->logical_block_size;
lim.io_min = mddev->chunk_sectors << 9;
lim.chunk_sectors = mddev->chunk_sectors;
lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index ba768ca7f422..e29e69335c69 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -3104,7 +3104,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
goto out_mempool;
spin_lock_init(&log->tree_lock);
- INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);
+ INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT);
thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev,
"reclaim");
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 24b32a0c95b4..cdbc7eba5c54 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7745,6 +7745,7 @@ static int raid5_set_limits(struct mddev *mddev)
stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
md_init_stacking_limits(&lim);
+ lim.logical_block_size = mddev->logical_block_size;
lim.io_min = mddev->chunk_sectors << 9;
lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index ac74133a4768..310068bb2a1d 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -291,7 +291,8 @@ struct mdp_superblock_1 {
__le64 resync_offset; /* data before this offset (from data_offset) known to be in sync */
__le32 sb_csum; /* checksum up to devs[max_dev] */
__le32 max_dev; /* size of devs[] array to consider */
- __u8 pad3[64-32]; /* set to 0 when writing */
+ __le32 logical_block_size; /* same as q->limits->logical_block_size */
+ __u8 pad3[64-36]; /* set to 0 when writing */
/* device state information. Indexed by dev_number.
* 2 bytes per device