| field | value | date |
|---|---|---|
| author | Jens Axboe <axboe@kernel.dk> | 2025-11-11 16:58:11 +0300 |
| committer | Jens Axboe <axboe@kernel.dk> | 2025-11-11 16:58:11 +0300 |
| commit | 3d076988aaaee6b0c8f4db953fd4fe8194ec370f (patch) | |
| tree | 4c7a3b0625a674b41462dc5c871fcfab020402a9 | |
| parent | 4cda40dce95a5b4ec0620a84f322472730d01f7a (diff) | |
| parent | 62ed1b58224636185fa689db81224b8c8af46473 (diff) | |
| download | linux-3d076988aaaee6b0c8f4db953fd4fe8194ec370f.tar.xz | |
Merge tag 'md-6.19-20251111' of gitolite.kernel.org:pub/scm/linux/kernel/git/mdraid/linux into for-6.19/block
Pull MD changes from Yu:
"- Change maintainer's email address (Yu Kuai)
- Data can be lost if an array is created from devices with different
  logical block sizes (LBS); fix this and record the array's LBS in the
  metadata (Li Nan)
- Fix rcu protection for md_thread (Yun Zhou)
- Fix mddev kobject lifetime regression (Xiao Ni)
- Enable atomic writes for md-linear (John Garry)
- Some cleanups (Chen Ni, Huiwen He, Wu Guanghao)"
* tag 'md-6.19-20251111' of gitolite.kernel.org:pub/scm/linux/kernel/git/mdraid/linux:
md: allow configuring logical block size
md: add check_new_feature module parameter
md/raid0: Move queue limit setup before r0conf initialization
md: init bioset in mddev_init
md: delete md_redundancy_group when array is becoming inactive
md: prevent adding disks with larger logical_block_size to active arrays
md/raid5: remove redundant __GFP_NOWARN
md: avoid repeated calls to del_gendisk
md/md-llbitmap: Remove unneeded semicolon
md/md-linear: Enable atomic writes
md: Factor out code into md_should_do_recovery()
md: fix rcu protection in md_wakeup_thread
md: delete mddev kobj before deleting gendisk kobj
MAINTAINERS: Update Yu Kuai's E-mail address
| mode | file | changed |
|---|---|---|
| -rw-r--r-- | Documentation/admin-guide/md.rst | 10 |
| -rw-r--r-- | MAINTAINERS | 4 |
| -rw-r--r-- | drivers/md/md-linear.c | 2 |
| -rw-r--r-- | drivers/md/md-llbitmap.c | 2 |
| -rw-r--r-- | drivers/md/md.c | 247 |
| -rw-r--r-- | drivers/md/md.h | 10 |
| -rw-r--r-- | drivers/md/raid0.c | 17 |
| -rw-r--r-- | drivers/md/raid1.c | 1 |
| -rw-r--r-- | drivers/md/raid10.c | 1 |
| -rw-r--r-- | drivers/md/raid5-cache.c | 2 |
| -rw-r--r-- | drivers/md/raid5.c | 1 |
| -rw-r--r-- | include/uapi/linux/raid/md_p.h | 3 |
12 files changed, 225 insertions, 75 deletions
diff --git a/Documentation/admin-guide/md.rst b/Documentation/admin-guide/md.rst
index deed823eab01..dc7eab191caa 100644
--- a/Documentation/admin-guide/md.rst
+++ b/Documentation/admin-guide/md.rst
@@ -238,6 +238,16 @@ All md devices contain:
      the number of devices in a raid4/5/6, or to support external
      metadata formats which mandate such clipping.
 
+  logical_block_size
+     Configure the array's logical block size in bytes. This attribute
+     is only supported for 1.x meta. Write the value before starting
+     array. The final array LBS uses the maximum between this
+     configuration and LBS of all combined devices. Note that
+     LBS cannot exceed PAGE_SIZE before RAID supports folio.
+     WARNING: Arrays created on new kernel cannot be assembled at old
+     kernel due to padding check, Set module parameter 'check_new_feature'
+     to false to bypass, but data loss may occur.
+
   reshape_position
      This is either ``none`` or a sector number within the devices
      of the array where ``reshape`` is up to. If this is set, the three
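A minimal usage sketch of the attribute documented above, assuming an existing but not-yet-started array `md0` with 1.x metadata; the device name and the 4096-byte value are illustrative, and the path follows the `/sys/block/mdX/md/` convention md.rst uses throughout:

```sh
# Must happen before the array starts: lbs_store() (see the md.c hunk
# further down) returns -EBUSY once mddev->pers is set, and -EINVAL
# for 0.90 metadata.
echo 4096 > /sys/block/md0/md/logical_block_size

# Read back the configured value. The effective array LBS is the
# maximum of this value and the logical block size of every member.
cat /sys/block/md0/md/logical_block_size
```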
diff --git a/MAINTAINERS b/MAINTAINERS
index 545a4776795e..ce7f67e09910 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4300,7 +4300,7 @@ F:	Documentation/filesystems/befs.rst
 F:	fs/befs/
 
 BFQ I/O SCHEDULER
-M:	Yu Kuai <yukuai3@huawei.com>
+M:	Yu Kuai <yukuai@fnnas.com>
 L:	linux-block@vger.kernel.org
 S:	Odd Fixes
 F:	Documentation/block/bfq-iosched.rst
@@ -23842,7 +23842,7 @@ F:	include/linux/property.h
 
 SOFTWARE RAID (Multiple Disks) SUPPORT
 M:	Song Liu <song@kernel.org>
-M:	Yu Kuai <yukuai3@huawei.com>
+M:	Yu Kuai <yukuai@fnnas.com>
 L:	linux-raid@vger.kernel.org
 S:	Supported
 Q:	https://patchwork.kernel.org/project/linux-raid/list/
diff --git a/drivers/md/md-linear.c b/drivers/md/md-linear.c
index 7033d982d377..8d7b82c4a723 100644
--- a/drivers/md/md-linear.c
+++ b/drivers/md/md-linear.c
@@ -72,9 +72,11 @@ static int linear_set_limits(struct mddev *mddev)
 
 	md_init_stacking_limits(&lim);
 	lim.max_hw_sectors = mddev->chunk_sectors;
+	lim.logical_block_size = mddev->logical_block_size;
 	lim.max_write_zeroes_sectors = mddev->chunk_sectors;
 	lim.max_hw_wzeroes_unmap_sectors = mddev->chunk_sectors;
 	lim.io_min = mddev->chunk_sectors << 9;
+	lim.features |= BLK_FEAT_ATOMIC_WRITES;
 	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
 	if (err)
 		return err;
diff --git a/drivers/md/md-llbitmap.c b/drivers/md/md-llbitmap.c
index 1eb434306162..9c1ade19b774 100644
--- a/drivers/md/md-llbitmap.c
+++ b/drivers/md/md-llbitmap.c
@@ -378,7 +378,7 @@ static void llbitmap_infect_dirty_bits(struct llbitmap *llbitmap,
 		case BitClean:
 			pctl->state[pos] = BitDirty;
 			break;
-		};
+		}
 	}
 }
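With md-linear now setting BLK_FEAT_ATOMIC_WRITES alongside the stacked logical block size, the array's atomic-write limits become visible through the generic block-layer sysfs attributes. A quick sanity-check sketch; `md0` is illustrative and the reported values depend entirely on the member devices:

```sh
# Non-zero values mean the linear array inherited atomic-write support
# from its member devices; zeros mean the members lack it.
cat /sys/block/md0/queue/atomic_write_max_bytes
cat /sys/block/md0/queue/atomic_write_unit_max_bytes
```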
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 41c476b40c7a..7b5c5967568f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -99,7 +99,7 @@
 static int remove_and_add_spares(struct mddev *mddev,
 				 struct md_rdev *this);
 static void mddev_detach(struct mddev *mddev);
 static void export_rdev(struct md_rdev *rdev, struct mddev *mddev);
-static void md_wakeup_thread_directly(struct md_thread __rcu *thread);
+static void md_wakeup_thread_directly(struct md_thread __rcu **thread);
 
 /*
  * Default number of read corrections we'll attempt on an rdev
@@ -339,6 +339,7 @@ static int start_readonly;
  */
 static bool create_on_open = true;
 static bool legacy_async_del_gendisk = true;
+static bool check_new_feature = true;
 
 /*
  * We have a system wide 'event count' that is incremented
@@ -730,6 +731,8 @@ static void mddev_clear_bitmap_ops(struct mddev *mddev)
 
 int mddev_init(struct mddev *mddev)
 {
+	int err = 0;
+
 	if (!IS_ENABLED(CONFIG_MD_BITMAP))
 		mddev->bitmap_id = ID_BITMAP_NONE;
 	else
@@ -741,10 +744,23 @@ int mddev_init(struct mddev *mddev)
 	if (percpu_ref_init(&mddev->writes_pending, no_op,
 			    PERCPU_REF_ALLOW_REINIT, GFP_KERNEL)) {
-		percpu_ref_exit(&mddev->active_io);
-		return -ENOMEM;
+		err = -ENOMEM;
+		goto exit_acitve_io;
 	}
 
+	err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	if (err)
+		goto exit_writes_pending;
+
+	err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
+	if (err)
+		goto exit_bio_set;
+
+	err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
+			  offsetof(struct md_io_clone, bio_clone), 0);
+	if (err)
+		goto exit_sync_set;
+
 	/* We want to start with the refcount at zero */
 	percpu_ref_put(&mddev->writes_pending);
@@ -773,11 +789,24 @@ int mddev_init(struct mddev *mddev)
 	INIT_WORK(&mddev->del_work, mddev_delayed_delete);
 
 	return 0;
+
+exit_sync_set:
+	bioset_exit(&mddev->sync_set);
+exit_bio_set:
+	bioset_exit(&mddev->bio_set);
+exit_writes_pending:
+	percpu_ref_exit(&mddev->writes_pending);
+exit_acitve_io:
+	percpu_ref_exit(&mddev->active_io);
+	return err;
 }
 EXPORT_SYMBOL_GPL(mddev_init);
 
 void mddev_destroy(struct mddev *mddev)
 {
+	bioset_exit(&mddev->bio_set);
+	bioset_exit(&mddev->sync_set);
+	bioset_exit(&mddev->io_clone_set);
 	percpu_ref_exit(&mddev->active_io);
 	percpu_ref_exit(&mddev->writes_pending);
 }
@@ -941,8 +970,11 @@ void mddev_unlock(struct mddev *mddev)
 		 * do_md_stop. dm raid only uses md_stop to stop. So dm raid
 		 * doesn't need to check MD_DELETED when getting reconfig lock
 		 */
-		if (test_bit(MD_DELETED, &mddev->flags))
+		if (test_bit(MD_DELETED, &mddev->flags) &&
+		    !test_and_set_bit(MD_DO_DELETE, &mddev->flags)) {
+			kobject_del(&mddev->kobj);
 			del_gendisk(mddev->gendisk);
+		}
 	}
 }
 EXPORT_SYMBOL_GPL(mddev_unlock);
@@ -1820,9 +1852,13 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_
 	}
 
 	if (sb->pad0 ||
 	    sb->pad3[0] ||
-	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1])))
-		/* Some padding is non-zero, might be a new feature */
-		return -EINVAL;
+	    memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1]))) {
+		pr_warn("Some padding is non-zero on %pg, might be a new feature\n",
+			rdev->bdev);
+		if (check_new_feature)
+			return -EINVAL;
+		pr_warn("check_new_feature is disabled, data corruption possible\n");
+	}
 
 	rdev->preferred_minor = 0xffff;
 	rdev->data_offset = le64_to_cpu(sb->data_offset);
@@ -1963,6 +1999,7 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *freshest, struc
 		mddev->layout = le32_to_cpu(sb->layout);
 		mddev->raid_disks = le32_to_cpu(sb->raid_disks);
 		mddev->dev_sectors = le64_to_cpu(sb->size);
+		mddev->logical_block_size = le32_to_cpu(sb->logical_block_size);
 		mddev->events = ev1;
 		mddev->bitmap_info.offset = 0;
 		mddev->bitmap_info.space = 0;
@@ -2172,6 +2209,7 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev)
 	sb->chunksize = cpu_to_le32(mddev->chunk_sectors);
 	sb->level = cpu_to_le32(mddev->level);
 	sb->layout = cpu_to_le32(mddev->layout);
+	sb->logical_block_size = cpu_to_le32(mddev->logical_block_size);
 	if (test_bit(FailFast, &rdev->flags))
 		sb->devflags |= FailFast1;
 	else
@@ -5134,7 +5172,7 @@ static void stop_sync_thread(struct mddev *mddev, bool locked)
 	 * Thread might be blocked waiting for metadata update which will now
 	 * never happen
 	 */
-	md_wakeup_thread_directly(mddev->sync_thread);
+	md_wakeup_thread_directly(&mddev->sync_thread);
 	if (work_pending(&mddev->sync_work))
 		flush_work(&mddev->sync_work);
@@ -5900,6 +5938,68 @@ static struct md_sysfs_entry
 md_serialize_policy = __ATTR(serialize_policy, S_IRUGO | S_IWUSR,
 			     serialize_policy_show, serialize_policy_store);
 
+static int mddev_set_logical_block_size(struct mddev *mddev,
+					unsigned int lbs)
+{
+	int err = 0;
+	struct queue_limits lim;
+
+	if (queue_logical_block_size(mddev->gendisk->queue) >= lbs) {
+		pr_err("%s: Cannot set LBS smaller than mddev LBS %u\n",
+		       mdname(mddev), lbs);
+		return -EINVAL;
+	}
+
+	lim = queue_limits_start_update(mddev->gendisk->queue);
+	lim.logical_block_size = lbs;
+	pr_info("%s: logical_block_size is changed, data may be lost\n",
+		mdname(mddev));
+	err = queue_limits_commit_update(mddev->gendisk->queue, &lim);
+	if (err)
+		return err;
+
+	mddev->logical_block_size = lbs;
+	/* New lbs will be written to superblock after array is running */
+	set_bit(MD_SB_CHANGE_DEVS, &mddev->sb_flags);
+	return 0;
+}
+
+static ssize_t
+lbs_show(struct mddev *mddev, char *page)
+{
+	return sprintf(page, "%u\n", mddev->logical_block_size);
+}
+
+static ssize_t
+lbs_store(struct mddev *mddev, const char *buf, size_t len)
+{
+	unsigned int lbs;
+	int err = -EBUSY;
+
+	/* Only 1.x meta supports configurable LBS */
+	if (mddev->major_version == 0)
+		return -EINVAL;
+
+	if (mddev->pers)
+		return -EBUSY;
+
+	err = kstrtouint(buf, 10, &lbs);
+	if (err < 0)
+		return -EINVAL;
+
+	err = mddev_lock(mddev);
+	if (err)
+		goto unlock;
+
+	err = mddev_set_logical_block_size(mddev, lbs);
+
+unlock:
+	mddev_unlock(mddev);
+	return err ?: len;
+}
+
+static struct md_sysfs_entry md_logical_block_size =
+__ATTR(logical_block_size, 0644, lbs_show, lbs_store);
 
 static struct attribute *md_default_attrs[] = {
 	&md_level.attr,
@@ -5922,6 +6022,7 @@ static struct attribute *md_default_attrs[] = {
 	&md_consistency_policy.attr,
 	&md_fail_last_dev.attr,
 	&md_serialize_policy.attr,
+	&md_logical_block_size.attr,
 	NULL,
 };
@@ -6052,6 +6153,17 @@ int mddev_stack_rdev_limits(struct mddev *mddev, struct queue_limits *lim,
 		return -EINVAL;
 	}
 
+	/*
+	 * Before RAID adding folio support, the logical_block_size
+	 * should be smaller than the page size.
+	 */
+	if (lim->logical_block_size > PAGE_SIZE) {
+		pr_err("%s: logical_block_size must not larger than PAGE_SIZE\n",
+		       mdname(mddev));
+		return -EINVAL;
+	}
+	mddev->logical_block_size = lim->logical_block_size;
+
 	return 0;
 }
 EXPORT_SYMBOL_GPL(mddev_stack_rdev_limits);
@@ -6064,6 +6176,13 @@ int mddev_stack_new_rdev(struct mddev *mddev, struct md_rdev *rdev)
 	if (mddev_is_dm(mddev))
 		return 0;
 
+	if (queue_logical_block_size(rdev->bdev->bd_disk->queue) >
+	    queue_logical_block_size(mddev->gendisk->queue)) {
+		pr_err("%s: incompatible logical_block_size, can not add\n",
+		       mdname(mddev));
+		return -EINVAL;
+	}
+
 	lim = queue_limits_start_update(mddev->gendisk->queue);
 	queue_limits_stack_bdev(&lim, rdev->bdev, rdev->data_offset,
 				mddev->gendisk->disk_name);
@@ -6384,29 +6503,9 @@ int md_run(struct mddev *mddev)
 		nowait = nowait && bdev_nowait(rdev->bdev);
 	}
 
-	if (!bioset_initialized(&mddev->bio_set)) {
-		err = bioset_init(&mddev->bio_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-		if (err)
-			return err;
-	}
-	if (!bioset_initialized(&mddev->sync_set)) {
-		err = bioset_init(&mddev->sync_set, BIO_POOL_SIZE, 0, BIOSET_NEED_BVECS);
-		if (err)
-			goto exit_bio_set;
-	}
-
-	if (!bioset_initialized(&mddev->io_clone_set)) {
-		err = bioset_init(&mddev->io_clone_set, BIO_POOL_SIZE,
-				  offsetof(struct md_io_clone, bio_clone), 0);
-		if (err)
-			goto exit_sync_set;
-	}
-
 	pers = get_pers(mddev->level, mddev->clevel);
-	if (!pers) {
-		err = -EINVAL;
-		goto abort;
-	}
+	if (!pers)
+		return -EINVAL;
 	if (mddev->level != pers->head.id) {
 		mddev->level = pers->head.id;
 		mddev->new_level = pers->head.id;
@@ -6417,8 +6516,7 @@ int md_run(struct mddev *mddev)
 	    pers->start_reshape == NULL) {
 		/* This personality cannot handle reshaping... */
 		put_pers(pers);
-		err = -EINVAL;
-		goto abort;
+		return -EINVAL;
 	}
 
 	if (pers->sync_request) {
@@ -6545,12 +6643,6 @@ bitmap_abort:
 	mddev->private = NULL;
 	put_pers(pers);
 	md_bitmap_destroy(mddev);
-abort:
-	bioset_exit(&mddev->io_clone_set);
-exit_sync_set:
-	bioset_exit(&mddev->sync_set);
-exit_bio_set:
-	bioset_exit(&mddev->bio_set);
 	return err;
 }
 EXPORT_SYMBOL_GPL(md_run);
@@ -6683,6 +6775,7 @@ static void md_clean(struct mddev *mddev)
 	mddev->chunk_sectors = 0;
 	mddev->ctime = mddev->utime = 0;
 	mddev->layout = 0;
+	mddev->logical_block_size = 0;
 	mddev->max_disks = 0;
 	mddev->events = 0;
 	mddev->can_decrease_events = 0;
@@ -6775,10 +6868,6 @@ static void __md_stop(struct mddev *mddev)
 	mddev->private = NULL;
 	put_pers(pers);
 	clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
-
-	bioset_exit(&mddev->bio_set);
-	bioset_exit(&mddev->sync_set);
-	bioset_exit(&mddev->io_clone_set);
 }
 
 void md_stop(struct mddev *mddev)
@@ -6869,6 +6958,10 @@ static int do_md_stop(struct mddev *mddev, int mode)
 		if (!md_is_rdwr(mddev))
 			set_disk_ro(disk, 0);
 
+		if (mode == 2 && mddev->pers->sync_request &&
+		    mddev->to_remove == NULL)
+			mddev->to_remove = &md_redundancy_group;
+
 		__md_stop_writes(mddev);
 		__md_stop(mddev);
@@ -8373,22 +8466,21 @@ static int md_thread(void *arg)
 	return 0;
 }
 
-static void md_wakeup_thread_directly(struct md_thread __rcu *thread)
+static void md_wakeup_thread_directly(struct md_thread __rcu **thread)
 {
 	struct md_thread *t;
 
 	rcu_read_lock();
-	t = rcu_dereference(thread);
+	t = rcu_dereference(*thread);
 	if (t)
 		wake_up_process(t->tsk);
 	rcu_read_unlock();
 }
 
-void md_wakeup_thread(struct md_thread __rcu *thread)
+void __md_wakeup_thread(struct md_thread __rcu *thread)
 {
 	struct md_thread *t;
 
-	rcu_read_lock();
 	t = rcu_dereference(thread);
 	if (t) {
 		pr_debug("md: waking up MD thread %s.\n", t->tsk->comm);
@@ -8396,9 +8488,8 @@ void md_wakeup_thread(struct md_thread __rcu *thread)
 		if (wq_has_sleeper(&t->wqueue))
 			wake_up(&t->wqueue);
 	}
-	rcu_read_unlock();
 }
-EXPORT_SYMBOL(md_wakeup_thread);
+EXPORT_SYMBOL(__md_wakeup_thread);
 
 struct md_thread *md_register_thread(void (*run) (struct md_thread *),
 				     struct mddev *mddev, const char *name)
@@ -9978,6 +10069,52 @@ static void unregister_sync_thread(struct mddev *mddev)
 	md_reap_sync_thread(mddev);
 }
 
+static bool md_should_do_recovery(struct mddev *mddev)
+{
+	/*
+	 * As long as one of the following flags is set,
+	 * recovery needs to do or cleanup.
+	 */
+	if (test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
+	    test_bit(MD_RECOVERY_DONE, &mddev->recovery))
+		return true;
+
+	/*
+	 * If no flags are set and it is in read-only status,
+	 * there is nothing to do.
+	 */
+	if (!md_is_rdwr(mddev))
+		return false;
+
+	/*
+	 * MD_SB_CHANGE_PENDING indicates that the array is switching from
+	 * clean to active, and no action is needed for now.
+	 * All other MD_SB_* flags require to update the superblock.
+	 */
+	if (mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING))
+		return true;
+
+	/*
+	 * If the array is not using external metadata and there has been
+	 * no data written for some time, then the array's status needs to
+	 * be set to in_sync.
+	 */
+	if (mddev->external == 0 && mddev->safemode == 1)
+		return true;
+
+	/*
+	 * When the system is about to restart or the process receives an
+	 * signal, the array needs to be synchronized as soon as possible.
+	 * Once the data synchronization is completed, need to change the
+	 * array status to in_sync.
+	 */
+	if (mddev->safemode == 2 && !mddev->in_sync &&
+	    mddev->resync_offset == MaxSector)
+		return true;
+
+	return false;
+}
+
 /*
  * This routine is regularly called by all per-raid-array threads to
  * deal with generic issues like resync and super-block update.
@@ -10014,18 +10151,7 @@ void md_check_recovery(struct mddev *mddev)
 		flush_signals(current);
 	}
 
-	if (!md_is_rdwr(mddev) &&
-	    !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) &&
-	    !test_bit(MD_RECOVERY_DONE, &mddev->recovery))
-		return;
-	if ( ! (
-		(mddev->sb_flags & ~ (1<<MD_SB_CHANGE_PENDING)) ||
-		test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
-		test_bit(MD_RECOVERY_DONE, &mddev->recovery) ||
-		(mddev->external == 0 && mddev->safemode == 1) ||
-		(mddev->safemode == 2
-		 && !mddev->in_sync && mddev->resync_offset == MaxSector)
-		))
+	if (!md_should_do_recovery(mddev))
 		return;
 
 	if (mddev_trylock(mddev)) {
@@ -10697,6 +10823,7 @@ module_param(start_dirty_degraded, int, S_IRUGO|S_IWUSR);
 module_param_call(new_array, add_named_array, NULL, NULL, S_IWUSR);
 module_param(create_on_open, bool, S_IRUSR|S_IWUSR);
 module_param(legacy_async_del_gendisk, bool, 0600);
+module_param(check_new_feature, bool, 0600);
 
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("MD RAID framework");
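The relaxed padding check in super_1_load() above is gated by the new check_new_feature parameter. A hedged sketch of toggling it, assuming the md core is built as the usual md_mod module:

```sh
# Permit assembling an array whose 1.x superblock carries non-zero
# padding (e.g. written by a newer kernel); as the pr_warn() above
# notes, data corruption is possible.
modprobe md_mod check_new_feature=0

# For a built-in md core, the equivalent kernel command-line option
# would be: md_mod.check_new_feature=0

# The parameter node is mode 0600, so root can inspect it at runtime:
cat /sys/module/md_mod/parameters/check_new_feature
```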
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 1979c2d4fe89..6985f2829bbd 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -354,6 +354,7 @@ enum mddev_flags {
 	MD_HAS_MULTIPLE_PPLS,
 	MD_NOT_READY,
 	MD_BROKEN,
+	MD_DO_DELETE,
 	MD_DELETED,
 };
@@ -432,6 +433,7 @@ struct mddev {
 	sector_t			array_sectors;	/* exported array size */
 	int				external_size;	/* size managed
 							 * externally */
+	unsigned int			logical_block_size;
 	__u64				events;
 	/* If the last 'event' was simply a clean->dirty transition, and
 	 * we didn't write it to the spares, then it is safe and simple
@@ -882,6 +884,12 @@ struct md_io_clone {
 
 #define THREAD_WAKEUP 0
 
+#define md_wakeup_thread(thread) do {	\
+	rcu_read_lock();		\
+	__md_wakeup_thread(thread);	\
+	rcu_read_unlock();		\
+} while (0)
+
 static inline void safe_put_page(struct page *p)
 {
 	if (p) put_page(p);
@@ -895,7 +903,7 @@ extern struct md_thread *md_register_thread(
 	void (*run) (struct md_thread *),
 	struct mddev *mddev,
 	const char *name);
 extern void md_unregister_thread(struct mddev *mddev,
 				 struct md_thread __rcu **threadp);
-extern void md_wakeup_thread(struct md_thread __rcu *thread);
+extern void __md_wakeup_thread(struct md_thread __rcu *thread);
 extern void md_check_recovery(struct mddev *mddev);
 extern void md_reap_sync_thread(struct mddev *mddev);
 extern enum sync_action md_sync_action(struct mddev *mddev);
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index e443e478645a..47aee1b1d4d1 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -68,7 +68,7 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 	struct strip_zone *zone;
 	int cnt;
 	struct r0conf *conf = kzalloc(sizeof(*conf), GFP_KERNEL);
-	unsigned blksize = 512;
+	unsigned int blksize = queue_logical_block_size(mddev->gendisk->queue);
 
 	*private_conf = ERR_PTR(-ENOMEM);
 	if (!conf)
@@ -84,9 +84,6 @@ static int create_strip_zones(struct mddev *mddev, struct r0conf **private_conf)
 		sector_div(sectors, mddev->chunk_sectors);
 		rdev1->sectors = sectors * mddev->chunk_sectors;
 
-		blksize = max(blksize, queue_logical_block_size(
-				      rdev1->bdev->bd_disk->queue));
-
 		rdev_for_each(rdev2, mddev) {
 			pr_debug("md/raid0:%s: comparing %pg(%llu)"
 				 " with %pg(%llu)\n",
@@ -383,6 +380,7 @@ static int raid0_set_limits(struct mddev *mddev)
 	lim.max_hw_sectors = mddev->chunk_sectors;
 	lim.max_write_zeroes_sectors = mddev->chunk_sectors;
 	lim.max_hw_wzeroes_unmap_sectors = mddev->chunk_sectors;
+	lim.logical_block_size = mddev->logical_block_size;
 	lim.io_min = mddev->chunk_sectors << 9;
 	lim.io_opt = lim.io_min * mddev->raid_disks;
 	lim.chunk_sectors = mddev->chunk_sectors;
@@ -405,6 +403,12 @@ static int raid0_run(struct mddev *mddev)
 	if (md_check_no_bitmap(mddev))
 		return -EINVAL;
 
+	if (!mddev_is_dm(mddev)) {
+		ret = raid0_set_limits(mddev);
+		if (ret)
+			return ret;
+	}
+
 	/* if private is not null, we are here after takeover */
 	if (mddev->private == NULL) {
 		ret = create_strip_zones(mddev, &conf);
@@ -413,11 +417,6 @@ static int raid0_run(struct mddev *mddev)
 		mddev->private = conf;
 	}
 	conf = mddev->private;
-	if (!mddev_is_dm(mddev)) {
-		ret = raid0_set_limits(mddev);
-		if (ret)
-			return ret;
-	}
 
 	/* calculate array device size */
 	md_set_array_sectors(mddev, raid0_size(mddev, 0, 0));
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 592a40233004..57d50465eed1 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -3213,6 +3213,7 @@ static int raid1_set_limits(struct mddev *mddev)
 	md_init_stacking_limits(&lim);
 	lim.max_write_zeroes_sectors = 0;
 	lim.max_hw_wzeroes_unmap_sectors = 0;
+	lim.logical_block_size = mddev->logical_block_size;
 	lim.features |= BLK_FEAT_ATOMIC_WRITES;
 	err = mddev_stack_rdev_limits(mddev, &lim, MDDEV_STACK_INTEGRITY);
 	if (err)
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 14dcd5142eb4..84be4cc7e873 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -4000,6 +4000,7 @@ static int raid10_set_queue_limits(struct mddev *mddev)
 	md_init_stacking_limits(&lim);
 	lim.max_write_zeroes_sectors = 0;
 	lim.max_hw_wzeroes_unmap_sectors = 0;
+	lim.logical_block_size = mddev->logical_block_size;
 	lim.io_min = mddev->chunk_sectors << 9;
 	lim.chunk_sectors = mddev->chunk_sectors;
 	lim.io_opt = lim.io_min * raid10_nr_stripes(conf);
diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c
index ba768ca7f422..e29e69335c69 100644
--- a/drivers/md/raid5-cache.c
+++ b/drivers/md/raid5-cache.c
@@ -3104,7 +3104,7 @@ int r5l_init_log(struct r5conf *conf, struct md_rdev *rdev)
 		goto out_mempool;
 
 	spin_lock_init(&log->tree_lock);
-	INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT | __GFP_NOWARN);
+	INIT_RADIX_TREE(&log->big_stripe_tree, GFP_NOWAIT);
 
 	thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev,
 				    "reclaim");
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 24b32a0c95b4..cdbc7eba5c54 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -7745,6 +7745,7 @@ static int raid5_set_limits(struct mddev *mddev)
 	stripe = roundup_pow_of_two(data_disks * (mddev->chunk_sectors << 9));
 
 	md_init_stacking_limits(&lim);
+	lim.logical_block_size = mddev->logical_block_size;
 	lim.io_min = mddev->chunk_sectors << 9;
 	lim.io_opt = lim.io_min * (conf->raid_disks - conf->max_degraded);
 	lim.features |= BLK_FEAT_RAID_PARTIAL_STRIPES_EXPENSIVE;
diff --git a/include/uapi/linux/raid/md_p.h b/include/uapi/linux/raid/md_p.h
index ac74133a4768..310068bb2a1d 100644
--- a/include/uapi/linux/raid/md_p.h
+++ b/include/uapi/linux/raid/md_p.h
@@ -291,7 +291,8 @@ struct mdp_superblock_1 {
 	__le64	resync_offset;	/* data before this offset (from data_offset) known to be in sync */
 	__le32	sb_csum;	/* checksum up to devs[max_dev] */
 	__le32	max_dev;	/* size of devs[] array to consider */
-	__u8	pad3[64-32];	/* set to 0 when writing */
+	__le32	logical_block_size;	/* same as q->limits->logical_block_size */
+	__u8	pad3[64-36];	/* set to 0 when writing */
 
 	/* device state information. Indexed by dev_number.
 	 * 2 bytes per device
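With super_1_sync() persisting the value into the new superblock field above, the configured size should be what the block layer reports once the array is assembled. A quick verification sketch; `md0` is illustrative:

```sh
# Both report the array's effective logical block size in bytes.
cat /sys/block/md0/queue/logical_block_size
blockdev --getss /dev/md0
```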
