From 22c992e1a868478b9fe83701cdf6329103c2ac06 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 1 Oct 2019 17:47:52 +0200 Subject: dm raid: change rs_set_dev_and_array_sectors API and callers Add a size argument to rs_set_dev_and_array_sectors as prerequisite to fixing grown device resynchronization not occuring when new MD bitmap pages have to be allocated as a result of the extension in a follwup patch. Also avoid code duplication by using rs_set_rdev_sectors in the aforementioned function. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'drivers/md/dm-raid.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index b0aa595e4375..89f805e851cf 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1615,13 +1615,12 @@ static int _check_data_dev_sectors(struct raid_set *rs) } /* Calculate the sectors per device and per array used for @rs */ -static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev) +static int rs_set_dev_and_array_sectors(struct raid_set *rs, sector_t sectors, bool use_mddev) { int delta_disks; unsigned int data_stripes; + sector_t array_sectors = sectors, dev_sectors = sectors; struct mddev *mddev = &rs->md; - struct md_rdev *rdev; - sector_t array_sectors = rs->ti->len, dev_sectors = rs->ti->len; if (use_mddev) { delta_disks = mddev->delta_disks; @@ -1656,12 +1655,9 @@ static int rs_set_dev_and_array_sectors(struct raid_set *rs, bool use_mddev) /* Striped layouts */ array_sectors = (data_stripes + delta_disks) * dev_sectors; - rdev_for_each(rdev, mddev) - if (!test_bit(Journal, &rdev->flags)) - rdev->sectors = dev_sectors; - mddev->array_sectors = array_sectors; mddev->dev_sectors = dev_sectors; + rs_set_rdev_sectors(rs); return _check_data_dev_sectors(rs); bad: @@ -2911,7 +2907,7 @@ static int rs_setup_reshape(struct raid_set *rs) /* Remove disk(s) */ } else if (rs->delta_disks < 0) { - r = rs_set_dev_and_array_sectors(rs, true); + r = rs_set_dev_and_array_sectors(rs, rs->ti->len, true); mddev->reshape_backwards = 1; /* removing disk(s) -> backward reshape */ /* Change layout and/or chunk size */ @@ -3067,7 +3063,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) * * Any existing superblock will overwrite the array and device sizes */ - r = rs_set_dev_and_array_sectors(rs, false); + r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false); if (r) goto bad; -- cgit v1.2.3 From 99273d9e6e19ce7bc393d294d60b8e0c0b971121 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 1 Oct 2019 17:47:53 +0200 Subject: dm raid: to ensure resynchronization, perform raid set grow in preresume This fixes a flaw causing raid set extensions not to be synchronized in case the MD bitmap resize required additional pages to be allocated. Also share resize code in the raid constructor between new size changes and those occuring during recovery. Bump the target version to define the change and document it in Documentation/admin-guide/device-mapper/dm-raid.rst. Reported-by: Steve D Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- .../admin-guide/device-mapper/dm-raid.rst | 2 + drivers/md/dm-raid.c | 81 ++++++++++++++++------ 2 files changed, 62 insertions(+), 21 deletions(-) (limited to 'drivers/md/dm-raid.c') diff --git a/Documentation/admin-guide/device-mapper/dm-raid.rst b/Documentation/admin-guide/device-mapper/dm-raid.rst index 2fe255b130fb..f6344675e395 100644 --- a/Documentation/admin-guide/device-mapper/dm-raid.rst +++ b/Documentation/admin-guide/device-mapper/dm-raid.rst @@ -417,3 +417,5 @@ Version History deadlock/potential data corruption. Update superblock when specific devices are requested via rebuild. Fix RAID leg rebuild errors. + 1.15.0 Fix size extensions not being synchronized in case of new MD bitmap + pages allocated; also fix those not occuring after previous reductions diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 89f805e851cf..5c84215d9b62 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -209,6 +209,7 @@ struct raid_dev { #define RT_FLAG_RS_SUSPENDED 5 #define RT_FLAG_RS_IN_SYNC 6 #define RT_FLAG_RS_RESYNCING 7 +#define RT_FLAG_RS_GROW 8 /* Array elements of 64 bit needed for rebuild/failed disk bits */ #define DISKS_ARRAY_ELEMS ((MAX_RAID_DEVICES + (sizeof(uint64_t) * 8 - 1)) / sizeof(uint64_t) / 8) @@ -241,6 +242,9 @@ struct raid_set { struct raid_type *raid_type; struct dm_target_callbacks callbacks; + sector_t array_sectors; + sector_t dev_sectors; + /* Optional raid4/5/6 journal device */ struct journal_dev { struct dm_dev *dev; @@ -3004,7 +3008,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) bool resize = false; struct raid_type *rt; unsigned int num_raid_params, num_raid_devs; - sector_t calculated_dev_sectors, rdev_sectors, reshape_sectors; + sector_t sb_array_sectors, rdev_sectors, reshape_sectors; struct raid_set *rs = NULL; const char *arg; struct rs_layout rs_layout; @@ -3067,7 +3071,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto bad; - calculated_dev_sectors = rs->md.dev_sectors; + /* Memorize just calculated, potentially larger sizes to grow the raid set in preresume */ + rs->array_sectors = rs->md.array_sectors; + rs->dev_sectors = rs->md.dev_sectors; /* * Backup any new raid set level, layout, ... @@ -3080,6 +3086,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) if (r) goto bad; + /* All in-core metadata now as of current superblocks after calling analyse_superblocks() */ + sb_array_sectors = rs->md.array_sectors; rdev_sectors = __rdev_sectors(rs); if (!rdev_sectors) { ti->error = "Invalid rdev size"; @@ -3089,8 +3097,11 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) reshape_sectors = _get_reshape_sectors(rs); - if (calculated_dev_sectors != rdev_sectors) - resize = calculated_dev_sectors != (reshape_sectors ? rdev_sectors - reshape_sectors : rdev_sectors); + if (rs->dev_sectors != rdev_sectors) { + resize = (rs->dev_sectors != rdev_sectors - reshape_sectors); + if (rs->dev_sectors > rdev_sectors - reshape_sectors) + set_bit(RT_FLAG_RS_GROW, &rs->runtime_flags); + } INIT_WORK(&rs->md.event_work, do_table_event); ti->private = rs; @@ -3117,13 +3128,8 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); rs_set_new(rs); } else if (rs_is_recovering(rs)) { - /* Rebuild particular devices */ - if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) { - set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); - rs_setup_recovery(rs, MaxSector); - } /* A recovering raid set may be resized */ - ; /* skip setup rs */ + goto size_check; } else if (rs_is_reshaping(rs)) { /* Have to reject size change request during reshape */ if (resize) { @@ -3167,6 +3173,9 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) rs_setup_recovery(rs, MaxSector); rs_set_new(rs); } else if (rs_reshape_requested(rs)) { + /* Only on size extensions, not on reshapes. */ + clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags); + /* * No need to check for 'ongoing' takeover here, because takeover * is an instant operation as oposed to an ongoing reshape. @@ -3197,13 +3206,30 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) } rs_set_cur(rs); } else { +size_check: /* May not set recovery when a device rebuild is requested */ if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) { - rs_setup_recovery(rs, MaxSector); + clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags); set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); - } else - rs_setup_recovery(rs, test_bit(__CTR_FLAG_SYNC, &rs->ctr_flags) ? - 0 : (resize ? calculated_dev_sectors : MaxSector)); + } else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) { + /* + * Set raid set to current size, i.e. non-grown size + * as of superblocks to grow to new size in preresume. + */ + r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false); + if (r) + goto bad; + + rs_setup_recovery(rs, rs->md.recovery_cp < rs->md.dev_sectors ? rs->md.recovery_cp : rs->md.dev_sectors); + } else { + /* This is no size change or it is shrinking, update size and record in superblocks */ + r = rs_set_dev_and_array_sectors(rs, rs->ti->len, false); + if (r) + goto bad; + + if (sb_array_sectors > rs->array_sectors) + set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + } rs_set_cur(rs); } @@ -3951,11 +3977,22 @@ static int raid_preresume(struct dm_target *ti) if (r) return r; - /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) */ - if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && - mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)) { - r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, - to_bytes(rs->requested_bitmap_chunk_sectors), 0); + /* We are extending the raid set size, adjust mddev/md_rdev sizes and set capacity. */ + if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) { + mddev->array_sectors = rs->array_sectors; + mddev->dev_sectors = rs->dev_sectors; + rs_set_rdev_sectors(rs); + rs_set_capacity(rs); + } + + /* Resize bitmap to adjust to changed region size (aka MD bitmap chunksize) or grown device size */ + if (test_bit(RT_FLAG_RS_BITMAP_LOADED, &rs->runtime_flags) && mddev->bitmap && + (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags) || + (rs->requested_bitmap_chunk_sectors && + mddev->bitmap_info.chunksize != to_bytes(rs->requested_bitmap_chunk_sectors)))) { + int chunksize = to_bytes(rs->requested_bitmap_chunk_sectors) ?: mddev->bitmap_info.chunksize; + + r = md_bitmap_resize(mddev->bitmap, mddev->dev_sectors, chunksize, 0); if (r) DMERR("Failed to resize bitmap"); } @@ -3964,8 +4001,10 @@ static int raid_preresume(struct dm_target *ti) /* Be prepared for mddev_resume() in raid_resume() */ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery); if (mddev->recovery_cp && mddev->recovery_cp < MaxSector) { - set_bit(MD_RECOVERY_SYNC, &mddev->recovery); + set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery); mddev->resync_min = mddev->recovery_cp; + if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) + mddev->resync_max_sectors = mddev->dev_sectors; } /* Check for any reshape request unless new raid set */ @@ -4013,7 +4052,7 @@ static void raid_resume(struct dm_target *ti) static struct target_type raid_target = { .name = "raid", - .version = {1, 14, 0}, + .version = {1, 15, 0}, .module = THIS_MODULE, .ctr = raid_ctr, .dtr = raid_dtr, -- cgit v1.2.3 From f9f3ee9130eb588c75e4a145837d4e6214947c40 Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 1 Oct 2019 17:47:54 +0200 Subject: dm raid: simplify rs_setup_recovery call chain rs_setup_recovery() sets the starting recovery offset. Drop superfluous rs_setup_recovery() and replace with __rs_setup_recovery(). Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 27 ++++++--------------------- 1 file changed, 6 insertions(+), 21 deletions(-) (limited to 'drivers/md/dm-raid.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 5c84215d9b62..3b99ef79dbe3 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -1670,7 +1670,7 @@ bad: } /* Setup recovery on @rs */ -static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) +static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) { /* raid0 does not recover */ if (rs_is_raid0(rs)) @@ -1691,22 +1691,6 @@ static void __rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) ? MaxSector : dev_sectors; } -/* Setup recovery on @rs based on raid type, device size and 'nosync' flag */ -static void rs_setup_recovery(struct raid_set *rs, sector_t dev_sectors) -{ - if (!dev_sectors) - /* New raid set or 'sync' flag provided */ - __rs_setup_recovery(rs, 0); - else if (dev_sectors == MaxSector) - /* Prevent recovery */ - __rs_setup_recovery(rs, MaxSector); - else if (__rdev_sectors(rs) < dev_sectors) - /* Grown raid set */ - __rs_setup_recovery(rs, __rdev_sectors(rs)); - else - __rs_setup_recovery(rs, MaxSector); -} - static void do_table_event(struct work_struct *ws) { struct raid_set *rs = container_of(ws, struct raid_set, md.event_work); @@ -2474,7 +2458,7 @@ static int super_validate(struct raid_set *rs, struct md_rdev *rdev) return -EINVAL; } - /* Enable bitmap creation for RAID levels != 0 */ + /* Enable bitmap creation on @rs unless no metadevs or raid0 or journaled raid4/5/6 set. */ mddev->bitmap_info.offset = (rt_is_raid0(rs->raid_type) || rs->journal_dev.dev) ? 0 : to_sector(4096); mddev->bitmap_info.default_offset = mddev->bitmap_info.offset; @@ -3173,7 +3157,7 @@ static int raid_ctr(struct dm_target *ti, unsigned int argc, char **argv) rs_setup_recovery(rs, MaxSector); rs_set_new(rs); } else if (rs_reshape_requested(rs)) { - /* Only on size extensions, not on reshapes. */ + /* Only request grow on raid set size extensions, not on reshapes. */ clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags); /* @@ -3211,10 +3195,11 @@ size_check: if (test_bit(__CTR_FLAG_REBUILD, &rs->ctr_flags)) { clear_bit(RT_FLAG_RS_GROW, &rs->runtime_flags); set_bit(RT_FLAG_UPDATE_SBS, &rs->runtime_flags); + rs_setup_recovery(rs, MaxSector); } else if (test_bit(RT_FLAG_RS_GROW, &rs->runtime_flags)) { /* - * Set raid set to current size, i.e. non-grown size - * as of superblocks to grow to new size in preresume. + * Set raid set to current size, i.e. size as of + * superblocks to grow to larger size in preresume. */ r = rs_set_dev_and_array_sectors(rs, sb_array_sectors, false); if (r) -- cgit v1.2.3 From 53be73a5d75f477e52c9275ed7aa9307a8b73e5c Mon Sep 17 00:00:00 2001 From: Heinz Mauelshagen Date: Tue, 1 Oct 2019 17:47:55 +0200 Subject: dm raid: streamline rs_get_progress() and its raid_status() caller side Pass already deciphered state into rs_get_progress, simplify recovery offset definition and combine two st_resync, st_reshape conditionals into one as is already the case with st_check and st_repair. Signed-off-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 47 ++++++++++++++++++++--------------------------- 1 file changed, 20 insertions(+), 27 deletions(-) (limited to 'drivers/md/dm-raid.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 3b99ef79dbe3..13fa90546a0f 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -3413,10 +3413,9 @@ static const char *__raid_dev_status(struct raid_set *rs, struct md_rdev *rdev) /* Helper to return resync/reshape progress for @rs and runtime flags for raid set in sync / resynching */ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, - sector_t resync_max_sectors) + enum sync_state state, sector_t resync_max_sectors) { sector_t r; - enum sync_state state; struct mddev *mddev = &rs->md; clear_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); @@ -3427,8 +3426,6 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); } else { - state = decipher_sync_action(mddev, recovery); - if (state == st_idle && !test_bit(MD_RECOVERY_INTR, &recovery)) r = mddev->recovery_cp; else @@ -3446,18 +3443,14 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, /* * In case we are recovering, the array is not in sync * and health chars should show the recovering legs. + * + * Already retrieved recovery offset from curr_resync_completed above. */ ; - else if (state == st_resync) - /* - * If "resync" is occurring, the raid set - * is or may be out of sync hence the health - * characters shall be 'a'. - */ - set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); - else if (state == st_reshape) + + else if (state == st_resync || state == st_reshape) /* - * If "reshape" is occurring, the raid set + * If "resync/reshape" is occurring, the raid set * is or may be out of sync hence the health * characters shall be 'a'. */ @@ -3471,22 +3464,22 @@ static sector_t rs_get_progress(struct raid_set *rs, unsigned long recovery, */ set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); - else { - struct md_rdev *rdev; - + else if (test_bit(MD_RECOVERY_NEEDED, &recovery)) /* * We are idle and recovery is needed, prevent 'A' chars race * caused by components still set to in-sync by constructor. */ - if (test_bit(MD_RECOVERY_NEEDED, &recovery)) - set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); + set_bit(RT_FLAG_RS_RESYNCING, &rs->runtime_flags); + else { /* - * The raid set may be doing an initial sync, or it may - * be rebuilding individual components. If all the - * devices are In_sync, then it is the raid set that is - * being initialized. + * We are idle and the raid set may be doing an initial + * sync, or it may be rebuilding individual components. + * If all the devices are In_sync, then it is the raid set + * that is being initialized. */ + struct md_rdev *rdev; + set_bit(RT_FLAG_RS_IN_SYNC, &rs->runtime_flags); rdev_for_each(rdev, mddev) if (!test_bit(Journal, &rdev->flags) && @@ -3519,7 +3512,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, unsigned int rebuild_disks; unsigned int write_mostly_params = 0; sector_t progress, resync_max_sectors, resync_mismatches; - const char *sync_action; + enum sync_state state; struct raid_type *rt; switch (type) { @@ -3533,14 +3526,14 @@ static void raid_status(struct dm_target *ti, status_type_t type, /* Access most recent mddev properties for status output */ smp_rmb(); - recovery = rs->md.recovery; /* Get sensible max sectors even if raid set not yet started */ resync_max_sectors = test_bit(RT_FLAG_RS_PRERESUMED, &rs->runtime_flags) ? mddev->resync_max_sectors : mddev->dev_sectors; - progress = rs_get_progress(rs, recovery, resync_max_sectors); + recovery = rs->md.recovery; + state = decipher_sync_action(mddev, recovery); + progress = rs_get_progress(rs, recovery, state, resync_max_sectors); resync_mismatches = (mddev->last_sync_action && !strcasecmp(mddev->last_sync_action, "check")) ? atomic64_read(&mddev->resync_mismatches) : 0; - sync_action = sync_str(decipher_sync_action(&rs->md, recovery)); /* HM FIXME: do we want another state char for raid0? It shows 'D'/'A'/'-' now */ for (i = 0; i < rs->raid_disks; i++) @@ -3568,7 +3561,7 @@ static void raid_status(struct dm_target *ti, status_type_t type, * See Documentation/admin-guide/device-mapper/dm-raid.rst for * information on each of these states. */ - DMEMIT(" %s", sync_action); + DMEMIT(" %s", sync_str(state)); /* * v1.5.0+: -- cgit v1.2.3 From 35ad035b8398c634d888324a299ec50a84cbc6cc Mon Sep 17 00:00:00 2001 From: Nathan Chancellor Date: Thu, 24 Oct 2019 13:28:03 -0700 Subject: dm raid: Remove unnecessary negation of a shift in raid10_format_to_md_layout When building with Clang + -Wtautological-constant-compare: drivers/md/dm-raid.c:619:8: warning: converting the result of '<<' to a boolean always evaluates to true [-Wtautological-constant-compare] r = !RAID10_OFFSET; ^ drivers/md/dm-raid.c:517:28: note: expanded from macro 'RAID10_OFFSET' #define RAID10_OFFSET (1 << 16) /* stripes with data copies area adjacent on devices */ ^ 1 warning generated. Negating a non-zero number will always make it zero, which is the default value of r in this function so this statement is unnecessary; remove it so that clang no longer warns. Link: https://github.com/ClangBuiltLinux/linux/issues/753 Signed-off-by: Nathan Chancellor Acked-by: Heinz Mauelshagen Signed-off-by: Mike Snitzer --- drivers/md/dm-raid.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/md/dm-raid.c') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 13fa90546a0f..c412eaa975fc 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -620,7 +620,6 @@ static int raid10_format_to_md_layout(struct raid_set *rs, } else if (algorithm == ALGORITHM_RAID10_FAR) { f = copies; - r = !RAID10_OFFSET; if (!test_bit(__CTR_FLAG_RAID10_USE_NEAR_SETS, &rs->ctr_flags)) r |= RAID10_USE_FAR_SETS; -- cgit v1.2.3