From 2c810cddc44d6f95cef75df3f07fc0850ff92417 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 May 2012 09:27:00 +1000 Subject: md: allow a reshape operation to be reversed. Currently a reshape operation always progresses from the start of the array to the end unless the number of devices is being reduced, in which case it progresses in the opposite direction. To reverse a partial reshape which changes the number of devices you can stop the array and re-assemble with the raid-disks numbers reversed and it will undo. However for a reshape that does not change the number of devices it is not possible to reverse the reshape in the middle - you have to wait until it completes. So add a 'reshape_direction' attribute which is either 'forwards' or 'backwards' and can be explicitly set when delta_disks is zero. This will become more important when we allow the data_offset to change in a reshape. Then the explicit statement of what direction is being used will be more useful. This can be enabled in raid5 trivially as it already supports reverse reshape and just needs to use a different trigger to request it. Signed-off-by: NeilBrown --- drivers/md/md.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/md.h') diff --git a/drivers/md/md.h b/drivers/md/md.h index 1c2063ccf48e..d51c0ca37777 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -262,6 +262,7 @@ struct mddev { sector_t reshape_position; int delta_disks, new_level, new_layout; int new_chunk_sectors; + int reshape_backwards; atomic_t plug_cnt; /* If device is expecting * more bios soon. -- cgit v1.2.3 From c6563a8c38fde3c1c7fc925a10bde3ca20799301 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 21 May 2012 09:27:00 +1000 Subject: md: add possibility to change data-offset for devices. When reshaping we can avoid costly intermediate backup by changing the 'start' address of the array on the device (if there is enough room). 
So as a first step, allow such a change to be requested through sysfs, and recorded in v1.x metadata. (As we didn't previously check that all 'pad' fields were zero, we need a new FEATURE flag for this. And (belatedly) check that all remaining 'pad' fields are zero to avoid a repeat of this) The new data offset must be requested separately for each device. This allows each to have a different change in the data offset. This is not likely to be used often but as data_offset can be set per-device, new_data_offset should be too. This patch also removes the 'acknowledged' arg to rdev_set_badblocks as it is never used and never will be. At the same time we add a new arg ('is_new') which is currently always zero but will be used more soon. When a reshape finishes we will need to update the data_offset and rdev->sectors. So provide an exported function to do that. Signed-off-by: NeilBrown --- drivers/md/md.c | 217 +++++++++++++++++++++++++++++++++++++++++----- drivers/md/md.h | 7 +- drivers/md/raid1.c | 4 +- drivers/md/raid10.c | 8 +- drivers/md/raid5.c | 10 ++- include/linux/raid/md_p.h | 10 ++- 6 files changed, 222 insertions(+), 34 deletions(-) (limited to 'drivers/md/md.h') diff --git a/drivers/md/md.c b/drivers/md/md.c index 44bb1d52dd4c..9fa98fc74b05 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1035,12 +1035,17 @@ static unsigned int calc_sb_csum(mdp_super_t * sb) struct super_type { char *name; struct module *owner; - int (*load_super)(struct md_rdev *rdev, struct md_rdev *refdev, + int (*load_super)(struct md_rdev *rdev, + struct md_rdev *refdev, int minor_version); - int (*validate_super)(struct mddev *mddev, struct md_rdev *rdev); - void (*sync_super)(struct mddev *mddev, struct md_rdev *rdev); + int (*validate_super)(struct mddev *mddev, + struct md_rdev *rdev); + void (*sync_super)(struct mddev *mddev, + struct md_rdev *rdev); unsigned long long (*rdev_size_change)(struct md_rdev *rdev, sector_t num_sectors); + int (*allow_new_offset)(struct md_rdev 
*rdev, + unsigned long long new_offset); }; /* @@ -1112,6 +1117,7 @@ static int super_90_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor rdev->preferred_minor = sb->md_minor; rdev->data_offset = 0; + rdev->new_data_offset = 0; rdev->sb_size = MD_SB_BYTES; rdev->badblocks.shift = -1; @@ -1438,6 +1444,12 @@ super_90_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) return num_sectors; } +static int +super_90_allow_new_offset(struct md_rdev *rdev, unsigned long long new_offset) +{ + /* non-zero offset changes not possible with v0.90 */ + return new_offset == 0; +} /* * version 1 superblock @@ -1473,6 +1485,7 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ struct mdp_superblock_1 *sb; int ret; sector_t sb_start; + sector_t sectors; char b[BDEVNAME_SIZE], b2[BDEVNAME_SIZE]; int bmask; @@ -1527,9 +1540,18 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ bdevname(rdev->bdev,b)); return -EINVAL; } + if (sb->pad0 || + sb->pad3[0] || + memcmp(sb->pad3, sb->pad3+1, sizeof(sb->pad3) - sizeof(sb->pad3[1]))) + /* Some padding is non-zero, might be a new feature */ + return -EINVAL; rdev->preferred_minor = 0xffff; rdev->data_offset = le64_to_cpu(sb->data_offset); + rdev->new_data_offset = rdev->data_offset; + if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE) && + (le32_to_cpu(sb->feature_map) & MD_FEATURE_NEW_OFFSET)) + rdev->new_data_offset += (s32)le32_to_cpu(sb->new_offset); atomic_set(&rdev->corrected_errors, le32_to_cpu(sb->cnt_corrected_read)); rdev->sb_size = le32_to_cpu(sb->max_dev) * 2 + 256; @@ -1540,6 +1562,9 @@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ if (minor_version && rdev->data_offset < sb_start + (rdev->sb_size/512)) return -EINVAL; + if (minor_version + && rdev->new_data_offset < sb_start + (rdev->sb_size/512)) + return -EINVAL; if (sb->level == cpu_to_le32(LEVEL_MULTIPATH)) rdev->desc_nr = -1; @@ -1611,16 +1636,14 
@@ static int super_1_load(struct md_rdev *rdev, struct md_rdev *refdev, int minor_ else ret = 0; } - if (minor_version) - rdev->sectors = (i_size_read(rdev->bdev->bd_inode) >> 9) - - le64_to_cpu(sb->data_offset); - else - rdev->sectors = rdev->sb_start; - if (rdev->sectors < le64_to_cpu(sb->data_size)) + if (minor_version) { + sectors = (i_size_read(rdev->bdev->bd_inode) >> 9); + sectors -= rdev->data_offset; + } else + sectors = rdev->sb_start; + if (sectors < le64_to_cpu(sb->data_size)) return -EINVAL; rdev->sectors = le64_to_cpu(sb->data_size); - if (le64_to_cpu(sb->size) > rdev->sectors) - return -EINVAL; return ret; } @@ -1745,7 +1768,6 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->feature_map = 0; sb->pad0 = 0; sb->recovery_offset = cpu_to_le64(0); - memset(sb->pad1, 0, sizeof(sb->pad1)); memset(sb->pad3, 0, sizeof(sb->pad3)); sb->utime = cpu_to_le64((__u64)mddev->utime); @@ -1767,6 +1789,8 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->devflags |= WriteMostly1; else sb->devflags &= ~WriteMostly1; + sb->data_offset = cpu_to_le64(rdev->data_offset); + sb->data_size = cpu_to_le64(rdev->sectors); if (mddev->bitmap && mddev->bitmap_info.file == NULL) { sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_info.offset); @@ -1795,6 +1819,12 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) mddev->reshape_backwards) sb->feature_map |= cpu_to_le32(MD_FEATURE_RESHAPE_BACKWARDS); + if (rdev->new_data_offset != rdev->data_offset) { + sb->feature_map + |= cpu_to_le32(MD_FEATURE_NEW_OFFSET); + sb->new_offset = cpu_to_le32((__u32)(rdev->new_data_offset + - rdev->data_offset)); + } } if (rdev->badblocks.count == 0) @@ -1871,6 +1901,8 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) sector_t max_sectors; if (num_sectors && num_sectors < rdev->mddev->dev_sectors) return 0; /* component must fit device */ + if (rdev->data_offset != rdev->new_data_offset) + return 0; /* too 
confusing */ if (rdev->sb_start < rdev->data_offset) { /* minor versions 1 and 2; superblock before data */ max_sectors = i_size_read(rdev->bdev->bd_inode) >> 9; @@ -1898,6 +1930,40 @@ super_1_rdev_size_change(struct md_rdev *rdev, sector_t num_sectors) rdev->sb_page); md_super_wait(rdev->mddev); return num_sectors; + +} + +static int +super_1_allow_new_offset(struct md_rdev *rdev, + unsigned long long new_offset) +{ + /* All necessary checks on new >= old have been done */ + struct bitmap *bitmap; + if (new_offset >= rdev->data_offset) + return 1; + + /* with 1.0 metadata, there is no metadata to tread on + * so we can always move back */ + if (rdev->mddev->minor_version == 0) + return 1; + + /* otherwise we must be sure not to step on + * any metadata, so stay: + * 36K beyond start of superblock + * beyond end of badblocks + * beyond write-intent bitmap + */ + if (rdev->sb_start + (32+4)*2 > new_offset) + return 0; + bitmap = rdev->mddev->bitmap; + if (bitmap && !rdev->mddev->bitmap_info.file && + rdev->sb_start + rdev->mddev->bitmap_info.offset + + bitmap->file_pages * (PAGE_SIZE>>9) > new_offset) + return 0; + if (rdev->badblocks.sector + rdev->badblocks.size > new_offset) + return 0; + + return 1; } static struct super_type super_types[] = { @@ -1908,6 +1974,7 @@ static struct super_type super_types[] = { .validate_super = super_90_validate, .sync_super = super_90_sync, .rdev_size_change = super_90_rdev_size_change, + .allow_new_offset = super_90_allow_new_offset, }, [1] = { .name = "md-1", @@ -1916,6 +1983,7 @@ static struct super_type super_types[] = { .validate_super = super_1_validate, .sync_super = super_1_sync, .rdev_size_change = super_1_rdev_size_change, + .allow_new_offset = super_1_allow_new_offset, }, }; @@ -2823,9 +2891,8 @@ offset_show(struct md_rdev *rdev, char *page) static ssize_t offset_store(struct md_rdev *rdev, const char *buf, size_t len) { - char *e; - unsigned long long offset = simple_strtoull(buf, &e, 10); - if (e==buf || (*e && *e != 
'\n')) + unsigned long long offset; + if (strict_strtoull(buf, 10, &offset) < 0) return -EINVAL; if (rdev->mddev->pers && rdev->raid_disk >= 0) return -EBUSY; @@ -2840,6 +2907,63 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len) static struct rdev_sysfs_entry rdev_offset = __ATTR(offset, S_IRUGO|S_IWUSR, offset_show, offset_store); +static ssize_t new_offset_show(struct md_rdev *rdev, char *page) +{ + return sprintf(page, "%llu\n", + (unsigned long long)rdev->new_data_offset); +} + +static ssize_t new_offset_store(struct md_rdev *rdev, + const char *buf, size_t len) +{ + unsigned long long new_offset; + struct mddev *mddev = rdev->mddev; + + if (strict_strtoull(buf, 10, &new_offset) < 0) + return -EINVAL; + + if (mddev->sync_thread) + return -EBUSY; + if (new_offset == rdev->data_offset) + /* reset is always permitted */ + ; + else if (new_offset > rdev->data_offset) { + /* must not push array size beyond rdev_sectors */ + if (new_offset - rdev->data_offset + + mddev->dev_sectors > rdev->sectors) + return -E2BIG; + } + /* Metadata worries about other space details. */ + + /* decreasing the offset is inconsistent with a backwards + * reshape. + */ + if (new_offset < rdev->data_offset && + mddev->reshape_backwards) + return -EINVAL; + /* Increasing offset is inconsistent with forwards + * reshape. reshape_direction should be set to + * 'backwards' first. 
+ */ + if (new_offset > rdev->data_offset && + !mddev->reshape_backwards) + return -EINVAL; + + if (mddev->pers && mddev->persistent && + !super_types[mddev->major_version] + .allow_new_offset(rdev, new_offset)) + return -E2BIG; + rdev->new_data_offset = new_offset; + if (new_offset > rdev->data_offset) + mddev->reshape_backwards = 1; + else if (new_offset < rdev->data_offset) + mddev->reshape_backwards = 0; + + return len; +} +static struct rdev_sysfs_entry rdev_new_offset = +__ATTR(new_offset, S_IRUGO|S_IWUSR, new_offset_show, new_offset_store); + static ssize_t rdev_size_show(struct md_rdev *rdev, char *page) { @@ -2884,6 +3008,8 @@ rdev_size_store(struct md_rdev *rdev, const char *buf, size_t len) if (strict_blocks_to_sectors(buf, &sectors) < 0) return -EINVAL; + if (rdev->data_offset != rdev->new_data_offset) + return -EINVAL; /* too confusing */ if (my_mddev->pers && rdev->raid_disk >= 0) { if (my_mddev->persistent) { sectors = super_types[my_mddev->major_version]. @@ -3020,6 +3146,7 @@ static struct attribute *rdev_default_attrs[] = { &rdev_errors.attr, &rdev_slot.attr, &rdev_offset.attr, + &rdev_new_offset.attr, &rdev_size.attr, &rdev_recovery_start.attr, &rdev_bad_blocks.attr, @@ -3094,6 +3221,7 @@ int md_rdev_init(struct md_rdev *rdev) rdev->raid_disk = -1; rdev->flags = 0; rdev->data_offset = 0; + rdev->new_data_offset = 0; rdev->sb_events = 0; rdev->last_read_error.tv_sec = 0; rdev->last_read_error.tv_nsec = 0; @@ -3598,7 +3726,17 @@ raid_disks_store(struct mddev *mddev, const char *buf, size_t len) if (mddev->pers) rv = update_raid_disks(mddev, n); else if (mddev->reshape_position != MaxSector) { + struct md_rdev *rdev; int olddisks = mddev->raid_disks - mddev->delta_disks; + + rdev_for_each(rdev, mddev) { + if (olddisks < n && + rdev->data_offset < rdev->new_data_offset) + return -EINVAL; + if (olddisks > n && + rdev->data_offset > rdev->new_data_offset) + return -EINVAL; + } mddev->delta_disks = n - olddisks; mddev->raid_disks = n; 
mddev->reshape_backwards = (mddev->delta_disks < 0); @@ -4445,6 +4583,7 @@ reshape_position_show(struct mddev *mddev, char *page) static ssize_t reshape_position_store(struct mddev *mddev, const char *buf, size_t len) { + struct md_rdev *rdev; char *e; unsigned long long new = simple_strtoull(buf, &e, 10); if (mddev->pers) @@ -4457,6 +4596,8 @@ reshape_position_store(struct mddev *mddev, const char *buf, size_t len) mddev->new_level = mddev->level; mddev->new_layout = mddev->layout; mddev->new_chunk_sectors = mddev->chunk_sectors; + rdev_for_each(rdev, mddev) + rdev->new_data_offset = rdev->data_offset; return len; } @@ -6001,6 +6142,7 @@ static int update_size(struct mddev *mddev, sector_t num_sectors) static int update_raid_disks(struct mddev *mddev, int raid_disks) { int rv; + struct md_rdev *rdev; /* change the number of raid disks */ if (mddev->pers->check_reshape == NULL) return -EINVAL; @@ -6009,6 +6151,16 @@ static int update_raid_disks(struct mddev *mddev, int raid_disks) return -EINVAL; if (mddev->sync_thread || mddev->reshape_position != MaxSector) return -EBUSY; + + rdev_for_each(rdev, mddev) { + if (mddev->raid_disks < raid_disks && + rdev->data_offset < rdev->new_data_offset) + return -EINVAL; + if (mddev->raid_disks > raid_disks && + rdev->data_offset > rdev->new_data_offset) + return -EINVAL; + } + mddev->delta_disks = raid_disks - mddev->raid_disks; if (mddev->delta_disks < 0) mddev->reshape_backwards = 1; @@ -7709,6 +7861,20 @@ void md_wait_for_blocked_rdev(struct md_rdev *rdev, struct mddev *mddev) } EXPORT_SYMBOL(md_wait_for_blocked_rdev); +void md_finish_reshape(struct mddev *mddev) +{ + /* called be personality module when reshape completes. 
*/ + struct md_rdev *rdev; + + rdev_for_each(rdev, mddev) { + if (rdev->data_offset > rdev->new_data_offset) + rdev->sectors += rdev->data_offset - rdev->new_data_offset; + else + rdev->sectors -= rdev->new_data_offset - rdev->data_offset; + rdev->data_offset = rdev->new_data_offset; + } +} +EXPORT_SYMBOL(md_finish_reshape); /* Bad block management. * We can record which blocks on each device are 'bad' and so just @@ -7957,10 +8123,15 @@ static int md_set_badblocks(struct badblocks *bb, sector_t s, int sectors, } int rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, - int acknowledged) + int is_new) { - int rv = md_set_badblocks(&rdev->badblocks, - s + rdev->data_offset, sectors, acknowledged); + int rv; + if (is_new) + s += rdev->new_data_offset; + else + s += rdev->data_offset; + rv = md_set_badblocks(&rdev->badblocks, + s, sectors, 0); if (rv) { /* Make sure they get written out promptly */ sysfs_notify_dirent_safe(rdev->sysfs_state); @@ -8066,11 +8237,15 @@ out: return rv; } -int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors) +int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, + int is_new) { + if (is_new) + s += rdev->new_data_offset; + else + s += rdev->data_offset; return md_clear_badblocks(&rdev->badblocks, - s + rdev->data_offset, - sectors); + s, sectors); } EXPORT_SYMBOL_GPL(rdev_clear_badblocks); diff --git a/drivers/md/md.h b/drivers/md/md.h index d51c0ca37777..98913e8dac1a 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -55,6 +55,7 @@ struct md_rdev { int sb_loaded; __u64 sb_events; sector_t data_offset; /* start of data in array */ + sector_t new_data_offset;/* only relevant while reshaping */ sector_t sb_start; /* offset of the super block (in 512byte sectors) */ int sb_size; /* bytes in the superblock */ int preferred_minor; /* autorun support */ @@ -193,8 +194,9 @@ static inline int is_badblock(struct md_rdev *rdev, sector_t s, int sectors, return 0; } extern int 
rdev_set_badblocks(struct md_rdev *rdev, sector_t s, int sectors, - int acknowledged); -extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors); + int is_new); +extern int rdev_clear_badblocks(struct md_rdev *rdev, sector_t s, int sectors, + int is_new); extern void md_ack_all_badblocks(struct badblocks *bb); struct mddev { @@ -592,6 +594,7 @@ extern void md_write_start(struct mddev *mddev, struct bio *bi); extern void md_write_end(struct mddev *mddev); extern void md_done_sync(struct mddev *mddev, int blocks, int ok); extern void md_error(struct mddev *mddev, struct md_rdev *rdev); +extern void md_finish_reshape(struct mddev *mddev); extern int mddev_congested(struct mddev *mddev, int bits); extern void md_flush_request(struct mddev *mddev, struct bio *bio); diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 15dd59b84e94..71a7dc038a82 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -2024,7 +2024,7 @@ static void handle_sync_write_finished(struct r1conf *conf, struct r1bio *r1_bio continue; if (test_bit(BIO_UPTODATE, &bio->bi_flags) && test_bit(R1BIO_MadeGood, &r1_bio->state)) { - rdev_clear_badblocks(rdev, r1_bio->sector, s); + rdev_clear_badblocks(rdev, r1_bio->sector, s, 0); } if (!test_bit(BIO_UPTODATE, &bio->bi_flags) && test_bit(R1BIO_WriteError, &r1_bio->state)) { @@ -2044,7 +2044,7 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio) struct md_rdev *rdev = conf->mirrors[m].rdev; rdev_clear_badblocks(rdev, r1_bio->sector, - r1_bio->sectors); + r1_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); } else if (r1_bio->bios[m] != NULL) { /* This drive got a write error. 
We need to diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 3f91c2e1dfe7..832fb4d56657 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -2480,7 +2480,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, - r10_bio->sectors); + r10_bio->sectors, 0); } else { if (!rdev_set_badblocks( rdev, @@ -2496,7 +2496,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, - r10_bio->sectors); + r10_bio->sectors, 0); } else { if (!rdev_set_badblocks( rdev, @@ -2515,7 +2515,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, - r10_bio->sectors); + r10_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); } else if (bio != NULL && !test_bit(BIO_UPTODATE, &bio->bi_flags)) { @@ -2532,7 +2532,7 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio) rdev_clear_badblocks( rdev, r10_bio->devs[m].addr, - r10_bio->sectors); + r10_bio->sectors, 0); rdev_dec_pending(rdev, conf->mddev); } } diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 0abbd3447cfb..3705585d7567 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3561,7 +3561,7 @@ finish: if (test_and_clear_bit(R5_MadeGood, &dev->flags)) { rdev = conf->disks[i].rdev; rdev_clear_badblocks(rdev, sh->sector, - STRIPE_SECTORS); + STRIPE_SECTORS, 0); rdev_dec_pending(rdev, conf->mddev); } if (test_and_clear_bit(R5_MadeGoodRepl, &dev->flags)) { @@ -3570,7 +3570,7 @@ finish: /* rdev have been moved down */ rdev = conf->disks[i].rdev; rdev_clear_badblocks(rdev, sh->sector, - STRIPE_SECTORS); + STRIPE_SECTORS, 0); rdev_dec_pending(rdev, conf->mddev); } } @@ -5505,10 +5505,14 @@ static int raid5_start_reshape(struct mddev *mddev) if (!check_stripe_cache(mddev)) return -ENOSPC; - rdev_for_each(rdev, mddev) + rdev_for_each(rdev, 
mddev) { + /* Don't support changing data_offset yet */ + if (rdev->new_data_offset != rdev->data_offset) + return -EINVAL; if (!test_bit(In_sync, &rdev->flags) && !test_bit(Faulty, &rdev->flags)) spares++; + } if (spares - mddev->degraded < mddev->delta_disks - conf->max_degraded) /* Not enough devices even to make a degraded array diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 07e05f92d050..ee753536ab70 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -233,7 +233,10 @@ struct mdp_superblock_1 { __le32 delta_disks; /* change in number of raid_disks */ __le32 new_layout; /* new layout */ __le32 new_chunk; /* new chunk size (512byte sectors) */ - __u8 pad1[128-124]; /* set to 0 when written */ + __le32 new_offset; /* signed number to add to data_offset in new + * layout. 0 == no-change. This can be + * different on each device in the array. + */ /* constant this-device information - 64 bytes */ __le64 data_offset; /* sector start of data, often 0 */ @@ -285,11 +288,14 @@ struct mdp_superblock_1 { * of devices, but is going * backwards anyway. */ +#define MD_FEATURE_NEW_OFFSET 64 /* new_offset must be honoured */ #define MD_FEATURE_ALL (MD_FEATURE_BITMAP_OFFSET \ |MD_FEATURE_RECOVERY_OFFSET \ |MD_FEATURE_RESHAPE_ACTIVE \ |MD_FEATURE_BAD_BLOCKS \ |MD_FEATURE_REPLACEMENT \ - |MD_FEATURE_RESHAPE_BACKWARDS) + |MD_FEATURE_RESHAPE_BACKWARDS \ + |MD_FEATURE_NEW_OFFSET \ + ) #endif -- cgit v1.2.3 From 545c87957f4d53867b62921625f36df8c4b1bc08 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 May 2012 13:54:30 +1000 Subject: md: dm-raid should call helper function to clear rdev. dm-raid currently open-codes the freeing of some members of an rdev. It is more maintainable to have it call common code from md.c which does this for all call-sites. 
So rename free_disk_sb to md_rdev_clear, export it, and use it in dm-raid.c Signed-off-by: NeilBrown --- drivers/md/dm-raid.c | 5 +---- drivers/md/md.c | 8 ++++---- drivers/md/md.h | 1 + 3 files changed, 6 insertions(+), 8 deletions(-) (limited to 'drivers/md/md.h') diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c index 68965e663248..73a068da10d9 100644 --- a/drivers/md/dm-raid.c +++ b/drivers/md/dm-raid.c @@ -155,10 +155,7 @@ static void context_free(struct raid_set *rs) for (i = 0; i < rs->md.raid_disks; i++) { if (rs->dev[i].meta_dev) dm_put_device(rs->ti, rs->dev[i].meta_dev); - if (rs->dev[i].rdev.sb_page) - put_page(rs->dev[i].rdev.sb_page); - rs->dev[i].rdev.sb_page = NULL; - rs->dev[i].rdev.sb_loaded = 0; + md_rdev_clear(&rs->dev[i].rdev); if (rs->dev[i].data_dev) dm_put_device(rs->ti, rs->dev[i].data_dev); } diff --git a/drivers/md/md.c b/drivers/md/md.c index 8fe1abf1b89c..d557e557ff8f 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -803,7 +803,7 @@ static int alloc_disk_sb(struct md_rdev * rdev) return 0; } -static void free_disk_sb(struct md_rdev * rdev) +void md_rdev_clear(struct md_rdev *rdev) { if (rdev->sb_page) { put_page(rdev->sb_page); @@ -817,7 +817,7 @@ static void free_disk_sb(struct md_rdev * rdev) rdev->bb_page = NULL; } } - +EXPORT_SYMBOL_GPL(md_rdev_clear); static void super_written(struct bio *bio, int error) { @@ -2244,7 +2244,7 @@ static void export_rdev(struct md_rdev * rdev) bdevname(rdev->bdev,b)); if (rdev->mddev) MD_BUG(); - free_disk_sb(rdev); + md_rdev_clear(rdev); #ifndef MODULE if (test_bit(AutoDetected, &rdev->flags)) md_autodetect_dev(rdev->bdev->bd_dev); @@ -3324,7 +3324,7 @@ static struct md_rdev *md_import_device(dev_t newdev, int super_format, int supe abort_free: if (rdev->bdev) unlock_rdev(rdev); - free_disk_sb(rdev); + md_rdev_clear(rdev); kfree(rdev->badblocks.page); kfree(rdev); return ERR_PTR(err); diff --git a/drivers/md/md.h b/drivers/md/md.h index 98913e8dac1a..360937389e64 100644 --- 
a/drivers/md/md.h +++ b/drivers/md/md.h @@ -619,6 +619,7 @@ extern int md_run(struct mddev *mddev); extern void md_stop(struct mddev *mddev); extern void md_stop_writes(struct mddev *mddev); extern int md_rdev_init(struct md_rdev *rdev); +extern void md_rdev_clear(struct md_rdev *rdev); extern void mddev_suspend(struct mddev *mddev); extern void mddev_resume(struct mddev *mddev); -- cgit v1.2.3 From 6409bb05a9831f6af36a20b97cda13059c2ef1b6 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 22 May 2012 13:55:07 +1000 Subject: md/bitmap: add new 'space' attribute for bitmaps. If we are to allow bitmaps to be resized when the array is resized, we need to know how much space there is. So create an attribute to store this information and set appropriate defaults. It can be set more precisely via sysfs, or future metadata extensions may allow it to be recorded. Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 39 +++++++++++++++++++++++++++++++++++++++ drivers/md/md.c | 33 +++++++++++++++++++++++++++++++-- drivers/md/md.h | 3 +++ 3 files changed, 73 insertions(+), 2 deletions(-) (limited to 'drivers/md/md.h') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index c7784a985676..ac688fb54e1d 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1934,6 +1934,44 @@ location_store(struct mddev *mddev, const char *buf, size_t len) static struct md_sysfs_entry bitmap_location = __ATTR(location, S_IRUGO|S_IWUSR, location_show, location_store); +/* 'bitmap/space' is the space available at 'location' for the + * bitmap. This allows the kernel to know when it is safe to + * resize the bitmap to match a resized array. 
+ */ +static ssize_t +space_show(struct mddev *mddev, char *page) +{ + return sprintf(page, "%lu\n", mddev->bitmap_info.space); +} + +static ssize_t +space_store(struct mddev *mddev, const char *buf, size_t len) +{ + unsigned long sectors; + int rv; + + rv = kstrtoul(buf, 10, &sectors); + if (rv) + return rv; + + if (sectors == 0) + return -EINVAL; + + if (mddev->bitmap && + sectors < ((mddev->bitmap->file_pages - 1) * PAGE_SIZE + + mddev->bitmap->last_page_size + 511) >> 9) + return -EFBIG; /* Bitmap is too big for this small space */ + + /* could make sure it isn't too big, but that isn't really + * needed - user-space should be careful. + */ + mddev->bitmap_info.space = sectors; + return len; +} + +static struct md_sysfs_entry bitmap_space = +__ATTR(space, S_IRUGO|S_IWUSR, space_show, space_store); + static ssize_t timeout_show(struct mddev *mddev, char *page) { @@ -2109,6 +2147,7 @@ __ATTR(max_backlog_used, S_IRUGO | S_IWUSR, static struct attribute *md_bitmap_attrs[] = { &bitmap_location.attr, + &bitmap_space.attr, &bitmap_timeout.attr, &bitmap_backlog.attr, &bitmap_chunksize.attr, diff --git a/drivers/md/md.c b/drivers/md/md.c index ac99616f48d4..9a677f2078a7 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1197,7 +1197,10 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->dev_sectors = ((sector_t)sb->size) * 2; mddev->events = ev1; mddev->bitmap_info.offset = 0; + mddev->bitmap_info.space = 0; + /* bitmap can use 60 K after the 4K superblocks */ mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; + mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9); mddev->reshape_backwards = 0; if (mddev->minor_version >= 91) { @@ -1234,9 +1237,12 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->max_disks = MD_SB_DISKS; if (sb->state & (1<<MD_SB_BITMAP_PRESENT) && - mddev->bitmap_info.file == NULL) + mddev->bitmap_info.file == NULL) { mddev->bitmap_info.offset = mddev->bitmap_info.default_offset; + mddev->bitmap_info.space = + 
mddev->bitmap_info.space; + } } else if (mddev->pers == NULL) { /* Insist on good event counter while assembling, except @@ -1677,7 +1683,12 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->dev_sectors = le64_to_cpu(sb->size); mddev->events = ev1; mddev->bitmap_info.offset = 0; + mddev->bitmap_info.space = 0; + /* Default location for bitmap is 1K after superblock + * using 3K - total of 4K + */ mddev->bitmap_info.default_offset = 1024 >> 9; + mddev->bitmap_info.default_space = (4096-1024) >> 9; mddev->reshape_backwards = 0; mddev->recovery_cp = le64_to_cpu(sb->resync_offset); @@ -1686,9 +1697,23 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->max_disks = (4096-256)/2; if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_BITMAP_OFFSET) && - mddev->bitmap_info.file == NULL ) + mddev->bitmap_info.file == NULL) { mddev->bitmap_info.offset = (__s32)le32_to_cpu(sb->bitmap_offset); + /* Metadata doesn't record how much space is available. + * For 1.0, we assume we can use up to the superblock + * if before, else to 4K beyond superblock. + * For others, assume no change is possible. 
+ */ + if (mddev->minor_version > 0) + mddev->bitmap_info.space = 0; + else if (mddev->bitmap_info.offset > 0) + mddev->bitmap_info.space = + 8 - mddev->bitmap_info.offset; + else + mddev->bitmap_info.space = + -mddev->bitmap_info.offset; + } if ((le32_to_cpu(sb->feature_map) & MD_FEATURE_RESHAPE_ACTIVE)) { mddev->reshape_position = le64_to_cpu(sb->reshape_position); @@ -5280,6 +5305,7 @@ static void md_clean(struct mddev *mddev) mddev->merge_check_needed = 0; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; + mddev->bitmap_info.default_space = 0; mddev->bitmap_info.chunksize = 0; mddev->bitmap_info.daemon_sleep = 0; mddev->bitmap_info.max_write_behind = 0; @@ -6076,6 +6102,7 @@ static int set_array_info(struct mddev * mddev, mdu_array_info_t *info) set_bit(MD_CHANGE_DEVS, &mddev->flags); mddev->bitmap_info.default_offset = MD_SB_BYTES >> 9; + mddev->bitmap_info.default_space = 64*2 - (MD_SB_BYTES >> 9); mddev->bitmap_info.offset = 0; mddev->reshape_position = MaxSector; @@ -6258,6 +6285,8 @@ static int update_array_info(struct mddev *mddev, mdu_array_info_t *info) return -EINVAL; mddev->bitmap_info.offset = mddev->bitmap_info.default_offset; + mddev->bitmap_info.space = + mddev->bitmap_info.default_space; mddev->pers->quiesce(mddev, 1); rv = bitmap_create(mddev); if (!rv) diff --git a/drivers/md/md.h b/drivers/md/md.h index 360937389e64..7b4a3c318cae 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -393,10 +393,13 @@ struct mddev { * For external metadata, offset * from start of device. */ + unsigned long space; /* space available at this offset */ loff_t default_offset; /* this is the offset to use when * hot-adding a bitmap. It should * eventually be settable by sysfs. */ + unsigned long default_space; /* space available at + * default offset */ struct mutex mutex; unsigned long chunksize; unsigned long daemon_sleep; /* how many jiffies between updates? */ -- cgit v1.2.3