From 63fe08177f92ce21929df8af6361491b98ad12cd Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 14 Apr 2009 12:01:53 +1000 Subject: md: tiny md.h cleanups - update inclusion guard and make sure it covers the whole file - remove superflous #ifdef CONFIG_BLOCK - make sure all required headers are included so that new users aren't required to include others before Signed-off-by: Christoph Hellwig Signed-off-by: NeilBrown --- drivers/md/md.h | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.h b/drivers/md/md.h index e9b7f54c24d6..8227ab909d44 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -12,10 +12,17 @@ Software Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA. */ -#ifndef _MD_K_H -#define _MD_K_H - -#ifdef CONFIG_BLOCK +#ifndef _MD_MD_H +#define _MD_MD_H + +#include +#include +#include +#include +#include +#include +#include +#include #define MaxSector (~(sector_t)0) @@ -408,10 +415,6 @@ static inline void safe_put_page(struct page *p) if (p) put_page(p); } -#endif /* CONFIG_BLOCK */ -#endif - - extern int register_md_personality(struct mdk_personality *p); extern int unregister_md_personality(struct mdk_personality *p); extern mdk_thread_t * md_register_thread(void (*run) (mddev_t *mddev), @@ -434,3 +437,5 @@ extern void md_new_event(mddev_t *mddev); extern int md_allow_write(mddev_t *mddev); extern void md_wait_for_blocked_rdev(mdk_rdev_t *rdev, mddev_t *mddev); extern void md_set_array_sectors(mddev_t *mddev, sector_t array_sectors); + +#endif /* _MD_MD_H */ -- cgit v1.2.3 From 6d56e278444bc8323b1bcfcada126b8e4dbba0f4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 14 Apr 2009 12:01:57 +1000 Subject: md: allow setting newly added device to 'in_sync' via sysfs. When adding devices to an active array via sysfs, there is currently no way to mark a device as 'in-sync' which is useful when incrementally assembling an array. So add that option. Signed-off-by: NeilBrown --- drivers/md/md.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index ed5727c089a9..298731b8e951 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2086,6 +2086,7 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) * -writemostly - clears write_mostly * blocked - sets the Blocked flag * -blocked - clears the Blocked flag + * insync - sets Insync providing device isn't active */ int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { @@ -2117,6 +2118,9 @@ state_store(mdk_rdev_t *rdev, const char *buf, size_t len) set_bit(MD_RECOVERY_NEEDED, &rdev->mddev->recovery); md_wakeup_thread(rdev->mddev->thread); + err = 0; + } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { + set_bit(In_sync, &rdev->flags); err = 0; } if (!err && rdev->sysfs_state) @@ -2190,7 +2194,7 @@ slot_store(mdk_rdev_t *rdev, const char *buf, size_t len) } else if (rdev->mddev->pers) { mdk_rdev_t *rdev2; /* Activating a spare .. or possibly reactivating - * if we every get bitmaps working here. + * if we ever get bitmaps working here. */ if (rdev->raid_disk != -1) -- cgit v1.2.3 From acb180b0e335ad88acfed6c8a33e39c05b95dc49 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 14 Apr 2009 16:28:34 +1000 Subject: md: improve usefulness and accuracy of sysfs file md/sync_completed. The sync_completed file reports how much of a resync (or recovery or reshape) has been completed. However due to the possibility of out-of-order completion of writes, it is not certain to be accurate. We have an internal value - mddev->curr_resync_completed - which is an accurate value (though it might not always be quite so uptodate). So: - make curr_resync_completed be uptodate a little more often, particularly when raid5 reshape updates status in the metadata - report curr_resync_completed in the sysfs file - allow poll/select to report all updates to md/sync_completed. This makes sync_completed completed usable by any external metadata handler that wants to record this status information in its metadata. Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 1 + drivers/md/md.c | 34 ++++++++++++++++++++++------------ drivers/md/raid5.c | 4 ++++ 3 files changed, 27 insertions(+), 12 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index f8a9f7ab2cb8..e4510c976b34 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1479,6 +1479,7 @@ void bitmap_cond_end_sync(struct bitmap *bitmap, sector_t sector) s += blocks; } bitmap->last_end_sync = jiffies; + sysfs_notify(&bitmap->mddev->kobj, NULL, "sync_completed"); } static void bitmap_set_memory_bits(struct bitmap *bitmap, sector_t offset, int needed) diff --git a/drivers/md/md.c b/drivers/md/md.c index 298731b8e951..7af64f3846a6 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -2017,6 +2017,8 @@ repeat: clear_bit(MD_CHANGE_PENDING, &mddev->flags); spin_unlock_irq(&mddev->write_lock); wake_up(&mddev->sb_wait); + if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } @@ -3486,12 +3488,15 @@ sync_completed_show(mddev_t *mddev, char *page) { unsigned long max_sectors, resync; + if (!test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) + return sprintf(page, "none\n"); + if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) max_sectors = mddev->resync_max_sectors; else max_sectors = mddev->dev_sectors; - resync = (mddev->curr_resync - atomic_read(&mddev->recovery_active)); + resync = mddev->curr_resync_completed; return sprintf(page, "%lu / %lu\n", resync, max_sectors); } @@ -6338,18 +6343,12 @@ void md_do_sync(mddev_t *mddev) sector_t sectors; skipped = 0; - if (j >= mddev->resync_max) { - sysfs_notify(&mddev->kobj, NULL, "sync_completed"); - wait_event(mddev->recovery_wait, - mddev->resync_max > j - || kthread_should_stop()); - } - if (kthread_should_stop()) - goto interrupted; - if (mddev->curr_resync > mddev->curr_resync_completed && - (mddev->curr_resync - mddev->curr_resync_completed) - > (max_sectors >> 4)) { + if ((mddev->curr_resync > mddev->curr_resync_completed && + (mddev->curr_resync - mddev->curr_resync_completed) + > (max_sectors >> 4)) || + j >= mddev->resync_max + ) { /* time to update curr_resync_completed */ blk_unplug(mddev->queue); wait_event(mddev->recovery_wait, @@ -6357,7 +6356,17 @@ void md_do_sync(mddev_t *mddev) mddev->curr_resync_completed = mddev->curr_resync; set_bit(MD_CHANGE_CLEAN, &mddev->flags); + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } + + if (j >= mddev->resync_max) + wait_event(mddev->recovery_wait, + mddev->resync_max > j + || kthread_should_stop()); + + if (kthread_should_stop()) + goto interrupted; + sectors = mddev->pers->sync_request(mddev, j, &skipped, currspeed < speed_min(mddev)); if (sectors == 0) { @@ -6465,6 +6474,7 @@ void md_do_sync(mddev_t *mddev) skip: mddev->curr_resync = 0; + mddev->curr_resync_completed = 0; mddev->resync_min = 0; mddev->resync_max = MaxSector; sysfs_notify(&mddev->kobj, NULL, "sync_completed"); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 3bbc6d647044..76892ac72544 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3845,6 +3845,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes)==0); mddev->reshape_position = conf->reshape_progress; + mddev->curr_resync_completed = mddev->curr_resync; conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); @@ -3854,6 +3855,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_safe = mddev->reshape_position; spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } if (mddev->delta_disks < 0) { @@ -3943,6 +3945,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes) == 0); mddev->reshape_position = conf->reshape_progress; + mddev->curr_resync_completed = mddev->curr_resync; conf->reshape_checkpoint = jiffies; set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); @@ -3953,6 +3956,7 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped conf->reshape_safe = mddev->reshape_position; spin_unlock_irq(&conf->device_lock); wake_up(&conf->wait_for_overlap); + sysfs_notify(&mddev->kobj, NULL, "sync_completed"); } return reshape_sectors; } -- cgit v1.2.3 From 8f3d8ba20e67991b531e9c0227dcd1f99271a32c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2009 19:55:13 +0200 Subject: block: move bio list helpers into bio.h It's used by DM and MD and generally useful, so move the bio list helpers into bio.h. Signed-off-by: Christoph Hellwig Acked-by: Alasdair G Kergon Signed-off-by: Jens Axboe --- drivers/md/dm-bio-list.h | 117 -------------------------------------------- drivers/md/dm-delay.c | 2 - drivers/md/dm-mpath.c | 1 - drivers/md/dm-raid1.c | 1 - drivers/md/dm-region-hash.c | 1 - drivers/md/dm-snap.c | 1 - drivers/md/dm.c | 1 - drivers/md/raid1.c | 1 - drivers/md/raid10.c | 1 - include/linux/bio.h | 109 +++++++++++++++++++++++++++++++++++++++++ 10 files changed, 109 insertions(+), 126 deletions(-) delete mode 100644 drivers/md/dm-bio-list.h (limited to 'drivers/md') diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h deleted file mode 100644 index 345098b4ca77..000000000000 --- a/drivers/md/dm-bio-list.h +++ /dev/null @@ -1,117 +0,0 @@ -/* - * Copyright (C) 2004 Red Hat UK Ltd. - * - * This file is released under the GPL. - */ - -#ifndef DM_BIO_LIST_H -#define DM_BIO_LIST_H - -#include - -#ifdef CONFIG_BLOCK - -struct bio_list { - struct bio *head; - struct bio *tail; -}; - -static inline int bio_list_empty(const struct bio_list *bl) -{ - return bl->head == NULL; -} - -static inline void bio_list_init(struct bio_list *bl) -{ - bl->head = bl->tail = NULL; -} - -#define bio_list_for_each(bio, bl) \ - for (bio = (bl)->head; bio; bio = bio->bi_next) - -static inline unsigned bio_list_size(const struct bio_list *bl) -{ - unsigned sz = 0; - struct bio *bio; - - bio_list_for_each(bio, bl) - sz++; - - return sz; -} - -static inline void bio_list_add(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = NULL; - - if (bl->tail) - bl->tail->bi_next = bio; - else - bl->head = bio; - - bl->tail = bio; -} - -static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) -{ - bio->bi_next = bl->head; - - bl->head = bio; - - if (!bl->tail) - bl->tail = bio; -} - -static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->tail) - bl->tail->bi_next = bl2->head; - else - bl->head = bl2->head; - - bl->tail = bl2->tail; -} - -static inline void bio_list_merge_head(struct bio_list *bl, - struct bio_list *bl2) -{ - if (!bl2->head) - return; - - if (bl->head) - bl2->tail->bi_next = bl->head; - else - bl->tail = bl2->tail; - - bl->head = bl2->head; -} - -static inline struct bio *bio_list_pop(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - if (bio) { - bl->head = bl->head->bi_next; - if (!bl->head) - bl->tail = NULL; - - bio->bi_next = NULL; - } - - return bio; -} - -static inline struct bio *bio_list_get(struct bio_list *bl) -{ - struct bio *bio = bl->head; - - bl->head = bl->tail = NULL; - - return bio; -} - -#endif /* CONFIG_BLOCK */ -#endif diff --git a/drivers/md/dm-delay.c b/drivers/md/dm-delay.c index 59ee1b015d2d..559dbb52bc85 100644 --- a/drivers/md/dm-delay.c +++ b/drivers/md/dm-delay.c @@ -15,8 +15,6 @@ #include -#include "dm-bio-list.h" - #define DM_MSG_PREFIX "delay" struct delay_c { diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c index 095f77bf9681..6a386ab4f7eb 100644 --- a/drivers/md/dm-mpath.c +++ b/drivers/md/dm-mpath.c @@ -8,7 +8,6 @@ #include #include "dm-path-selector.h" -#include "dm-bio-list.h" #include "dm-bio-record.h" #include "dm-uevent.h" diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index 536ef0bef154..076fbb4e967a 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -5,7 +5,6 @@ * This file is released under the GPL. */ -#include "dm-bio-list.h" #include "dm-bio-record.h" #include diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c index 59f8d9df9e1a..7b899be0b087 100644 --- a/drivers/md/dm-region-hash.c +++ b/drivers/md/dm-region-hash.c @@ -14,7 +14,6 @@ #include #include "dm.h" -#include "dm-bio-list.h" #define DM_MSG_PREFIX "region hash" diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 981a0413068f..d73f17fc7778 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -22,7 +22,6 @@ #include #include "dm-exception-store.h" -#include "dm-bio-list.h" #define DM_MSG_PREFIX "snapshots" diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a994be035ba..424f7b048c30 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -6,7 +6,6 @@ */ #include "dm.h" -#include "dm-bio-list.h" #include "dm-uevent.h" #include diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 274b491a11c1..36df9109cde1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -35,7 +35,6 @@ #include #include #include "md.h" -#include "dm-bio-list.h" #include "raid1.h" #include "bitmap.h" diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index e293d92641ac..81a54f17417e 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -22,7 +22,6 @@ #include #include #include "md.h" -#include "dm-bio-list.h" #include "raid10.h" #include "bitmap.h" diff --git a/include/linux/bio.h b/include/linux/bio.h index b900d2c67d29..b89cf2d82898 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -504,6 +504,115 @@ static inline int bio_has_data(struct bio *bio) return bio && bio->bi_io_vec != NULL; } +/* + * BIO list managment for use by remapping drivers (e.g. DM or MD). + * + * A bio_list anchors a singly-linked list of bios chained through the bi_next + * member of the bio. The bio_list also caches the last list member to allow + * fast access to the tail. + */ +struct bio_list { + struct bio *head; + struct bio *tail; +}; + +static inline int bio_list_empty(const struct bio_list *bl) +{ + return bl->head == NULL; +} + +static inline void bio_list_init(struct bio_list *bl) +{ + bl->head = bl->tail = NULL; +} + +#define bio_list_for_each(bio, bl) \ + for (bio = (bl)->head; bio; bio = bio->bi_next) + +static inline unsigned bio_list_size(const struct bio_list *bl) +{ + unsigned sz = 0; + struct bio *bio; + + bio_list_for_each(bio, bl) + sz++; + + return sz; +} + +static inline void bio_list_add(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = NULL; + + if (bl->tail) + bl->tail->bi_next = bio; + else + bl->head = bio; + + bl->tail = bio; +} + +static inline void bio_list_add_head(struct bio_list *bl, struct bio *bio) +{ + bio->bi_next = bl->head; + + bl->head = bio; + + if (!bl->tail) + bl->tail = bio; +} + +static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->tail) + bl->tail->bi_next = bl2->head; + else + bl->head = bl2->head; + + bl->tail = bl2->tail; +} + +static inline void bio_list_merge_head(struct bio_list *bl, + struct bio_list *bl2) +{ + if (!bl2->head) + return; + + if (bl->head) + bl2->tail->bi_next = bl->head; + else + bl->tail = bl2->tail; + + bl->head = bl2->head; +} + +static inline struct bio *bio_list_pop(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + if (bio) { + bl->head = bl->head->bi_next; + if (!bl->head) + bl->tail = NULL; + + bio->bi_next = NULL; + } + + return bio; +} + +static inline struct bio *bio_list_get(struct bio_list *bl) +{ + struct bio *bio = bl->head; + + bl->head = bl->tail = NULL; + + return bio; +} + #if defined(CONFIG_BLK_DEV_INTEGRITY) #define bip_vec_idx(bip, idx) (&(bip->bip_vec[(idx)])) -- cgit v1.2.3 From c03f6a19699fce4389888a45db8245969311d868 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 17 Apr 2009 11:06:30 +1000 Subject: md: update sync_completed and reshape_position even more often. There are circumstances when a user-space process might need to "oversee" a resync/reshape process. For example when doing an in-place reshape of a raid5, it is prudent to take a backup of each section before reshaping it as this is the only way to provide safety against an unplanned shutdown (i.e. crash/power failure). The sync_max sysfs value can be used to stop the resync from advancing beyond a particular point. So user-space can: suspend IO to the first section and back it up set 'sync_max' to the end of the section wait for 'sync_completed' to reach that point resume IO on the first section and move on to the next section. However this process requires the kernel and user-space to run in lock-step which could introduce unnecessary delays. It would be better if a 'double buffered' approach could be used with userspace and kernel space working on different sections with the 'next' section always ready when the 'current' section is finished. One problem with implementing this is that sync_completed is only guaranteed to be updated when the sync process reaches sync_max. (it is updated on a time basis at other times, but it is hard to rely on that). This defeats some of the double buffering. With this patch, sync_completed (and reshape_position) get updated as the current position approaches sync_max, so there is room for userspace to advance sync_max early without losing updates. To be precise, sync_completed is updated when the current sync position reaches half way between the current value of sync_completed and the value of sync_max. This will usually be a good time for user space to update sync_max. If sync_max does not get updated, the updates to sync_completed (together with associated metadata updates) will occur at an exponentially increasing frequency which will get unreasonably fast (one update every page) immediately before the process hits sync_max and stops. So the update rate will be unreasonably fast only for an insignificant period of time. Signed-off-by: NeilBrown --- drivers/md/md.c | 3 ++- drivers/md/raid5.c | 3 ++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/md.c b/drivers/md/md.c index 7af64f3846a6..612343fdde94 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -6347,7 +6347,8 @@ void md_do_sync(mddev_t *mddev) if ((mddev->curr_resync > mddev->curr_resync_completed && (mddev->curr_resync - mddev->curr_resync_completed) > (max_sectors >> 4)) || - j >= mddev->resync_max + (j - mddev->curr_resync_completed)*2 + >= mddev->resync_max - mddev->curr_resync_completed ) { /* time to update curr_resync_completed */ blk_unplug(mddev->queue); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 76892ac72544..4616bc3a6e71 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -3940,7 +3940,8 @@ static sector_t reshape_request(mddev_t *mddev, sector_t sector_nr, int *skipped * then we need to write out the superblock. */ sector_nr += reshape_sectors; - if (sector_nr >= mddev->resync_max) { + if ((sector_nr - mddev->curr_resync_completed) * 2 + >= mddev->resync_max - mddev->curr_resync_completed) { /* Cannot proceed until we've updated the superblock... */ wait_event(conf->wait_for_overlap, atomic_read(&conf->reshape_stripes) == 0); -- cgit v1.2.3 From 1f59390339f263c0fe7908fbe54466dbf3a64b58 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 20 Apr 2009 11:50:24 +1000 Subject: md: support bitmaps on RAID10 arrays larger then 2 terabytes .. and other arrays with components larger than 2 terabytes. We use a "long" rather than a "sector_t" in part of the bitmap size calculations, which is sad. Reported-by: "Mario 'BitKoenig' Holbe" Signed-off-by: NeilBrown --- drivers/md/bitmap.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/md') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index e4510c976b34..1fb91edc7de2 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1590,7 +1590,7 @@ void bitmap_destroy(mddev_t *mddev) int bitmap_create(mddev_t *mddev) { struct bitmap *bitmap; - unsigned long blocks = mddev->resync_max_sectors; + sector_t blocks = mddev->resync_max_sectors; unsigned long chunks; unsigned long pages; struct file *file = mddev->bitmap_file; @@ -1632,8 +1632,8 @@ int bitmap_create(mddev_t *mddev) bitmap->chunkshift = ffz(~bitmap->chunksize); /* now that chunksize and chunkshift are set, we can use these macros */ - chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) / - CHUNK_BLOCK_RATIO(bitmap); + chunks = (blocks + CHUNK_BLOCK_RATIO(bitmap) - 1) >> + CHUNK_BLOCK_SHIFT(bitmap); pages = (chunks + PAGE_COUNTER_RATIO - 1) / PAGE_COUNTER_RATIO; BUG_ON(!pages); -- cgit v1.2.3