From 06d91a5fe0b50c9060e70bdf7786f8a3c66249db Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:12 -0700 Subject: [PATCH] md: improve locking on 'safemode' and move superblock writes When md marks the superblock dirty before a write, it calls generic_make_request (to write the superblock) from within generic_make_request (to write the first dirty block), which could cause problems later. With this patch, the superblock write is always done by the helper thread, and write requests are delayed until that write completes. Also, the locking around marking the array dirty and writing the superblock is improved to avoid possible races. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/raid/md.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/raid/md.h') diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index a6a67d102bfa..cfde8f497d6d 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev), extern void md_unregister_thread (mdk_thread_t *thread); extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_check_recovery(mddev_t *mddev); -extern void md_write_start(mddev_t *mddev); +extern int md_write_start(mddev_t *mddev, struct bio *bi); extern void md_write_end(mddev_t *mddev); extern void md_handle_safemode(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); -- cgit v1.2.3 From 3d310eb7b3df1252e8595d059d982b0a9825a137 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:26 -0700 Subject: [PATCH] md: fix deadlock due to md thread processing delayed requests. Before completing a 'write' the md superblock might need to be updated. This is best done by the md_thread. The current code schedules this up and queues the write request for later handling by the md_thread. 
However some personalities (Raid5/raid6) will deadlock if the md_thread tries to submit requests to its own array. So this patch changes things so the process submitting the request waits for the superblock to be written and then submits the request itself. This fixes a recently-created deadlock in raid5/raid6. Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/md.c | 45 ++++++++++++++++----------------------------- drivers/md/raid1.c | 4 ++-- drivers/md/raid10.c | 3 +-- drivers/md/raid5.c | 3 +-- drivers/md/raid6main.c | 3 +-- include/linux/raid/md.h | 2 +- include/linux/raid/md_k.h | 2 +- 7 files changed, 23 insertions(+), 39 deletions(-) (limited to 'include/linux/raid/md.h') diff --git a/drivers/md/md.c b/drivers/md/md.c index 789b114f860a..7075bebb7f37 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -224,8 +224,8 @@ static mddev_t * mddev_find(dev_t unit) INIT_LIST_HEAD(&new->all_mddevs); init_timer(&new->safemode_timer); atomic_set(&new->active, 1); - bio_list_init(&new->write_list); spin_lock_init(&new->write_lock); + init_waitqueue_head(&new->sb_wait); new->queue = blk_alloc_queue(GFP_KERNEL); if (!new->queue) { @@ -1307,6 +1307,7 @@ repeat: if (!mddev->persistent) { mddev->sb_dirty = 0; spin_unlock(&mddev->write_lock); + wake_up(&mddev->sb_wait); return; } spin_unlock(&mddev->write_lock); @@ -1348,6 +1349,7 @@ repeat: } mddev->sb_dirty = 0; spin_unlock(&mddev->write_lock); + wake_up(&mddev->sb_wait); } @@ -3368,29 +3370,26 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok) /* md_write_start(mddev, bi) * If we need to update some array metadata (e.g. 'active' flag - * in superblock) before writing, queue bi for later writing - * and return 0, else return 1 and it will be written now + * in superblock) before writing, schedule a superblock update + * and wait for it to complete. 
*/ -int md_write_start(mddev_t *mddev, struct bio *bi) +void md_write_start(mddev_t *mddev, struct bio *bi) { + DEFINE_WAIT(w); if (bio_data_dir(bi) != WRITE) - return 1; + return; atomic_inc(&mddev->writes_pending); - spin_lock(&mddev->write_lock); - if (mddev->in_sync == 0 && mddev->sb_dirty == 0) { - spin_unlock(&mddev->write_lock); - return 1; - } - bio_list_add(&mddev->write_list, bi); - if (mddev->in_sync) { - mddev->in_sync = 0; - mddev->sb_dirty = 1; + spin_lock(&mddev->write_lock); + if (mddev->in_sync) { + mddev->in_sync = 0; + mddev->sb_dirty = 1; + md_wakeup_thread(mddev->thread); + } + spin_unlock(&mddev->write_lock); } - spin_unlock(&mddev->write_lock); - md_wakeup_thread(mddev->thread); - return 0; + wait_event(mddev->sb_wait, mddev->sb_dirty==0); } void md_write_end(mddev_t *mddev) @@ -3685,7 +3684,6 @@ void md_check_recovery(mddev_t *mddev) mddev->sb_dirty || test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) || test_bit(MD_RECOVERY_DONE, &mddev->recovery) || - mddev->write_list.head || (mddev->safemode == 1) || (mddev->safemode == 2 && ! 
atomic_read(&mddev->writes_pending) && !mddev->in_sync && mddev->recovery_cp == MaxSector) @@ -3694,7 +3692,6 @@ void md_check_recovery(mddev_t *mddev) if (mddev_trylock(mddev)==0) { int spares =0; - struct bio *blist; spin_lock(&mddev->write_lock); if (mddev->safemode && !atomic_read(&mddev->writes_pending) && @@ -3704,21 +3701,11 @@ void md_check_recovery(mddev_t *mddev) } if (mddev->safemode == 1) mddev->safemode = 0; - blist = bio_list_get(&mddev->write_list); spin_unlock(&mddev->write_lock); if (mddev->sb_dirty) md_update_sb(mddev); - while (blist) { - struct bio *b = blist; - blist = blist->bi_next; - b->bi_next = NULL; - generic_make_request(b); - /* we already counted this, so need to un-count */ - md_write_end(mddev); - } - if (test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) && !test_bit(MD_RECOVERY_DONE, &mddev->recovery)) { diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 3f5234fe3593..98b09773e79e 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -561,8 +561,8 @@ static int make_request(request_queue_t *q, struct bio * bio) * thread has put up a bar for new requests. * Continue immediately if no resync is active currently. 
*/ - if (md_write_start(mddev, bio)==0) - return 0; + md_write_start(mddev, bio); /* wait on superblock update early */ + spin_lock_irq(&conf->resync_lock); wait_event_lock_irq(conf->wait_resume, !conf->barrier, conf->resync_lock, ); conf->nr_pending++; diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 8476515bfdc7..fd7324a86d13 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -700,8 +700,7 @@ static int make_request(request_queue_t *q, struct bio * bio) return 0; } - if (md_write_start(mddev, bio) == 0) - return 0; + md_write_start(mddev, bio); /* * Register the new request and wait if the reconstruction diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 1ce3f5aaa984..93a9726cc2d6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -1411,8 +1411,7 @@ static int make_request (request_queue_t *q, struct bio * bi) sector_t logical_sector, last_sector; struct stripe_head *sh; - if (md_write_start(mddev, bi)==0) - return 0; + md_write_start(mddev, bi); if (bio_data_dir(bi)==WRITE) { disk_stat_inc(mddev->gendisk, writes); diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c index d9c385496dc5..f62ea1a73d0d 100644 --- a/drivers/md/raid6main.c +++ b/drivers/md/raid6main.c @@ -1570,8 +1570,7 @@ static int make_request (request_queue_t *q, struct bio * bi) sector_t logical_sector, last_sector; struct stripe_head *sh; - if (md_write_start(mddev, bi)==0) - return 0; + md_write_start(mddev, bi); if (bio_data_dir(bi)==WRITE) { disk_stat_inc(mddev->gendisk, writes); diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index cfde8f497d6d..75f41d8faed2 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -69,7 +69,7 @@ extern mdk_thread_t * md_register_thread (void (*run) (mddev_t *mddev), extern void md_unregister_thread (mdk_thread_t *thread); extern void md_wakeup_thread(mdk_thread_t *thread); extern void md_check_recovery(mddev_t *mddev); -extern int md_write_start(mddev_t *mddev, struct bio *bi); 
+extern void md_write_start(mddev_t *mddev, struct bio *bi); extern void md_write_end(mddev_t *mddev); extern void md_handle_safemode(mddev_t *mddev); extern void md_done_sync(mddev_t *mddev, int blocks, int ok); diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 6cdcb4434c6c..3e977025cf43 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -261,7 +261,7 @@ struct mddev_s sector_t recovery_cp; spinlock_t write_lock; - struct bio_list write_list; + wait_queue_head_t sb_wait; /* for waiting on superblock updates */ unsigned int safemode; /* if set, update "clean" superblock * when no writes pending. -- cgit v1.2.3 From a654b9d8f851f4ca02649d5825cbe6c608adb10c Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Tue, 21 Jun 2005 17:17:27 -0700 Subject: [PATCH] md: allow md intent bitmap to be stored near the superblock. This provides an alternative to storing the bitmap in a separate file. The bitmap can be stored at a given offset from the superblock. Obviously the creator of the array must make sure this doesn't intersect with data.... Storing it after the superblock is good for version-0.90 superblocks. 
Signed-off-by: Neil Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- drivers/md/bitmap.c | 132 +++++++++++++++++++++++++++++++++++--------- drivers/md/md.c | 40 +++++++++++++- include/linux/raid/bitmap.h | 2 + include/linux/raid/md.h | 15 ++++- include/linux/raid/md_k.h | 4 ++ include/linux/raid/md_p.h | 7 ++- 6 files changed, 170 insertions(+), 30 deletions(-) (limited to 'include/linux/raid/md.h') diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 204564dc6a0d..030d6861051a 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -116,7 +116,7 @@ static unsigned char *bitmap_alloc_page(struct bitmap *bitmap) if (!page) printk("%s: bitmap_alloc_page FAILED\n", bmname(bitmap)); else - printk("%s: bitmap_alloc_page: allocated page at %p\n", + PRINTK("%s: bitmap_alloc_page: allocated page at %p\n", bmname(bitmap), page); return page; } @@ -258,13 +258,61 @@ char *file_path(struct file *file, char *buf, int count) * basic page I/O operations */ +/* IO operations when bitmap is stored near all superblocks */ +static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long index) +{ + /* choose a good rdev and read the page from there */ + + mdk_rdev_t *rdev; + struct list_head *tmp; + struct page *page = alloc_page(GFP_KERNEL); + sector_t target; + + if (!page) + return ERR_PTR(-ENOMEM); + do { + ITERATE_RDEV(mddev, rdev, tmp) + if (rdev->in_sync && !rdev->faulty) + goto found; + return ERR_PTR(-EIO); + + found: + target = (rdev->sb_offset << 1) + offset + index * (PAGE_SIZE/512); + + } while (!sync_page_io(rdev->bdev, target, PAGE_SIZE, page, READ)); + + page->index = index; + return page; +} + +static int write_sb_page(mddev_t *mddev, long offset, struct page *page, int wait) +{ + mdk_rdev_t *rdev; + struct list_head *tmp; + + ITERATE_RDEV(mddev, rdev, tmp) + if (rdev->in_sync && !rdev->faulty) + md_super_write(mddev, rdev, + (rdev->sb_offset<<1) + offset + + page->index * (PAGE_SIZE/512), + PAGE_SIZE, + page); + + if 
(wait) + wait_event(mddev->sb_wait, atomic_read(&mddev->pending_writes)==0); + return 0; +} + /* - * write out a page + * write out a page to a file */ static int write_page(struct bitmap *bitmap, struct page *page, int wait) { int ret = -ENOMEM; + if (bitmap->file == NULL) + return write_sb_page(bitmap->mddev, bitmap->offset, page, wait); + lock_page(page); ret = page->mapping->a_ops->prepare_write(NULL, page, 0, PAGE_SIZE); @@ -394,7 +442,12 @@ static int bitmap_read_sb(struct bitmap *bitmap) int err = -EINVAL; /* page 0 is the superblock, read it... */ - bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read); + if (bitmap->file) + bitmap->sb_page = read_page(bitmap->file, 0, &bytes_read); + else { + bitmap->sb_page = read_sb_page(bitmap->mddev, bitmap->offset, 0); + bytes_read = PAGE_SIZE; + } if (IS_ERR(bitmap->sb_page)) { err = PTR_ERR(bitmap->sb_page); bitmap->sb_page = NULL; @@ -625,14 +678,16 @@ static void bitmap_file_kick(struct bitmap *bitmap) bitmap_mask_state(bitmap, BITMAP_STALE, MASK_SET); bitmap_update_sb(bitmap); - path = kmalloc(PAGE_SIZE, GFP_KERNEL); - if (path) - ptr = file_path(bitmap->file, path, PAGE_SIZE); + if (bitmap->file) { + path = kmalloc(PAGE_SIZE, GFP_KERNEL); + if (path) + ptr = file_path(bitmap->file, path, PAGE_SIZE); - printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n", - bmname(bitmap), ptr ? ptr : ""); + printk(KERN_ALERT "%s: kicking failed bitmap file %s from array!\n", + bmname(bitmap), ptr ? 
ptr : ""); - kfree(path); + kfree(path); + } bitmap_file_put(bitmap); @@ -676,7 +731,7 @@ static void bitmap_file_set_bit(struct bitmap *bitmap, sector_t block) void *kaddr; unsigned long chunk = block >> CHUNK_BLOCK_SHIFT(bitmap); - if (!bitmap->file || !bitmap->filemap) { + if (!bitmap->filemap) { return; } @@ -715,7 +770,7 @@ int bitmap_unplug(struct bitmap *bitmap) * flushed out to disk */ for (i = 0; i < bitmap->file_pages; i++) { spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->file || !bitmap->filemap) { + if (!bitmap->filemap) { spin_unlock_irqrestore(&bitmap->lock, flags); return 0; } @@ -732,11 +787,15 @@ int bitmap_unplug(struct bitmap *bitmap) return 1; } if (wait) { /* if any writes were performed, we need to wait on them */ - spin_lock_irq(&bitmap->write_lock); - wait_event_lock_irq(bitmap->write_wait, - list_empty(&bitmap->complete_pages), bitmap->write_lock, - wake_up_process(bitmap->writeback_daemon->tsk)); - spin_unlock_irq(&bitmap->write_lock); + if (bitmap->file) { + spin_lock_irq(&bitmap->write_lock); + wait_event_lock_irq(bitmap->write_wait, + list_empty(&bitmap->complete_pages), bitmap->write_lock, + wake_up_process(bitmap->writeback_daemon->tsk)); + spin_unlock_irq(&bitmap->write_lock); + } else + wait_event(bitmap->mddev->sb_wait, + atomic_read(&bitmap->mddev->pending_writes)==0); } return 0; } @@ -764,7 +823,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) chunks = bitmap->chunks; file = bitmap->file; - BUG_ON(!file); + BUG_ON(!file && !bitmap->offset); #if INJECT_FAULTS_3 outofdate = 1; @@ -779,7 +838,7 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) num_pages = (bytes + sizeof(bitmap_super_t) + PAGE_SIZE - 1) / PAGE_SIZE; - if (i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) { + if (file && i_size_read(file->f_mapping->host) < bytes + sizeof(bitmap_super_t)) { printk(KERN_INFO "%s: bitmap file too short %lu < %lu\n", bmname(bitmap), (unsigned long) 
i_size_read(file->f_mapping->host), @@ -816,14 +875,18 @@ static int bitmap_init_from_disk(struct bitmap *bitmap, int in_sync) */ page = bitmap->sb_page; offset = sizeof(bitmap_super_t); - } else { + } else if (file) { page = read_page(file, index, &dummy); - if (IS_ERR(page)) { /* read error */ - ret = PTR_ERR(page); - goto out; - } + offset = 0; + } else { + page = read_sb_page(bitmap->mddev, bitmap->offset, index); offset = 0; } + if (IS_ERR(page)) { /* read error */ + ret = PTR_ERR(page); + goto out; + } + oldindex = index; oldpage = page; kmap(page); @@ -874,6 +937,19 @@ out: return ret; } +void bitmap_write_all(struct bitmap *bitmap) +{ + /* We don't actually write all bitmap blocks here, + * just flag them as needing to be written + */ + + unsigned long chunks = bitmap->chunks; + unsigned long bytes = (chunks+7)/8 + sizeof(bitmap_super_t); + unsigned long num_pages = (bytes + PAGE_SIZE-1) / PAGE_SIZE; + while (num_pages--) + bitmap->filemap_attr[num_pages] |= BITMAP_PAGE_NEEDWRITE; +} + static void bitmap_count_page(struct bitmap *bitmap, sector_t offset, int inc) { @@ -913,7 +989,7 @@ int bitmap_daemon_work(struct bitmap *bitmap) for (j = 0; j < bitmap->chunks; j++) { bitmap_counter_t *bmc; spin_lock_irqsave(&bitmap->lock, flags); - if (!bitmap->file || !bitmap->filemap) { + if (!bitmap->filemap) { /* error or shutdown */ spin_unlock_irqrestore(&bitmap->lock, flags); break; @@ -1072,6 +1148,7 @@ static int bitmap_start_daemon(struct bitmap *bitmap, mdk_thread_t **ptr, spin_lock_irqsave(&bitmap->lock, flags); *ptr = NULL; + if (!bitmap->file) /* no need for daemon if there's no backing file */ goto out_unlock; @@ -1416,9 +1493,11 @@ int bitmap_create(mddev_t *mddev) BUG_ON(sizeof(bitmap_super_t) != 256); - if (!file) /* bitmap disabled, nothing to do */ + if (!file && !mddev->bitmap_offset) /* bitmap disabled, nothing to do */ return 0; + BUG_ON(file && mddev->bitmap_offset); + bitmap = kmalloc(sizeof(*bitmap), GFP_KERNEL); if (!bitmap) return -ENOMEM; @@ 
-1438,7 +1517,8 @@ int bitmap_create(mddev_t *mddev) return -ENOMEM; bitmap->file = file; - get_file(file); + bitmap->offset = mddev->bitmap_offset; + if (file) get_file(file); /* read superblock from bitmap file (this sets bitmap->chunksize) */ err = bitmap_read_sb(bitmap); if (err) diff --git a/drivers/md/md.c b/drivers/md/md.c index 7075bebb7f37..fde8acfac320 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -337,7 +337,7 @@ static int bi_complete(struct bio *bio, unsigned int bytes_done, int error) return 0; } -static int sync_page_io(struct block_device *bdev, sector_t sector, int size, +int sync_page_io(struct block_device *bdev, sector_t sector, int size, struct page *page, int rw) { struct bio *bio = bio_alloc(GFP_NOIO, 1); @@ -609,6 +609,17 @@ static int super_90_validate(mddev_t *mddev, mdk_rdev_t *rdev) memcpy(mddev->uuid+12,&sb->set_uuid3, 4); mddev->max_disks = MD_SB_DISKS; + + if (sb->state & (1<bitmap_file == NULL) { + if (mddev->level != 1) { + /* FIXME use a better test */ + printk(KERN_WARNING "md: bitmaps only support for raid1\n"); + return -EINVAL; + } + mddev->bitmap_offset = (MD_SB_BYTES >> 9); + } + } else if (mddev->pers == NULL) { /* Insist on good event counter while assembling */ __u64 ev1 = md_event(sb); @@ -702,6 +713,9 @@ static void super_90_sync(mddev_t *mddev, mdk_rdev_t *rdev) sb->layout = mddev->layout; sb->chunk_size = mddev->chunk_size; + if (mddev->bitmap && mddev->bitmap_file == NULL) + sb->state |= (1<disks[0].state = (1<uuid, sb->set_uuid, 16); mddev->max_disks = (4096-256)/2; + + if ((le32_to_cpu(sb->feature_map) & 1) && + mddev->bitmap_file == NULL ) { + if (mddev->level != 1) { + printk(KERN_WARNING "md: bitmaps only supported for raid1\n"); + return -EINVAL; + } + mddev->bitmap_offset = (__s32)le32_to_cpu(sb->bitmap_offset); + } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling */ __u64 ev1 = le64_to_cpu(sb->events); @@ -960,6 +983,11 @@ static void super_1_sync(mddev_t *mddev, 
mdk_rdev_t *rdev) else sb->resync_offset = cpu_to_le64(0); + if (mddev->bitmap && mddev->bitmap_file == NULL) { + sb->bitmap_offset = cpu_to_le32((__u32)mddev->bitmap_offset); + sb->feature_map = cpu_to_le32(1); + } + max_dev = 0; ITERATE_RDEV(mddev,rdev2,tmp) if (rdev2->desc_nr+1 > max_dev) @@ -2406,7 +2434,8 @@ static int set_bitmap_file(mddev_t *mddev, int fd) mdname(mddev)); fput(mddev->bitmap_file); mddev->bitmap_file = NULL; - } + } else + mddev->bitmap_offset = 0; /* file overrides offset */ return err; } @@ -3774,6 +3803,13 @@ void md_check_recovery(mddev_t *mddev) set_bit(MD_RECOVERY_RUNNING, &mddev->recovery); if (!spares) set_bit(MD_RECOVERY_SYNC, &mddev->recovery); + if (spares && mddev->bitmap && ! mddev->bitmap->file) { + /* We are adding a device or devices to an array + * which has the bitmap stored on all devices. + * So make sure all bitmap pages get written + */ + bitmap_write_all(mddev->bitmap); + } mddev->sync_thread = md_register_thread(md_do_sync, mddev, "%s_resync"); diff --git a/include/linux/raid/bitmap.h b/include/linux/raid/bitmap.h index cfe60cfc8f3d..e24b74b11150 100644 --- a/include/linux/raid/bitmap.h +++ b/include/linux/raid/bitmap.h @@ -217,6 +217,7 @@ struct bitmap { /* bitmap spinlock */ spinlock_t lock; + long offset; /* offset from superblock if file is NULL */ struct file *file; /* backing disk file */ struct page *sb_page; /* cached copy of the bitmap file superblock */ struct page **filemap; /* list of cache pages for the file */ @@ -255,6 +256,7 @@ void bitmap_print_sb(struct bitmap *bitmap); int bitmap_update_sb(struct bitmap *bitmap); int bitmap_setallbits(struct bitmap *bitmap); +void bitmap_write_all(struct bitmap *bitmap); /* these are exported */ int bitmap_startwrite(struct bitmap *bitmap, sector_t offset, unsigned long sectors); diff --git a/include/linux/raid/md.h b/include/linux/raid/md.h index 75f41d8faed2..ffa316ce4dc8 100644 --- a/include/linux/raid/md.h +++ b/include/linux/raid/md.h @@ -60,7 +60,14 @@ */ 
#define MD_MAJOR_VERSION 0 #define MD_MINOR_VERSION 90 -#define MD_PATCHLEVEL_VERSION 1 +/* + * MD_PATCHLEVEL_VERSION indicates kernel functionality. + * >=1 means different superblock formats are selectable using SET_ARRAY_INFO + * and major_version/minor_version accordingly + * >=2 means that Internal bitmaps are supported by setting MD_SB_BITMAP_PRESENT + * in the super status byte + */ +#define MD_PATCHLEVEL_VERSION 2 extern int register_md_personality (int p_num, mdk_personality_t *p); extern int unregister_md_personality (int p_num); @@ -78,6 +85,12 @@ extern void md_unplug_mddev(mddev_t *mddev); extern void md_print_devices (void); +extern void md_super_write(mddev_t *mddev, mdk_rdev_t *rdev, + sector_t sector, int size, struct page *page); +extern int sync_page_io(struct block_device *bdev, sector_t sector, int size, + struct page *page, int rw); + + #define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); } #endif diff --git a/include/linux/raid/md_k.h b/include/linux/raid/md_k.h index 3e977025cf43..a3725b57fb7d 100644 --- a/include/linux/raid/md_k.h +++ b/include/linux/raid/md_k.h @@ -273,6 +273,10 @@ struct mddev_s struct bitmap *bitmap; /* the bitmap for the device */ struct file *bitmap_file; /* the bitmap file */ + long bitmap_offset; /* offset from superblock of + * start of bitmap. 
May be + * negative, but not '0' + */ struct list_head all_mddevs; }; diff --git a/include/linux/raid/md_p.h b/include/linux/raid/md_p.h index 8ba95d67329f..8e592a25a8b5 100644 --- a/include/linux/raid/md_p.h +++ b/include/linux/raid/md_p.h @@ -96,6 +96,7 @@ typedef struct mdp_device_descriptor_s { #define MD_SB_CLEAN 0 #define MD_SB_ERRORS 1 +#define MD_SB_BITMAP_PRESENT 8 /* bitmap may be present nearby */ typedef struct mdp_superblock_s { /* * Constant generic information @@ -184,7 +185,7 @@ struct mdp_superblock_1 { /* constant array information - 128 bytes */ __u32 magic; /* MD_SB_MAGIC: 0xa92b4efc - little endian */ __u32 major_version; /* 1 */ - __u32 feature_map; /* 0 for now */ + __u32 feature_map; /* bit 0 set if 'bitmap_offset' is meaningful */ __u32 pad0; /* always set to 0 when writing */ __u8 set_uuid[16]; /* user-space generated. */ @@ -197,6 +198,10 @@ struct mdp_superblock_1 { __u32 chunksize; /* in 512byte sectors */ __u32 raid_disks; + __u32 bitmap_offset; /* sectors after start of superblock that bitmap starts + * NOTE: signed, so bitmap can be before superblock + * only meaningful of feature_map[0] is set. + */ __u8 pad1[128-96]; /* set to 0 when written */ /* constant this-device information - 64 bytes */ -- cgit v1.2.3