Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/Kconfig      | 11
-rw-r--r--  drivers/md/dm-target.c  |  3
-rw-r--r--  drivers/md/md.c         | 50
-rw-r--r--  drivers/md/raid0.c      |  5
-rw-r--r--  drivers/md/raid1.c      | 48
-rw-r--r--  drivers/md/raid10.c     | 52
-rw-r--r--  drivers/md/raid5.c      | 34
-rw-r--r--  drivers/md/raid6main.c  | 31
8 files changed, 128 insertions, 106 deletions
diff --git a/drivers/md/Kconfig b/drivers/md/Kconfig
index fd2aae150ccc..ac25a48362ac 100644
--- a/drivers/md/Kconfig
+++ b/drivers/md/Kconfig
@@ -139,11 +139,12 @@ config MD_RAID5_RESHAPE
 	  is online.  However it is still EXPERIMENTAL code.  It should
 	  work, but please be sure that you have backups.
 
-	  You will need a version of mdadm newer than 2.3.1.  During the
-	  early stage of reshape there is a critical section where live data
-	  is being over-written.  A crash during this time needs extra care
-	  for recovery.  The newer mdadm takes a copy of the data in the
-	  critical section and will restore it, if necessary, after a crash.
+	  You will need mdadm version 2.4.1 or later to use this
+	  feature safely.  During the early stage of reshape there is
+	  a critical section where live data is being over-written.  A
+	  crash during this time needs extra care for recovery.  The
+	  newer mdadm takes a copy of the data in the critical section
+	  and will restore it, if necessary, after a crash.
 
 	  The mdadm usage is e.g.
 	       mdadm --grow /dev/md1 --raid-disks=6
diff --git a/drivers/md/dm-target.c b/drivers/md/dm-target.c
index aecd9e0c2616..64fd8e79ea4c 100644
--- a/drivers/md/dm-target.c
+++ b/drivers/md/dm-target.c
@@ -78,8 +78,7 @@ void dm_put_target_type(struct target_type *t)
 	if (--ti->use == 0)
 		module_put(ti->tt.module);
 
-	if (ti->use < 0)
-		BUG();
+	BUG_ON(ti->use < 0);
 	up_read(&_lock);
 
 	return;
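The dm-target.c hunk is the first of many in this series that collapse an open-coded "if (x) BUG();" pair into BUG_ON(x). A minimal userspace sketch of the same refcount assertion, with assert() standing in for the kernel's BUG() and a hypothetical put_ref() in place of dm_put_target_type():

#include <assert.h>
#include <stdio.h>

/* Stand-in for the kernel macro: BUG_ON(x) behaves like "if (x) BUG();". */
#define BUG_ON(x) assert(!(x))

/* Drop one reference; going below zero means a put without a matching get. */
static int put_ref(int *use)
{
    --(*use);
    BUG_ON(*use < 0);
    return *use;
}

int main(void)
{
    int use = 2;
    while (put_ref(&use) > 0)
        ;
    printf("all references dropped\n");
    return 0;
}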
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 039e071c1007..f19b874753a9 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -163,9 +163,19 @@ void md_new_event(mddev_t *mddev)
 {
 	atomic_inc(&md_event_count);
 	wake_up(&md_event_waiters);
+	sysfs_notify(&mddev->kobj, NULL, "sync_action");
 }
 EXPORT_SYMBOL_GPL(md_new_event);
 
+/* Alternate version that can be called from interrupts
+ * when calling sysfs_notify isn't needed.
+ */
+void md_new_event_inintr(mddev_t *mddev)
+{
+	atomic_inc(&md_event_count);
+	wake_up(&md_event_waiters);
+}
+
 /*
  * Enables to iterate over all existing md arrays
  * all_mddevs_lock protects this list.
@@ -215,13 +225,11 @@ static void mddev_put(mddev_t *mddev)
 		return;
 	if (!mddev->raid_disks && list_empty(&mddev->disks)) {
 		list_del(&mddev->all_mddevs);
-		/* that blocks */
+		spin_unlock(&all_mddevs_lock);
 		blk_cleanup_queue(mddev->queue);
-		/* that also blocks */
 		kobject_unregister(&mddev->kobj);
-		/* result blows... */
-	}
-	spin_unlock(&all_mddevs_lock);
+	} else
+		spin_unlock(&all_mddevs_lock);
 }
 
 static mddev_t * mddev_find(dev_t unit)
@@ -280,11 +288,6 @@ static inline int mddev_lock(mddev_t * mddev)
 	return mutex_lock_interruptible(&mddev->reconfig_mutex);
 }
 
-static inline void mddev_lock_uninterruptible(mddev_t * mddev)
-{
-	mutex_lock(&mddev->reconfig_mutex);
-}
-
 static inline int mddev_trylock(mddev_t * mddev)
 {
 	return mutex_trylock(&mddev->reconfig_mutex);
@@ -2258,7 +2261,7 @@ action_store(mddev_t *mddev, const char *page, size_t len)
 	} else {
 		if (cmd_match(page, "check"))
 			set_bit(MD_RECOVERY_CHECK, &mddev->recovery);
-		else if (cmd_match(page, "repair"))
+		else if (!cmd_match(page, "repair"))
 			return -EINVAL;
 		set_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
 		set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -2459,9 +2462,11 @@ md_attr_show(struct kobject *kobj, struct attribute *attr, char *page)
 
 	if (!entry->show)
 		return -EIO;
-	mddev_lock(mddev);
-	rv = entry->show(mddev, page);
-	mddev_unlock(mddev);
+	rv = mddev_lock(mddev);
+	if (!rv) {
+		rv = entry->show(mddev, page);
+		mddev_unlock(mddev);
+	}
 	return rv;
 }
@@ -2475,9 +2480,11 @@ md_attr_store(struct kobject *kobj, struct attribute *attr,
 
 	if (!entry->store)
 		return -EIO;
-	mddev_lock(mddev);
-	rv = entry->store(mddev, page, length);
-	mddev_unlock(mddev);
+	rv = mddev_lock(mddev);
+	if (!rv) {
+		rv = entry->store(mddev, page, length);
+		mddev_unlock(mddev);
+	}
 	return rv;
 }
@@ -4151,7 +4158,7 @@ void md_error(mddev_t *mddev, mdk_rdev_t *rdev)
 	set_bit(MD_RECOVERY_INTR, &mddev->recovery);
 	set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
 	md_wakeup_thread(mddev->thread);
-	md_new_event(mddev);
+	md_new_event_inintr(mddev);
 }
 
 /* seq_file implementation /proc/mdstat */
@@ -4342,8 +4349,9 @@ static int md_seq_show(struct seq_file *seq, void *v)
 		return 0;
 	}
 
-	if (mddev_lock(mddev)!=0)
+	if (mddev_lock(mddev) < 0)
 		return -EINTR;
+
 	if (mddev->pers || mddev->raid_disks || !list_empty(&mddev->disks)) {
 		seq_printf(seq, "%s : %sactive", mdname(mddev),
 						mddev->pers ? "" : "in");
@@ -5029,8 +5037,10 @@ static int md_notify_reboot(struct notifier_block *this,
 		printk(KERN_INFO "md: stopping all md devices.\n");
 
 		ITERATE_MDDEV(mddev,tmp)
-			if (mddev_trylock(mddev))
+			if (mddev_trylock(mddev)) {
 				do_md_stop (mddev, 1);
+				mddev_unlock(mddev);
+			}
 		/*
 		 * certain more exotic SCSI devices are known to be
 		 * volatile wrt too early system reboots. While the
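Several of the md.c hunks share one theme: mddev_lock() is built on mutex_lock_interruptible(), so it can fail with -EINTR, its return value must be checked, and the unlock must happen only on the success path. A sketch of the pattern md_attr_show()/md_attr_store() now follow; the handler, the one-field mddev_t, and the stub lock functions are all stand-ins so the control flow compiles outside the kernel:

#include <stdio.h>

typedef struct { int locked; } mddev_t;   /* toy stand-in */

static int mddev_lock(mddev_t *mddev)
{
    /* kernel: return mutex_lock_interruptible(&mddev->reconfig_mutex);
     * which yields 0 on success or -EINTR if a signal arrived. */
    mddev->locked = 1;
    return 0;
}

static void mddev_unlock(mddev_t *mddev)
{
    mddev->locked = 0;
}

static int show_guarded(mddev_t *mddev, char *page)
{
    int rv = mddev_lock(mddev);     /* may fail: do not ignore */
    if (!rv) {
        rv = sprintf(page, "idle\n");
        mddev_unlock(mddev);        /* unlock only if the lock was taken */
    }
    return rv;
}

int main(void)
{
    mddev_t md = { 0 };
    char page[16];
    int rv = show_guarded(&md, page);
    printf("rv=%d %s", rv, rv > 0 ? page : "\n");
    return 0;
}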
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 678f4dbbea1d..cb8c6317e4e5 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -331,13 +331,14 @@ static int raid0_run (mddev_t *mddev)
 		goto out_free_conf;
 	size = conf->strip_zone[cur].size;
 
-	for (i=0; i< nb_zone; i++) {
-		conf->hash_table[i] = conf->strip_zone + cur;
+	conf->hash_table[0] = conf->strip_zone + cur;
+	for (i=1; i< nb_zone; i++) {
 		while (size <= conf->hash_spacing) {
 			cur++;
 			size += conf->strip_zone[cur].size;
 		}
 		size -= conf->hash_spacing;
+		conf->hash_table[i] = conf->strip_zone + cur;
 	}
 	if (conf->preshift) {
 		conf->hash_spacing >>= conf->preshift;
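The corrected loop pins hash slot 0 to the first zone and assigns each later slot only after advancing through enough accumulated zone size; the old code stored conf->hash_table[i] before the advance, so every slot after the first pointed one step too early. A standalone model of the fixed loop, with made-up zone sizes and spacing (nothing here is kernel data):

#include <stdio.h>

int main(void)
{
    unsigned long zone_size[] = { 40, 20, 20 };  /* hypothetical zones */
    unsigned long hash_spacing = 16;             /* sectors per hash slot */
    int zone_of_slot[5];
    unsigned long size = zone_size[0];
    int cur = 0, i;

    zone_of_slot[0] = cur;                  /* slot 0 is always zone 0 */
    for (i = 1; i < 5; i++) {
        while (size <= hash_spacing) {      /* advance into later zones */
            cur++;
            size += zone_size[cur];
        }
        size -= hash_spacing;
        zone_of_slot[i] = cur;              /* assign after advancing */
    }
    for (i = 0; i < 5; i++)
        printf("slot %d -> zone %d\n", i, zone_of_slot[i]);
    return 0;
}

With these numbers the slots map to zones 0, 0, 0, 1, 2, matching the zone that actually contains each slot's starting offset (0, 16, 32, 48, 64).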
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3cb0872a845d..4070eff6f0f8 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -315,10 +315,11 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 		if (r1_bio->bios[mirror] == bio)
 			break;
 
-	if (error == -ENOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
+	if (error == -EOPNOTSUPP && test_bit(R1BIO_Barrier, &r1_bio->state)) {
 		set_bit(BarriersNotsupp, &conf->mirrors[mirror].rdev->flags);
 		set_bit(R1BIO_BarrierRetry, &r1_bio->state);
 		r1_bio->mddev->barriers_work = 0;
+		/* Don't rdev_dec_pending in this branch - keep it for the retry */
 	} else {
 		/*
 		 * this branch is our 'one mirror IO has finished' event handler:
@@ -365,6 +366,7 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 				}
 			}
 		}
+		rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 	}
 	/*
 	 *
@@ -374,11 +376,9 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 	if (atomic_dec_and_test(&r1_bio->remaining)) {
 		if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
 			reschedule_retry(r1_bio);
-			/* Don't dec_pending yet, we want to hold
-			 * the reference over the retry
-			 */
 			goto out;
 		}
+		/* it really is the end of this request */
 		if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
 			/* free extra copy of the data pages */
 			int i = bio->bi_vcnt;
@@ -393,8 +393,6 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
 		md_write_end(r1_bio->mddev);
 		raid_end_bio_io(r1_bio);
 	}
-
-	rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
 out:
 	if (to_put)
 		bio_put(to_put);
@@ -753,18 +751,24 @@ static int make_request(request_queue_t *q, struct bio * bio)
 	const int rw = bio_data_dir(bio);
 	int do_barriers;
 
-	if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
-		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
-		return 0;
-	}
-
 	/*
 	 * Register the new request and wait if the reconstruction
 	 * thread has put up a bar for new requests.
 	 * Continue immediately if no resync is active currently.
+	 * We test barriers_work *after* md_write_start as md_write_start
+	 * may cause the first superblock write, and that will check out
+	 * if barriers work.
 	 */
+
+	md_write_start(mddev, bio); /* wait on superblock update early */
+
+	if (unlikely(!mddev->barriers_work && bio_barrier(bio))) {
+		if (rw == WRITE)
+			md_write_end(mddev);
+		bio_endio(bio, bio->bi_size, -EOPNOTSUPP);
+		return 0;
+	}
+
 	wait_barrier(conf);
 
 	disk_stat_inc(mddev->gendisk, ios[rw]);
@@ -1135,8 +1139,19 @@ static int end_sync_write(struct bio *bio, unsigned int bytes_done, int error)
 			mirror = i;
 			break;
 		}
-	if (!uptodate)
+	if (!uptodate) {
+		int sync_blocks = 0;
+		sector_t s = r1_bio->sector;
+		long sectors_to_go = r1_bio->sectors;
+		/* make sure these bits don't get cleared. */
+		do {
+			bitmap_end_sync(mddev->bitmap, s,
+					&sync_blocks, 1);
+			s += sync_blocks;
+			sectors_to_go -= sync_blocks;
+		} while (sectors_to_go > 0);
 		md_error(mddev, conf->mirrors[mirror].rdev);
+	}
 
 	update_head_pos(mirror, r1_bio);
@@ -1393,10 +1408,11 @@ static void raid1d(mddev_t *mddev)
 			unplug = 1;
 		} else if (test_bit(R1BIO_BarrierRetry, &r1_bio->state)) {
 			/* some requests in the r1bio were BIO_RW_BARRIER
-			 * requests which failed with -ENOTSUPP.  Hohumm..
+			 * requests which failed with -EOPNOTSUPP.  Hohumm..
 			 * Better resubmit without the barrier.
 			 * We know which devices to resubmit for, because
 			 * all others have had their bios[] entry cleared.
+			 * We already have a nr_pending reference on these rdevs.
 			 */
 			int i;
 			clear_bit(R1BIO_BarrierRetry, &r1_bio->state);
@@ -1547,8 +1563,7 @@ static int init_resync(conf_t *conf)
 	int buffs;
 
 	buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
-	if (conf->r1buf_pool)
-		BUG();
+	BUG_ON(conf->r1buf_pool);
 	conf->r1buf_pool = mempool_create(buffs, r1buf_pool_alloc, r1buf_pool_free,
 					  conf->poolinfo);
 	if (!conf->r1buf_pool)
@@ -1721,8 +1736,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
 				    !conf->fullsync &&
 				    !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
 					break;
-				if (sync_blocks < (PAGE_SIZE>>9))
-					BUG();
+				BUG_ON(sync_blocks < (PAGE_SIZE>>9));
 				if (len > (sync_blocks<<9))
 					len = sync_blocks<<9;
 			}
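In end_sync_write(), the new failure path walks the whole failed range so that every bitmap region it covers stays marked for a future resync. bitmap_end_sync() reports through its sync_blocks argument how many sectors each call covered, and it is the advancing cursor, not the starting sector, that must be passed on each iteration. A userspace model of that loop; end_sync_region() is a made-up stand-in for bitmap_end_sync() with fake region geometry:

#include <stdio.h>

/* Stand-in: pretend each bitmap region covers 64 sectors and report
 * how many sectors of the current region remain from this point. */
static void end_sync_region(unsigned long long sector, int *sync_blocks)
{
    *sync_blocks = 64 - (int)(sector % 64);
    printf("keep region at %llu dirty (%d sectors)\n", sector, *sync_blocks);
}

int main(void)
{
    unsigned long long s = 100;   /* r1_bio->sector  */
    long sectors_to_go = 200;     /* r1_bio->sectors */
    int sync_blocks = 0;

    do {    /* pass the moving cursor s, never the fixed start sector */
        end_sync_region(s, &sync_blocks);
        s += sync_blocks;
        sectors_to_go -= sync_blocks;
    } while (sectors_to_go > 0);
    return 0;
}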
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index ab90a6d12020..1440935414e6 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1117,8 +1117,7 @@ static int end_sync_read(struct bio *bio, unsigned int bytes_done, int error)
 	for (i=0; i<conf->copies; i++)
 		if (r10_bio->devs[i].bio == bio)
 			break;
-	if (i == conf->copies)
-		BUG();
+	BUG_ON(i == conf->copies);
 	update_head_pos(i, r10_bio);
 	d = r10_bio->devs[i].devnum;
@@ -1408,43 +1407,54 @@ static void raid10d(mddev_t *mddev)
 				if (s > (PAGE_SIZE>>9))
 					s = PAGE_SIZE >> 9;
 
+				rcu_read_lock();
 				do {
 					int d = r10_bio->devs[sl].devnum;
-					rdev = conf->mirrors[d].rdev;
+					rdev = rcu_dereference(conf->mirrors[d].rdev);
 					if (rdev &&
-					    test_bit(In_sync, &rdev->flags) &&
-					    sync_page_io(rdev->bdev,
-							 r10_bio->devs[sl].addr +
-							 sect + rdev->data_offset,
-							 s<<9,
-							 conf->tmppage, READ))
-						success = 1;
-					else {
-						sl++;
-						if (sl == conf->copies)
-							sl = 0;
+					    test_bit(In_sync, &rdev->flags)) {
+						atomic_inc(&rdev->nr_pending);
+						rcu_read_unlock();
+						success = sync_page_io(rdev->bdev,
+								       r10_bio->devs[sl].addr +
+								       sect + rdev->data_offset,
+								       s<<9,
+								       conf->tmppage, READ);
+						rdev_dec_pending(rdev, mddev);
+						rcu_read_lock();
+						if (success)
+							break;
 					}
+					sl++;
+					if (sl == conf->copies)
+						sl = 0;
 				} while (!success && sl != r10_bio->read_slot);
+				rcu_read_unlock();
 
 				if (success) {
 					int start = sl;
 					/* write it back and re-read */
+					rcu_read_lock();
 					while (sl != r10_bio->read_slot) {
 						int d;
 						if (sl==0)
 							sl = conf->copies;
 						sl--;
 						d = r10_bio->devs[sl].devnum;
-						rdev = conf->mirrors[d].rdev;
-						atomic_add(s, &rdev->corrected_errors);
+						rdev = rcu_dereference(conf->mirrors[d].rdev);
 						if (rdev &&
 						    test_bit(In_sync, &rdev->flags)) {
+							atomic_inc(&rdev->nr_pending);
+							rcu_read_unlock();
+							atomic_add(s, &rdev->corrected_errors);
 							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
 									 sect + rdev->data_offset,
 									 s<<9, conf->tmppage, WRITE) == 0)
 								/* Well, this device is dead */
 								md_error(mddev, rdev);
+							rdev_dec_pending(rdev, mddev);
+							rcu_read_lock();
 						}
 					}
 					sl = start;
@@ -1454,17 +1464,22 @@ static void raid10d(mddev_t *mddev)
 							sl = conf->copies;
 						sl--;
 						d = r10_bio->devs[sl].devnum;
-						rdev = conf->mirrors[d].rdev;
+						rdev = rcu_dereference(conf->mirrors[d].rdev);
 						if (rdev &&
 						    test_bit(In_sync, &rdev->flags)) {
+							atomic_inc(&rdev->nr_pending);
+							rcu_read_unlock();
 							if (sync_page_io(rdev->bdev,
 									 r10_bio->devs[sl].addr +
 									 sect + rdev->data_offset,
 									 s<<9, conf->tmppage, READ) == 0)
 								/* Well, this device is dead */
 								md_error(mddev, rdev);
+							rdev_dec_pending(rdev, mddev);
+							rcu_read_lock();
 						}
 					}
+					rcu_read_unlock();
 				} else {
 					/* Cannot read from anywhere -- bye bye array */
 					md_error(mddev, conf->mirrors[r10_bio->devs[r10_bio->read_slot].devnum].rdev);
@@ -1518,8 +1533,7 @@ static int init_resync(conf_t *conf)
 	int buffs;
 
 	buffs = RESYNC_WINDOW / RESYNC_BLOCK_SIZE;
-	if (conf->r10buf_pool)
-		BUG();
+	BUG_ON(conf->r10buf_pool);
 	conf->r10buf_pool = mempool_create(buffs, r10buf_pool_alloc, r10buf_pool_free, conf);
 	if (!conf->r10buf_pool)
 		return -ENOMEM;
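The raid10d changes all follow one locking shape: find the rdev under rcu_read_lock(), pin it by bumping nr_pending before any sleeping I/O, drop the RCU read lock for the I/O itself, then unpin and re-enter RCU. The sketch below mirrors only that ordering; the rcu_*() stubs and do_io() are placeholders so it compiles as plain C, where the real code uses rcu_dereference(), atomic_inc() and rdev_dec_pending():

#include <stdio.h>

/* No-op stand-ins for the kernel RCU primitives. */
static void rcu_read_lock(void)   {}
static void rcu_read_unlock(void) {}

struct rdev { int nr_pending; int in_sync; };

/* Placeholder for sync_page_io(); pretend the read succeeds. */
static int do_io(struct rdev *rdev) { (void)rdev; return 1; }

static int read_one_copy(struct rdev *rdev)
{
    int success = 0;

    rcu_read_lock();            /* rdev pointer is only stable under RCU */
    if (rdev && rdev->in_sync) {
        rdev->nr_pending++;     /* pin: device can't go away during I/O */
        rcu_read_unlock();      /* now it is safe to sleep in the I/O */
        success = do_io(rdev);
        rdev->nr_pending--;     /* unpin (kernel: rdev_dec_pending()) */
        rcu_read_lock();        /* re-enter RCU before looking further */
    }
    rcu_read_unlock();
    return success;
}

int main(void)
{
    struct rdev dev = { 0, 1 };
    printf("read %s\n", read_one_copy(&dev) ? "ok" : "failed");
    return 0;
}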
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index dae740adaf65..31843604049c 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -73,10 +73,8 @@ static void print_raid5_conf (raid5_conf_t *conf);
 static void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
 {
 	if (atomic_dec_and_test(&sh->count)) {
-		if (!list_empty(&sh->lru))
-			BUG();
-		if (atomic_read(&conf->active_stripes)==0)
-			BUG();
+		BUG_ON(!list_empty(&sh->lru));
+		BUG_ON(atomic_read(&conf->active_stripes)==0);
 		if (test_bit(STRIPE_HANDLE, &sh->state)) {
 			if (test_bit(STRIPE_DELAYED, &sh->state))
 				list_add_tail(&sh->lru, &conf->delayed_list);
@@ -184,10 +182,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx, int
 	raid5_conf_t *conf = sh->raid_conf;
 	int i;
 
-	if (atomic_read(&sh->count) != 0)
-		BUG();
-	if (test_bit(STRIPE_HANDLE, &sh->state))
-		BUG();
+	BUG_ON(atomic_read(&sh->count) != 0);
+	BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
 
 	CHECK_DEVLOCK();
 	PRINTK("init_stripe called, stripe %llu\n",
@@ -269,8 +265,7 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
 			init_stripe(sh, sector, pd_idx, disks);
 		} else {
 			if (atomic_read(&sh->count)) {
-				if (!list_empty(&sh->lru))
-					BUG();
+				BUG_ON(!list_empty(&sh->lru));
 			} else {
 				if (!test_bit(STRIPE_HANDLE, &sh->state))
 					atomic_inc(&conf->active_stripes);
@@ -465,8 +460,7 @@ static int drop_one_stripe(raid5_conf_t *conf)
 	spin_unlock_irq(&conf->device_lock);
 	if (!sh)
 		return 0;
-	if (atomic_read(&sh->count))
-		BUG();
+	BUG_ON(atomic_read(&sh->count));
 	shrink_buffers(sh, conf->pool_size);
 	kmem_cache_free(conf->slab_cache, sh);
 	atomic_dec(&conf->active_stripes);
@@ -882,8 +876,7 @@ static void compute_parity(struct stripe_head *sh, int method)
 	ptr[0] = page_address(sh->dev[pd_idx].page);
 	switch(method) {
 	case READ_MODIFY_WRITE:
-		if (!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags))
-			BUG();
+		BUG_ON(!test_bit(R5_UPTODATE, &sh->dev[pd_idx].flags));
 		for (i=disks ; i-- ;) {
 			if (i==pd_idx)
 				continue;
@@ -896,7 +889,7 @@ static void compute_parity(struct stripe_head *sh, int method)
 				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 					wake_up(&conf->wait_for_overlap);
 
-				if (sh->dev[i].written) BUG();
+				BUG_ON(sh->dev[i].written);
 				sh->dev[i].written = chosen;
 				check_xor();
 			}
@@ -912,7 +905,7 @@ static void compute_parity(struct stripe_head *sh, int method)
 				if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 					wake_up(&conf->wait_for_overlap);
 
-				if (sh->dev[i].written) BUG();
+				BUG_ON(sh->dev[i].written);
 				sh->dev[i].written = chosen;
 			}
 		break;
@@ -995,8 +988,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 
 	if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9))
 		goto overlap;
-	if (*bip && bi->bi_next && (*bip) != bi->bi_next)
-		BUG();
+	BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
 	if (*bip)
 		bi->bi_next = *bip;
 	*bip = bi;
@@ -1430,8 +1422,7 @@ static void handle_stripe(struct stripe_head *sh)
 		set_bit(STRIPE_HANDLE, &sh->state);
 		if (failed == 0) {
 			char *pagea;
-			if (uptodate != disks)
-				BUG();
+			BUG_ON(uptodate != disks);
 			compute_parity(sh, CHECK_PARITY);
 			uptodate--;
 			pagea = page_address(sh->dev[sh->pd_idx].page);
@@ -2096,8 +2087,7 @@ static void raid5d (mddev_t *mddev)
 
 			list_del_init(first);
 			atomic_inc(&sh->count);
-			if (atomic_read(&sh->count)!= 1)
-				BUG();
+			BUG_ON(atomic_read(&sh->count)!= 1);
 			spin_unlock_irq(&conf->device_lock);
 
 			handled++;
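add_stripe_bio() in both raid5 and raid6 inserts a bio into a per-device list kept sorted by start sector and refuses a request that overlaps an existing one. A minimal model of that insert, using a toy request struct rather than struct bio and a simplified walk condition:

#include <stdio.h>
#include <stddef.h>

struct req { unsigned long sector, sectors; struct req *next; };

/* Insert r into the sorted list at *head; return 0 on overlap. */
static int add_req(struct req **head, struct req *r)
{
    struct req **bip = head;

    while (*bip && (*bip)->sector + (*bip)->sectors <= r->sector)
        bip = &(*bip)->next;        /* skip requests wholly before r */
    if (*bip && (*bip)->sector < r->sector + r->sectors)
        return 0;                   /* next request starts inside r */
    r->next = *bip;
    *bip = r;
    return 1;
}

int main(void)
{
    struct req a = { 0, 8, NULL }, b = { 16, 8, NULL }, c = { 4, 8, NULL };
    struct req *head = NULL;

    printf("a: %d\n", add_req(&head, &a));  /* 1: inserted        */
    printf("b: %d\n", add_req(&head, &b));  /* 1: inserted        */
    printf("c: %d\n", add_req(&head, &c));  /* 0: overlaps with a */
    return 0;
}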
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index 6df4930fddec..bc69355e0100 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -91,10 +91,8 @@ static void print_raid6_conf (raid6_conf_t *conf);
 static void __release_stripe(raid6_conf_t *conf, struct stripe_head *sh)
 {
 	if (atomic_dec_and_test(&sh->count)) {
-		if (!list_empty(&sh->lru))
-			BUG();
-		if (atomic_read(&conf->active_stripes)==0)
-			BUG();
+		BUG_ON(!list_empty(&sh->lru));
+		BUG_ON(atomic_read(&conf->active_stripes)==0);
 		if (test_bit(STRIPE_HANDLE, &sh->state)) {
 			if (test_bit(STRIPE_DELAYED, &sh->state))
 				list_add_tail(&sh->lru, &conf->delayed_list);
@@ -202,10 +200,8 @@ static void init_stripe(struct stripe_head *sh, sector_t sector, int pd_idx)
 	raid6_conf_t *conf = sh->raid_conf;
 	int disks = conf->raid_disks, i;
 
-	if (atomic_read(&sh->count) != 0)
-		BUG();
-	if (test_bit(STRIPE_HANDLE, &sh->state))
-		BUG();
+	BUG_ON(atomic_read(&sh->count) != 0);
+	BUG_ON(test_bit(STRIPE_HANDLE, &sh->state));
 
 	CHECK_DEVLOCK();
 	PRINTK("init_stripe called, stripe %llu\n",
@@ -284,13 +280,11 @@ static struct stripe_head *get_active_stripe(raid6_conf_t *conf, sector_t sector
 			init_stripe(sh, sector, pd_idx);
 		} else {
 			if (atomic_read(&sh->count)) {
-				if (!list_empty(&sh->lru))
-					BUG();
+				BUG_ON(!list_empty(&sh->lru));
 			} else {
 				if (!test_bit(STRIPE_HANDLE, &sh->state))
 					atomic_inc(&conf->active_stripes);
-				if (list_empty(&sh->lru))
-					BUG();
+				BUG_ON(list_empty(&sh->lru));
 				list_del_init(&sh->lru);
 			}
 		}
@@ -353,8 +347,7 @@ static int drop_one_stripe(raid6_conf_t *conf)
 	spin_unlock_irq(&conf->device_lock);
 	if (!sh)
 		return 0;
-	if (atomic_read(&sh->count))
-		BUG();
+	BUG_ON(atomic_read(&sh->count));
 	shrink_buffers(sh, conf->raid_disks);
 	kmem_cache_free(conf->slab_cache, sh);
 	atomic_dec(&conf->active_stripes);
@@ -780,7 +773,7 @@ static void compute_parity(struct stripe_head *sh, int method)
 			if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
 				wake_up(&conf->wait_for_overlap);
 
-			if (sh->dev[i].written) BUG();
+			BUG_ON(sh->dev[i].written);
 			sh->dev[i].written = chosen;
 		}
 		break;
@@ -970,8 +963,7 @@ static int add_stripe_bio(struct stripe_head *sh, struct bio *bi, int dd_idx, in
 
 	if (*bip && (*bip)->bi_sector < bi->bi_sector + ((bi->bi_size)>>9))
 		goto overlap;
-	if (*bip && bi->bi_next && (*bip) != bi->bi_next)
-		BUG();
+	BUG_ON(*bip && bi->bi_next && (*bip) != bi->bi_next);
 	if (*bip)
 		bi->bi_next = *bip;
 	*bip = bi;
@@ -1906,8 +1898,7 @@ static void raid6d (mddev_t *mddev)
 
 			list_del_init(first);
 			atomic_inc(&sh->count);
-			if (atomic_read(&sh->count)!= 1)
-				BUG();
+			BUG_ON(atomic_read(&sh->count)!= 1);
 			spin_unlock_irq(&conf->device_lock);
 
 			handled++;
@@ -2151,6 +2142,8 @@ static int run(mddev_t *mddev)
 	}
 
 	/* Ok, everything is just fine now */
+	sysfs_create_group(&mddev->kobj, &raid6_attrs_group);
+
 	mddev->array_size =  mddev->size * (mddev->raid_disks - 2);
 
 	mddev->queue->unplug_fn = raid6_unplug_device;
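The run() change finishes next to the line that sizes the array: RAID-6 dedicates two members' worth of space to the P and Q parity, so the usable capacity is the per-device size times (raid_disks - 2), with mddev->size counted in 1K blocks. A quick check with illustrative numbers only:

#include <stdio.h>

int main(void)
{
    /* Hypothetical array: six members of 100 GiB each. */
    unsigned long long size = 100ULL << 20;   /* per-device size in 1K blocks */
    int raid_disks = 6;

    unsigned long long array_size = size * (raid_disks - 2);
    printf("array_size = %llu KiB (= %llu GiB usable)\n",
           array_size, array_size >> 20);
    return 0;
}

With six 100 GiB members this prints 400 GiB usable, i.e. four data members' worth of space.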