Diffstat (limited to 'drivers/md')
-rw-r--r--  drivers/md/dm-bio-list.h |  3
-rw-r--r--  drivers/md/dm-ioctl.c    |  3
-rw-r--r--  drivers/md/dm-log.c      |  4
-rw-r--r--  drivers/md/dm-mpath.c    | 13
-rw-r--r--  drivers/md/dm-raid1.c    | 20
-rw-r--r--  drivers/md/dm-table.c    |  2
-rw-r--r--  drivers/md/md.c          |  6
-rw-r--r--  drivers/md/raid1.c       | 13
-rw-r--r--  drivers/md/raid10.c      |  6
-rw-r--r--  drivers/md/raid5.c       |  9
-rw-r--r--  drivers/md/raid6main.c   | 27
11 files changed, 69 insertions, 37 deletions
diff --git a/drivers/md/dm-bio-list.h b/drivers/md/dm-bio-list.h
index bc021e1fd4d1..bbf4615f0e30 100644
--- a/drivers/md/dm-bio-list.h
+++ b/drivers/md/dm-bio-list.h
@@ -33,6 +33,9 @@ static inline void bio_list_add(struct bio_list *bl, struct bio *bio)
 
 static inline void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
 {
+        if (!bl2->head)
+                return;
+
         if (bl->tail)
                 bl->tail->bi_next = bl2->head;
         else
diff --git a/drivers/md/dm-ioctl.c b/drivers/md/dm-ioctl.c
index 54ec737195e0..07d44e19536e 100644
--- a/drivers/md/dm-ioctl.c
+++ b/drivers/md/dm-ioctl.c
@@ -425,8 +425,8 @@ static void list_version_get_needed(struct target_type *tt, void *needed_param)
 {
         size_t *needed = needed_param;
 
+        *needed += sizeof(struct dm_target_versions);
         *needed += strlen(tt->name);
-        *needed += sizeof(tt->version);
         *needed += ALIGN_MASK;
 }
 
@@ -974,6 +974,7 @@ static int table_load(struct dm_ioctl *param, size_t param_size)
         if (!hc) {
                 DMWARN("device doesn't appear to be in the dev hash table.");
                 up_write(&_hash_lock);
+                dm_table_put(t);
                 return -ENXIO;
         }
 
diff --git a/drivers/md/dm-log.c b/drivers/md/dm-log.c
index e110655eabdb..a76349cb10a5 100644
--- a/drivers/md/dm-log.c
+++ b/drivers/md/dm-log.c
@@ -333,10 +333,10 @@ static int core_ctr(struct dirty_log *log, struct dm_target *ti,
         lc->sync = sync;
 
         /*
-         * Work out how many words we need to hold the bitset.
+         * Work out how many "unsigned long"s we need to hold the bitset.
          */
         bitset_size = dm_round_up(region_count,
-                                  sizeof(*lc->clean_bits) << BYTE_SHIFT);
+                                  sizeof(unsigned long) << BYTE_SHIFT);
         bitset_size >>= BYTE_SHIFT;
 
         lc->bitset_uint32_count = bitset_size / 4;
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index f9b7b32d5d5c..f72a82fb9434 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -1000,6 +1000,7 @@ static int do_end_io(struct multipath *m, struct bio *bio,
 {
         struct hw_handler *hwh = &m->hw_handler;
         unsigned err_flags = MP_FAIL_PATH;      /* Default behavior */
+        unsigned long flags;
 
         if (!error)
                 return 0;       /* I/O complete */
@@ -1010,17 +1011,17 @@ static int do_end_io(struct multipath *m, struct bio *bio,
         if (error == -EOPNOTSUPP)
                 return error;
 
-        spin_lock(&m->lock);
+        spin_lock_irqsave(&m->lock, flags);
         if (!m->nr_valid_paths) {
                 if (!m->queue_if_no_path) {
-                        spin_unlock(&m->lock);
+                        spin_unlock_irqrestore(&m->lock, flags);
                         return -EIO;
                 } else {
-                        spin_unlock(&m->lock);
+                        spin_unlock_irqrestore(&m->lock, flags);
                         goto requeue;
                 }
         }
-        spin_unlock(&m->lock);
+        spin_unlock_irqrestore(&m->lock, flags);
 
         if (hwh->type && hwh->type->error)
                 err_flags = hwh->type->error(hwh, bio);
@@ -1040,12 +1041,12 @@ static int do_end_io(struct multipath *m, struct bio *bio,
         dm_bio_restore(&mpio->details, bio);
 
         /* queue for the daemon to resubmit or fail */
-        spin_lock(&m->lock);
+        spin_lock_irqsave(&m->lock, flags);
         bio_list_add(&m->queued_ios, bio);
         m->queue_size++;
         if (!m->queue_io)
                 queue_work(kmultipathd, &m->process_queued_ios);
-        spin_unlock(&m->lock);
+        spin_unlock_irqrestore(&m->lock, flags);
 
         return 1;       /* io not complete */
 }
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 2375709a392c..6b0fc1670929 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -376,16 +376,18 @@ static void rh_inc(struct region_hash *rh, region_t region)
         read_lock(&rh->hash_lock);
         reg = __rh_find(rh, region);
+        spin_lock_irq(&rh->region_lock);
         atomic_inc(&reg->pending);
 
-        spin_lock_irq(&rh->region_lock);
         if (reg->state == RH_CLEAN) {
-                rh->log->type->mark_region(rh->log, reg->key);
-                reg->state = RH_DIRTY;
                 list_del_init(&reg->list);      /* take off the clean list */
-        }
-        spin_unlock_irq(&rh->region_lock);
+                spin_unlock_irq(&rh->region_lock);
+
+                rh->log->type->mark_region(rh->log, reg->key);
+        } else
+                spin_unlock_irq(&rh->region_lock);
+
         read_unlock(&rh->hash_lock);
 }
@@ -408,21 +410,17 @@ static void rh_dec(struct region_hash *rh, region_t region)
         reg = __rh_lookup(rh, region);
         read_unlock(&rh->hash_lock);
 
+        spin_lock_irqsave(&rh->region_lock, flags);
         if (atomic_dec_and_test(&reg->pending)) {
-                spin_lock_irqsave(&rh->region_lock, flags);
-                if (atomic_read(&reg->pending)) { /* check race */
-                        spin_unlock_irqrestore(&rh->region_lock, flags);
-                        return;
-                }
                 if (reg->state == RH_RECOVERING) {
                         list_add_tail(&reg->list, &rh->quiesced_regions);
                 } else {
                         reg->state = RH_CLEAN;
                         list_add(&reg->list, &rh->clean_regions);
                 }
-                spin_unlock_irqrestore(&rh->region_lock, flags);
                 should_wake = 1;
         }
+        spin_unlock_irqrestore(&rh->region_lock, flags);
 
         if (should_wake)
                 wake();
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index a6d3baa46f61..a6f2dc66c3db 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -638,7 +638,7 @@ int dm_split_args(int *argc, char ***argvp, char *input)
 static void check_for_valid_limits(struct io_restrictions *rs)
 {
         if (!rs->max_sectors)
-                rs->max_sectors = MAX_SECTORS;
+                rs->max_sectors = SAFE_MAX_SECTORS;
         if (!rs->max_phys_segments)
                 rs->max_phys_segments = MAX_PHYS_SEGMENTS;
         if (!rs->max_hw_segments)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 78c7418478d6..8175a2a222da 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -1028,7 +1028,6 @@ static int super_1_validate(mddev_t *mddev, mdk_rdev_t *rdev)
                 mddev->size = le64_to_cpu(sb->size)/2;
                 mddev->events = le64_to_cpu(sb->events);
                 mddev->bitmap_offset = 0;
-                mddev->default_bitmap_offset = 0;
                 mddev->default_bitmap_offset = 1024;
 
                 mddev->recovery_cp = le64_to_cpu(sb->resync_offset);
@@ -1730,7 +1729,7 @@ level_show(mddev_t *mddev, char *page)
         if (p == NULL && mddev->raid_disks == 0)
                 return 0;
         if (mddev->level >= 0)
-                return sprintf(page, "RAID-%d\n", mddev->level);
+                return sprintf(page, "raid%d\n", mddev->level);
         else
                 return sprintf(page, "%s\n", p->name);
 }
@@ -2932,6 +2931,9 @@ static int set_array_info(mddev_t * mddev, mdu_array_info_t *info)
 
         mddev->sb_dirty = 1;
 
+        mddev->default_bitmap_offset = MD_SB_BYTES >> 9;
+        mddev->bitmap_offset = 0;
+
         /*
          * Generate a 128 bit UUID
          */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 2da9d3ba902d..229d7b204297 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -320,7 +320,6 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
          * this branch is our 'one mirror IO has finished' event handler:
          */
         r1_bio->bios[mirror] = NULL;
-        bio_put(bio);
         if (!uptodate) {
                 md_error(r1_bio->mddev, conf->mirrors[mirror].rdev);
                 /* an I/O failed, we can't clear the bitmap */
@@ -377,7 +376,6 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
         }
         if (test_bit(R1BIO_BehindIO, &r1_bio->state)) {
                 /* free extra copy of the data pages */
-/* FIXME bio has been freed!!! */
                 int i = bio->bi_vcnt;
                 while (i--)
                         __free_page(bio->bi_io_vec[i].bv_page);
@@ -391,6 +389,9 @@ static int raid1_end_write_request(struct bio *bio, unsigned int bytes_done, int
                 raid_end_bio_io(r1_bio);
         }
 
+        if (r1_bio->bios[mirror]==NULL)
+                bio_put(bio);
+
         rdev_dec_pending(conf->mirrors[mirror].rdev, conf->mddev);
         return 0;
 }
@@ -953,9 +954,6 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
         int mirror = 0;
         mirror_info_t *p;
 
-        if (rdev->saved_raid_disk >= 0 &&
-            conf->mirrors[rdev->saved_raid_disk].rdev == NULL)
-                mirror = rdev->saved_raid_disk;
 
         for (mirror=0; mirror < mddev->raid_disks; mirror++)
                 if ( !(p=conf->mirrors+mirror)->rdev) {
@@ -972,7 +970,10 @@ static int raid1_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                         p->head_position = 0;
                         rdev->raid_disk = mirror;
                         found = 1;
-                        if (rdev->saved_raid_disk != mirror)
+                        /* As all devices are equivalent, we don't need a full recovery
+                         * if this was recently any drive of the array
+                         */
+                        if (rdev->saved_raid_disk < 0)
                                 conf->fullsync = 1;
                         rcu_assign_pointer(p->rdev, rdev);
                         break;
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 867f06ae33d9..713dc9c2c730 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -552,7 +552,11 @@ static int read_balance(conf_t *conf, r10bio_t *r10_bio)
                     !test_bit(In_sync, &rdev->flags))
                         continue;
 
-                if (!atomic_read(&rdev->nr_pending)) {
+                /* This optimisation is debatable, and completely destroys
+                 * sequential read speed for 'far copies' arrays. So only
+                 * keep it for 'near' arrays, and review those later.
+                 */
+                if (conf->near_copies > 1 && !atomic_read(&rdev->nr_pending)) {
                         disk = ndisk;
                         slot = nslot;
                         break;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index e2a40283e323..fafc4bc045f7 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -98,7 +98,7 @@ static inline void __release_stripe(raid5_conf_t *conf, struct stripe_head *sh)
                         list_add_tail(&sh->lru, &conf->inactive_list);
                         atomic_dec(&conf->active_stripes);
                         if (!conf->inactive_blocked ||
-                            atomic_read(&conf->active_stripes) < (NR_STRIPES*3/4))
+                            atomic_read(&conf->active_stripes) < (conf->max_nr_stripes*3/4))
                                 wake_up(&conf->wait_for_stripe);
                 }
         }
@@ -264,7 +264,8 @@ static struct stripe_head *get_active_stripe(raid5_conf_t *conf, sector_t sector
                                 conf->inactive_blocked = 1;
                                 wait_event_lock_irq(conf->wait_for_stripe,
                                                     !list_empty(&conf->inactive_list) &&
-                                                    (atomic_read(&conf->active_stripes) < (NR_STRIPES *3/4)
+                                                    (atomic_read(&conf->active_stripes)
+                                                     < (conf->max_nr_stripes *3/4)
                                                      || !conf->inactive_blocked),
                                                     conf->device_lock,
                                                     unplug_slaves(conf->mddev);
@@ -1704,7 +1705,9 @@ static void raid5d (mddev_t *mddev)
 
                 if (conf->seq_flush - conf->seq_write > 0) {
                         int seq = conf->seq_flush;
+                        spin_unlock_irq(&conf->device_lock);
                         bitmap_unplug(mddev->bitmap);
+                        spin_lock_irq(&conf->device_lock);
                         conf->seq_write = seq;
                         activate_bit_delay(conf);
                 }
@@ -1915,7 +1918,7 @@ static int run(mddev_t *mddev)
                         goto abort;
                 }
         }
-memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
+        memory = conf->max_nr_stripes * (sizeof(struct stripe_head) +
                  conf->raid_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024;
         if (grow_stripes(conf, conf->max_nr_stripes)) {
                 printk(KERN_ERR
diff --git a/drivers/md/raid6main.c b/drivers/md/raid6main.c
index eae5a35629c5..0000d162d198 100644
--- a/drivers/md/raid6main.c
+++ b/drivers/md/raid6main.c
@@ -1702,6 +1702,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
         int data_disks = raid_disks - 2;
         sector_t max_sector = mddev->size << 1;
         int sync_blocks;
+        int still_degraded = 0;
+        int i;
 
         if (sector_nr >= max_sector) {
                 /* just being told to finish up .. nothing much to do */
@@ -1710,7 +1712,7 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                 if (mddev->curr_resync < max_sector) /* aborted */
                         bitmap_end_sync(mddev->bitmap, mddev->curr_resync,
                                         &sync_blocks, 1);
-                else /* compelted sync */
+                else /* completed sync */
                         conf->fullsync = 0;
 
                 bitmap_close_sync(mddev->bitmap);
@@ -1748,7 +1750,16 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
                  */
                 schedule_timeout_uninterruptible(1);
         }
-        bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, 0);
+        /* Need to check if array will still be degraded after recovery/resync
+         * We don't need to check the 'failed' flag as when that gets set,
+         * recovery aborts.
+         */
+        for (i=0; i<mddev->raid_disks; i++)
+                if (conf->disks[i].rdev == NULL)
+                        still_degraded = 1;
+
+        bitmap_start_sync(mddev->bitmap, sector_nr, &sync_blocks, still_degraded);
+
         spin_lock(&sh->lock);
         set_bit(STRIPE_SYNCING, &sh->state);
         clear_bit(STRIPE_INSYNC, &sh->state);
@@ -1784,7 +1795,9 @@ static void raid6d (mddev_t *mddev)
 
                 if (conf->seq_flush - conf->seq_write > 0) {
                         int seq = conf->seq_flush;
+                        spin_unlock_irq(&conf->device_lock);
                         bitmap_unplug(mddev->bitmap);
+                        spin_lock_irq(&conf->device_lock);
                         conf->seq_write = seq;
                         activate_bit_delay(conf);
                 }
@@ -2145,9 +2158,15 @@ static int raid6_add_disk(mddev_t *mddev, mdk_rdev_t *rdev)
                 /* no point adding a device */
                 return 0;
         /*
-         * find the disk ...
+         * find the disk ... but prefer rdev->saved_raid_disk
+         * if possible.
          */
-        for (disk=0; disk < mddev->raid_disks; disk++)
+        if (rdev->saved_raid_disk >= 0 &&
+            conf->disks[rdev->saved_raid_disk].rdev == NULL)
+                disk = rdev->saved_raid_disk;
+        else
+                disk = 0;
+        for ( ; disk < mddev->raid_disks; disk++)
                 if ((p=conf->disks + disk)->rdev == NULL) {
                         clear_bit(In_sync, &rdev->flags);
                         rdev->raid_disk = disk;
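
Note: the dm-bio-list.h hunk above adds an early return so that merging an empty source list cannot clobber the destination's tail pointer. Below is a minimal, self-contained userspace sketch of that pattern (a simplified struct bio carrying only a bi_next link and a tag field, not the kernel definitions), showing why the guard matters:

/*
 * Standalone sketch (not kernel code) of the bio_list_merge() guard.
 * Struct names mirror the kernel's, but this is a simplified model.
 */
#include <assert.h>
#include <stdio.h>

struct bio {                       /* minimal stand-in for struct bio */
        struct bio *bi_next;
        int nr;
};

struct bio_list {
        struct bio *head;
        struct bio *tail;
};

static void bio_list_add(struct bio_list *bl, struct bio *bio)
{
        bio->bi_next = NULL;
        if (bl->tail)
                bl->tail->bi_next = bio;
        else
                bl->head = bio;
        bl->tail = bio;
}

static void bio_list_merge(struct bio_list *bl, struct bio_list *bl2)
{
        if (!bl2->head)            /* the added guard: nothing to merge */
                return;

        if (bl->tail)
                bl->tail->bi_next = bl2->head;
        else
                bl->head = bl2->head;
        bl->tail = bl2->tail;      /* without the guard this would become NULL */
}

int main(void)
{
        struct bio a = { .nr = 1 }, b = { .nr = 2 };
        struct bio_list dst = { NULL, NULL }, empty = { NULL, NULL };

        bio_list_add(&dst, &a);
        bio_list_add(&dst, &b);

        bio_list_merge(&dst, &empty);      /* must leave dst.tail == &b */
        assert(dst.tail == &b);

        for (struct bio *p = dst.head; p; p = p->bi_next)
                printf("bio %d\n", p->nr);
        return 0;
}

Without the early return, merging an empty list would set the destination's tail to NULL while the head still points at queued entries, so the next bio_list_add() would start a new chain and silently drop the bios already queued.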