Diffstat (limited to 'drivers/md')
-rw-r--r--   drivers/md/bitmap.c   | 45
-rw-r--r--   drivers/md/dm-mpath.c | 16
-rw-r--r--   drivers/md/dm.c       | 12
-rw-r--r--   drivers/md/md.c       | 43
-rw-r--r--   drivers/md/raid10.c   |  9
-rw-r--r--   drivers/md/raid5.c    | 32
6 files changed, 127 insertions, 30 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 7e65bad522cb..ac89a5deaca2 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -238,15 +238,47 @@ static struct page *read_sb_page(mddev_t *mddev, long offset, unsigned long inde
 
 }
 
+static mdk_rdev_t *next_active_rdev(mdk_rdev_t *rdev, mddev_t *mddev)
+{
+        /* Iterate the disks of an mddev, using rcu to protect access to the
+         * linked list, and raising the refcount of devices we return to ensure
+         * they don't disappear while in use.
+         * As devices are only added or removed when raid_disk is < 0 and
+         * nr_pending is 0 and In_sync is clear, the entries we return will
+         * still be in the same position on the list when we re-enter
+         * list_for_each_continue_rcu.
+         */
+        struct list_head *pos;
+        rcu_read_lock();
+        if (rdev == NULL)
+                /* start at the beginning */
+                pos = &mddev->disks;
+        else {
+                /* release the previous rdev and start from there. */
+                rdev_dec_pending(rdev, mddev);
+                pos = &rdev->same_set;
+        }
+        list_for_each_continue_rcu(pos, &mddev->disks) {
+                rdev = list_entry(pos, mdk_rdev_t, same_set);
+                if (rdev->raid_disk >= 0 &&
+                    test_bit(In_sync, &rdev->flags) &&
+                    !test_bit(Faulty, &rdev->flags)) {
+                        /* this is a usable device */
+                        atomic_inc(&rdev->nr_pending);
+                        rcu_read_unlock();
+                        return rdev;
+                }
+        }
+        rcu_read_unlock();
+        return NULL;
+}
+
 static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
 {
-        mdk_rdev_t *rdev;
+        mdk_rdev_t *rdev = NULL;
         mddev_t *mddev = bitmap->mddev;
 
-        rcu_read_lock();
-        rdev_for_each_rcu(rdev, mddev)
-                if (test_bit(In_sync, &rdev->flags)
-                    && !test_bit(Faulty, &rdev->flags)) {
+        while ((rdev = next_active_rdev(rdev, mddev)) != NULL) {
                         int size = PAGE_SIZE;
                         if (page->index == bitmap->file_pages-1)
                                 size = roundup(bitmap->last_page_size,
@@ -281,8 +313,7 @@ static int write_sb_page(struct bitmap *bitmap, struct page *page, int wait)
                                   + page->index * (PAGE_SIZE/512),
                                   size,
                                   page);
-                }
-        rcu_read_unlock();
+        }
         if (wait)
                 md_super_wait(mddev);
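
The next_active_rdev() helper above is the heart of the bitmap.c change: write_sb_page() may now sleep between devices, which the old rdev_for_each_rcu() loop under a single rcu_read_lock() did not allow. The pattern generalizes: pin each returned element with a refcount so the RCU read lock can be dropped between iterations, relying on the invariant that a pinned entry is never unlinked or repositioned. A minimal kernel-context sketch of the same idea; struct my_dev, its refs field, and next_dev() are hypothetical stand-ins for mdk_rdev_t, nr_pending, and next_active_rdev():

#include <linux/list.h>
#include <linux/rcupdate.h>

struct my_dev {
        struct list_head entry;         /* linked into an RCU-protected list */
        atomic_t refs;                  /* pins the entry across sleeps */
};

/* Return the element after 'prev' (or the first, if prev is NULL) with its
 * refcount raised, dropping the reference on 'prev'. Only safe if entries
 * are never unlinked while their refcount is held, which is exactly the
 * invariant next_active_rdev() documents for mddev->disks.
 */
static struct my_dev *next_dev(struct my_dev *prev, struct list_head *head)
{
        struct list_head *pos;

        rcu_read_lock();
        if (!prev)
                pos = head;                     /* start at the beginning */
        else {
                atomic_dec(&prev->refs);        /* release the previous element */
                pos = &prev->entry;             /* resume from its position */
        }
        list_for_each_continue_rcu(pos, head) {
                struct my_dev *dev = list_entry(pos, struct my_dev, entry);
                atomic_inc(&dev->refs);         /* pin before leaving RCU */
                rcu_read_unlock();
                return dev;
        }
        rcu_read_unlock();
        return NULL;
}

A caller then loops while ((dev = next_dev(dev, &list)) != NULL), free to block inside the loop body, exactly as write_sb_page() now does.
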
diff --git a/drivers/md/dm-mpath.c b/drivers/md/dm-mpath.c
index 71dd65aa31b6..c2fcf28b4c70 100644
--- a/drivers/md/dm-mpath.c
+++ b/drivers/md/dm-mpath.c
@@ -63,6 +63,7 @@ struct multipath {
 
         const char *hw_handler_name;
         struct work_struct activate_path;
+        struct pgpath *pgpath_to_activate;
         unsigned nr_priority_groups;
         struct list_head priority_groups;
         unsigned pg_init_required;      /* pg_init needs calling? */
@@ -146,6 +147,7 @@ static struct priority_group *alloc_priority_group(void)
 
 static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
 {
+        unsigned long flags;
         struct pgpath *pgpath, *tmp;
         struct multipath *m = ti->private;
 
@@ -154,6 +156,10 @@ static void free_pgpaths(struct list_head *pgpaths, struct dm_target *ti)
                 if (m->hw_handler_name)
                         scsi_dh_detach(bdev_get_queue(pgpath->path.dev->bdev));
                 dm_put_device(ti, pgpath->path.dev);
+                spin_lock_irqsave(&m->lock, flags);
+                if (m->pgpath_to_activate == pgpath)
+                        m->pgpath_to_activate = NULL;
+                spin_unlock_irqrestore(&m->lock, flags);
                 free_pgpath(pgpath);
         }
 }
@@ -421,6 +427,7 @@ static void process_queued_ios(struct work_struct *work)
                 __choose_pgpath(m);
 
         pgpath = m->current_pgpath;
+        m->pgpath_to_activate = m->current_pgpath;
 
         if ((pgpath && !m->queue_io) ||
             (!pgpath && !m->queue_if_no_path))
@@ -1093,8 +1100,15 @@ static void activate_path(struct work_struct *work)
         int ret;
         struct multipath *m =
                 container_of(work, struct multipath, activate_path);
-        struct dm_path *path = &m->current_pgpath->path;
+        struct dm_path *path;
+        unsigned long flags;
 
+        spin_lock_irqsave(&m->lock, flags);
+        path = &m->pgpath_to_activate->path;
+        m->pgpath_to_activate = NULL;
+        spin_unlock_irqrestore(&m->lock, flags);
+        if (!path)
+                return;
         ret = scsi_dh_activate(bdev_get_queue(path->dev->bdev));
         pg_init_done(path, ret);
 }
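
The dm-mpath change decouples path activation from m->current_pgpath: process_queued_ios() records the path to activate under m->lock, free_pgpaths() clears that record before freeing, and activate_path() takes-and-clears it, so the worker never dereferences a freed pgpath. One caveat in the hunk above: activate_path() computes path = &m->pgpath_to_activate->path and then tests !path, but a member address computed from a NULL base pointer is not itself NULL (and computing it is undefined behaviour), so that test cannot catch the cleared case. A sketch of the more defensive form of the take-and-clear handoff, snapshotting and testing the struct pointer itself before touching any member (activate_path_sketch is a hypothetical rewrite, reusing dm-mpath's own types and helpers):

static void activate_path_sketch(struct work_struct *work)
{
        int ret;
        struct multipath *m =
                container_of(work, struct multipath, activate_path);
        struct pgpath *pgpath;
        unsigned long flags;

        spin_lock_irqsave(&m->lock, flags);
        pgpath = m->pgpath_to_activate;  /* claim the pending activation */
        m->pgpath_to_activate = NULL;    /* the handoff slot is consumed once */
        spin_unlock_irqrestore(&m->lock, flags);

        if (!pgpath)                     /* raced with free_pgpaths(): nothing to do */
                return;

        ret = scsi_dh_activate(bdev_get_queue(pgpath->path.dev->bdev));
        pg_init_done(&pgpath->path, ret);
}
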
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index bca448e11878..ace998ce59f6 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -837,12 +837,14 @@ static int dm_merge_bvec(struct request_queue *q,
         struct dm_table *map = dm_get_table(md);
         struct dm_target *ti;
         sector_t max_sectors;
-        int max_size;
+        int max_size = 0;
 
         if (unlikely(!map))
-                return 0;
+                goto out;
 
         ti = dm_table_find_target(map, bvm->bi_sector);
+        if (!dm_target_is_valid(ti))
+                goto out_table;
 
         /*
          * Find maximum amount of I/O that won't need splitting
@@ -861,14 +863,16 @@ static int dm_merge_bvec(struct request_queue *q,
         if (max_size && ti->type->merge)
                 max_size = ti->type->merge(ti, bvm, biovec, max_size);
 
+out_table:
+        dm_table_put(map);
+
+out:
         /*
          * Always allow an entire first page
          */
         if (max_size <= biovec->bv_len && !(bvm->bi_size >> SECTOR_SHIFT))
                 max_size = biovec->bv_len;
 
-        dm_table_put(map);
-
         return max_size;
 }
diff --git a/drivers/md/md.c b/drivers/md/md.c
index c7aae66c6f9b..deeac4b44173 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2393,6 +2393,8 @@ static void analyze_sbs(mddev_t * mddev)
 
 }
 
+static void md_safemode_timeout(unsigned long data);
+
 static ssize_t
 safe_delay_show(mddev_t *mddev, char *page)
 {
@@ -2432,9 +2434,12 @@ safe_delay_store(mddev_t *mddev, const char *cbuf, size_t len)
         if (msec == 0)
                 mddev->safemode_delay = 0;
         else {
+                unsigned long old_delay = mddev->safemode_delay;
                 mddev->safemode_delay = (msec*HZ)/1000;
                 if (mddev->safemode_delay == 0)
                         mddev->safemode_delay = 1;
+                if (mddev->safemode_delay < old_delay)
+                        md_safemode_timeout((unsigned long)mddev);
         }
         return len;
 }
@@ -3836,8 +3841,6 @@ static int do_md_stop(mddev_t * mddev, int mode, int is_open)
 
                 del_timer_sync(&mddev->safemode_timer);
 
-                invalidate_partition(disk, 0);
-
                 switch(mode) {
                 case 1: /* readonly */
                         err = -ENXIO;
@@ -4634,6 +4637,11 @@ static int update_size(mddev_t *mddev, sector_t num_sectors)
          */
         if (mddev->sync_thread)
                 return -EBUSY;
+        if (mddev->bitmap)
+                /* Sorry, cannot grow a bitmap yet, just remove it,
+                 * grow, and re-add.
+                 */
+                return -EBUSY;
         rdev_for_each(rdev, tmp, mddev) {
                 sector_t avail;
                 avail = rdev->size * 2;
@@ -5753,7 +5761,11 @@ void md_do_sync(mddev_t *mddev)
                                  * time 'round when curr_resync == 2
                                  */
                                 continue;
-                        prepare_to_wait(&resync_wait, &wq, TASK_UNINTERRUPTIBLE);
+                        /* We need to wait 'interruptible' so as not to
+                         * contribute to the load average, and not to
+                         * be caught by 'softlockup'
+                         */
+                        prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE);
                         if (!kthread_should_stop() &&
                             mddev2->curr_resync >= mddev->curr_resync) {
                                 printk(KERN_INFO "md: delaying %s of %s"
@@ -5761,6 +5773,8 @@ void md_do_sync(mddev_t *mddev)
                                        " share one or more physical units)\n",
                                        desc, mdname(mddev), mdname(mddev2));
                                 mddev_put(mddev2);
+                                if (signal_pending(current))
+                                        flush_signals(current);
                                 schedule();
                                 finish_wait(&resync_wait, &wq);
                                 goto try_again;
@@ -5993,7 +6007,7 @@ static int remove_and_add_spares(mddev_t *mddev)
                         }
                 }
 
-        if (mddev->degraded) {
+        if (mddev->degraded && !mddev->ro) {
                 rdev_for_each(rdev, rtmp, mddev) {
                         if (rdev->raid_disk >= 0 &&
                             !test_bit(In_sync, &rdev->flags) &&
@@ -6067,6 +6081,8 @@ void md_check_recovery(mddev_t *mddev)
                 flush_signals(current);
         }
 
+        if (mddev->ro && !test_bit(MD_RECOVERY_NEEDED, &mddev->recovery))
+                return;
         if ( ! (
                 (mddev->flags && !mddev->external) ||
                 test_bit(MD_RECOVERY_NEEDED, &mddev->recovery) ||
@@ -6080,6 +6096,15 @@ void md_check_recovery(mddev_t *mddev)
 
         if (mddev_trylock(mddev)) {
                 int spares = 0;
 
+                if (mddev->ro) {
+                        /* Only thing we do on a ro array is remove
+                         * failed devices.
+                         */
+                        remove_and_add_spares(mddev);
+                        clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+                        goto unlock;
+                }
+
                 if (!mddev->external) {
                         int did_change = 0;
                         spin_lock_irq(&mddev->write_lock);
@@ -6117,7 +6142,8 @@ void md_check_recovery(mddev_t *mddev)
                         /* resync has finished, collect result */
                         md_unregister_thread(mddev->sync_thread);
                         mddev->sync_thread = NULL;
-                        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+                        if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) &&
+                            !test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
                                 /* success...*/
                                 /* activate any spares */
                                 if (mddev->pers->spare_active(mddev))
@@ -6169,6 +6195,7 @@ void md_check_recovery(mddev_t *mddev)
                 } else if ((spares = remove_and_add_spares(mddev))) {
                         clear_bit(MD_RECOVERY_SYNC, &mddev->recovery);
                         clear_bit(MD_RECOVERY_CHECK, &mddev->recovery);
+                        clear_bit(MD_RECOVERY_REQUESTED, &mddev->recovery);
                         set_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
                 } else if (mddev->recovery_cp < MaxSector) {
                         set_bit(MD_RECOVERY_SYNC, &mddev->recovery);
@@ -6232,7 +6259,11 @@ static int md_notify_reboot(struct notifier_block *this,
 
                 for_each_mddev(mddev, tmp)
                         if (mddev_trylock(mddev)) {
-                                do_md_stop (mddev, 1, 0);
+                                /* Force a switch to readonly even if the
+                                 * array appears to still be in use. Hence
+                                 * the '100'.
+                                 */
+                                do_md_stop (mddev, 1, 100);
                                 mddev_unlock(mddev);
                         }
                 /*
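
Two details in the safe_delay_store() hunk are easy to miss. First, the handler now fires md_safemode_timeout() by hand whenever the delay is reduced, because a timer armed with the old, longer delay would otherwise keep the array out of safe mode too long. Second, the millisecond-to-jiffies conversion rounds down, so a very small value would silently disable the timer instead of shortening it; the clamp to one jiffy prevents that. The arithmetic, annotated with an assumed HZ=250 purely for illustration:

        /* 3 ms at HZ=250: (3*250)/1000 == 0 jiffies, which would mean
         * "no delay at all", so clamp to the shortest expressible delay. */
        mddev->safemode_delay = (msec*HZ)/1000;         /* 0 for msec == 3 */
        if (mddev->safemode_delay == 0)
                mddev->safemode_delay = 1;              /* 1 jiffy = 4 ms at HZ=250 */
        if (mddev->safemode_delay < old_delay)
                md_safemode_timeout((unsigned long)mddev); /* fire early */
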
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index d41bebb6da0f..e34cd0e62473 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -76,11 +76,13 @@ static void r10bio_pool_free(void *r10_bio, void *data)
         kfree(r10_bio);
 }
 
+/* Maximum size of each resync request */
 #define RESYNC_BLOCK_SIZE (64*1024)
-//#define RESYNC_BLOCK_SIZE PAGE_SIZE
-#define RESYNC_SECTORS (RESYNC_BLOCK_SIZE >> 9)
 #define RESYNC_PAGES ((RESYNC_BLOCK_SIZE + PAGE_SIZE-1) / PAGE_SIZE)
-#define RESYNC_WINDOW (2048*1024)
+/* amount of memory to reserve for resync requests */
+#define RESYNC_WINDOW (1024*1024)
+/* maximum number of concurrent requests, memory permitting */
+#define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)
 
 /*
  * When performing a resync, we need to read and compare, so
@@ -690,7 +692,6 @@ static int flush_pending_writes(conf_t *conf)
  * there is no normal IO happening. It must arrange to call
  * lower_barrier when the particular background IO completes.
  */
-#define RESYNC_DEPTH 32
 
 static void raise_barrier(conf_t *conf, int force)
 {
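
The raid10.c hunks replace the hard-coded RESYNC_DEPTH of 32 with a value derived from the request size, and document what each constant means. The arithmetic, spelled out: 32 MiB of in-flight resync I/O divided by the 64 KiB RESYNC_BLOCK_SIZE gives a depth of 512 concurrent requests, while the reserved RESYNC_WINDOW shrinks from 2 MiB to 1 MiB:

        #define RESYNC_BLOCK_SIZE (64*1024)                     /* 64 KiB per request */
        #define RESYNC_WINDOW (1024*1024)                       /* 1 MiB reserved */
        #define RESYNC_DEPTH (32*1024*1024/RESYNC_BLOCK_SIZE)   /* 32 MiB / 64 KiB = 512 */
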
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 40e939675657..224de022e7c5 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2568,10 +2568,10 @@ static bool handle_stripe5(struct stripe_head *sh)
                 if (dev->written)
                         s.written++;
                 rdev = rcu_dereference(conf->disks[i].rdev);
-                if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+                if (blocked_rdev == NULL &&
+                    rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
                         blocked_rdev = rdev;
                         atomic_inc(&rdev->nr_pending);
-                        break;
                 }
                 if (!rdev || !test_bit(In_sync, &rdev->flags)) {
                         /* The ReadError flag will just be confusing now */
@@ -2588,8 +2588,14 @@ static bool handle_stripe5(struct stripe_head *sh)
         rcu_read_unlock();
 
         if (unlikely(blocked_rdev)) {
-                set_bit(STRIPE_HANDLE, &sh->state);
-                goto unlock;
+                if (s.syncing || s.expanding || s.expanded ||
+                    s.to_write || s.written) {
+                        set_bit(STRIPE_HANDLE, &sh->state);
+                        goto unlock;
+                }
+                /* There is nothing for the blocked_rdev to block */
+                rdev_dec_pending(blocked_rdev, conf->mddev);
+                blocked_rdev = NULL;
         }
 
         if (s.to_fill && !test_bit(STRIPE_BIOFILL_RUN, &sh->state)) {
@@ -2832,10 +2838,10 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
                 if (dev->written)
                         s.written++;
                 rdev = rcu_dereference(conf->disks[i].rdev);
-                if (rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
+                if (blocked_rdev == NULL &&
+                    rdev && unlikely(test_bit(Blocked, &rdev->flags))) {
                         blocked_rdev = rdev;
                         atomic_inc(&rdev->nr_pending);
-                        break;
                 }
                 if (!rdev || !test_bit(In_sync, &rdev->flags)) {
                         /* The ReadError flag will just be confusing now */
@@ -2853,9 +2859,16 @@ static bool handle_stripe6(struct stripe_head *sh, struct page *tmp_page)
         rcu_read_unlock();
 
         if (unlikely(blocked_rdev)) {
-                set_bit(STRIPE_HANDLE, &sh->state);
-                goto unlock;
+                if (s.syncing || s.expanding || s.expanded ||
+                    s.to_write || s.written) {
+                        set_bit(STRIPE_HANDLE, &sh->state);
+                        goto unlock;
+                }
+                /* There is nothing for the blocked_rdev to block */
+                rdev_dec_pending(blocked_rdev, conf->mddev);
+                blocked_rdev = NULL;
         }
+
         pr_debug("locked=%d uptodate=%d to_read=%d"
                " to_write=%d failed=%d failed_num=%d,%d\n",
                s.locked, s.uptodate, s.to_read, s.to_write, s.failed,
@@ -4446,6 +4459,9 @@ static int raid5_check_reshape(mddev_t *mddev)
                 return -EINVAL; /* Cannot shrink array or change level yet */
         if (mddev->delta_disks == 0)
                 return 0; /* nothing to do */
+        if (mddev->bitmap)
+                /* Cannot grow a bitmap yet */
+                return -EBUSY;
 
         /* Can only proceed if there are plenty of stripe_heads.
          * We need a minimum of one full stripe, and for sensible progress
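
The raid5.c blocked_rdev rework has two halves. Dropping the break matters because bailing out of the device scan early skipped the In_sync/Faulty bookkeeping for the remaining disks, leaving s.failed and failed_num stale; the blocked_rdev == NULL guard records only the first blocked device while the scan still visits every disk. The second half releases that reference when the stripe has no write, sync, or expand work for the blocked device to gate, so a purely read-serving stripe no longer pins an rdev that the administrator is trying to unblock or remove. The release path from the hunks above, re-stated with explanatory comments and using raid5's own s, sh, and conf identifiers:

        if (unlikely(blocked_rdev)) {
                if (s.syncing || s.expanding || s.expanded ||
                    s.to_write || s.written) {
                        /* the blocked device gates real work: park the
                         * stripe and handle it again after unblocking */
                        set_bit(STRIPE_HANDLE, &sh->state);
                        goto unlock;
                }
                /* read-only stripe: nothing for blocked_rdev to block,
                 * so drop the reference taken during the device scan */
                rdev_dec_pending(blocked_rdev, conf->mddev);
                blocked_rdev = NULL;
        }
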