diff options
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 169 |
1 files changed, 91 insertions, 78 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8912407a4dd0..92ac251e91e6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, { struct stripe_head *sh; int hash = stripe_hash_locks_hash(sector); + int inc_empty_inactive_list_flag; pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); @@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, atomic_inc(&conf->active_stripes); BUG_ON(list_empty(&sh->lru) && !test_bit(STRIPE_EXPANDING, &sh->state)); + inc_empty_inactive_list_flag = 0; + if (!list_empty(conf->inactive_list + hash)) + inc_empty_inactive_list_flag = 1; list_del_init(&sh->lru); + if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) + atomic_inc(&conf->empty_inactive_list_nr); if (sh->group) { sh->group->stripes_cnt--; sh->group = NULL; @@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh sector_t head_sector, tmp_sec; int hash; int dd_idx; + int inc_empty_inactive_list_flag; /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */ tmp_sec = sh->sector; @@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh atomic_inc(&conf->active_stripes); BUG_ON(list_empty(&head->lru) && !test_bit(STRIPE_EXPANDING, &head->state)); + inc_empty_inactive_list_flag = 0; + if (!list_empty(conf->inactive_list + hash)) + inc_empty_inactive_list_flag = 1; list_del_init(&head->lru); + if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) + atomic_inc(&conf->empty_inactive_list_nr); if (head->group) { head->group->stripes_cnt--; head->group = NULL; @@ -993,7 +1005,6 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_reset(bi); bi->bi_bdev = rdev->bdev; bio_set_op_attrs(bi, op, op_flags); bi->bi_end_io = op_is_write(op) @@ -1045,7 +1056,6 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_reset(rbi); rbi->bi_bdev = rrdev->bdev; bio_set_op_attrs(rbi, op, op_flags); BUG_ON(!op_is_write(op)); @@ -1978,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) put_cpu(); } -static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) +static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, + int disks) { struct stripe_head *sh; + int i; sh = kmem_cache_zalloc(sc, gfp); if (sh) { @@ -1989,6 +2001,17 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) INIT_LIST_HEAD(&sh->batch_list); INIT_LIST_HEAD(&sh->lru); atomic_set(&sh->count, 1); + for (i = 0; i < disks; i++) { + struct r5dev *dev = &sh->dev[i]; + + bio_init(&dev->req); + dev->req.bi_io_vec = &dev->vec; + dev->req.bi_max_vecs = 1; + + bio_init(&dev->rreq); + dev->rreq.bi_io_vec = &dev->rvec; + dev->rreq.bi_max_vecs = 1; + } } return sh; } @@ -1996,7 +2019,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) { struct stripe_head *sh; - sh = alloc_stripe(conf->slab_cache, gfp); + sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size); if (!sh) return 0; @@ -2167,7 +2190,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) mutex_lock(&conf->cache_size_mutex); for (i = conf->max_nr_stripes; i; i--) { - nsh = alloc_stripe(sc, GFP_KERNEL); + nsh = alloc_stripe(sc, GFP_KERNEL, newsize); if (!nsh) break; @@ -2299,6 +2322,7 @@ static void raid5_end_read_request(struct bio * bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_error); if (i == disks) { + bio_reset(bi); BUG(); return; } @@ -2399,6 +2423,7 @@ static void raid5_end_read_request(struct bio * bi) } } rdev_dec_pending(rdev, conf->mddev); + bio_reset(bi); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); @@ -2436,6 +2461,7 @@ static void raid5_end_write_request(struct bio *bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_error); if (i == disks) { + bio_reset(bi); BUG(); return; } @@ -2472,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && bi->bi_error && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); + bio_reset(bi); if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); @@ -2485,16 +2512,6 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous) { struct r5dev *dev = &sh->dev[i]; - bio_init(&dev->req); - dev->req.bi_io_vec = &dev->vec; - dev->req.bi_max_vecs = 1; - dev->req.bi_private = sh; - - bio_init(&dev->rreq); - dev->rreq.bi_io_vec = &dev->rvec; - dev->rreq.bi_max_vecs = 1; - dev->rreq.bi_private = sh; - dev->flags = 0; dev->sector = raid5_compute_blocknr(sh, i, previous); } @@ -4628,7 +4645,9 @@ finish: } if (!bio_list_empty(&s.return_bi)) { - if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) { + if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) && + (s.failed <= conf->max_degraded || + conf->mddev->external == 0)) { spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->return_bi, &s.return_bi); spin_unlock_irq(&conf->device_lock); @@ -6330,22 +6349,20 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu return 0; } -static void raid5_free_percpu(struct r5conf *conf) +static int raid456_cpu_dead(unsigned int cpu, struct hlist_node *node) { - unsigned long cpu; + struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); + + free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); + return 0; +} +static void raid5_free_percpu(struct r5conf *conf) +{ if (!conf->percpu) return; -#ifdef CONFIG_HOTPLUG_CPU - unregister_cpu_notifier(&conf->cpu_notify); -#endif - - get_online_cpus(); - for_each_possible_cpu(cpu) - free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); - put_online_cpus(); - + cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); free_percpu(conf->percpu); } @@ -6353,7 +6370,7 @@ static void free_conf(struct r5conf *conf) { if (conf->log) r5l_exit_log(conf->log); - if (conf->shrinker.seeks) + if (conf->shrinker.nr_deferred) unregister_shrinker(&conf->shrinker); free_thread_groups(conf); @@ -6364,64 +6381,28 @@ static void free_conf(struct r5conf *conf) kfree(conf); } -#ifdef CONFIG_HOTPLUG_CPU -static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action, - void *hcpu) +static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) { - struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify); - long cpu = (long)hcpu; + struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (alloc_scratch_buffer(conf, percpu)) { - pr_err("%s: failed memory allocation for cpu%ld\n", - __func__, cpu); - return notifier_from_errno(-ENOMEM); - } - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); - break; - default: - break; + if (alloc_scratch_buffer(conf, percpu)) { + pr_err("%s: failed memory allocation for cpu%u\n", + __func__, cpu); + return -ENOMEM; } - return NOTIFY_OK; + return 0; } -#endif static int raid5_alloc_percpu(struct r5conf *conf) { - unsigned long cpu; int err = 0; conf->percpu = alloc_percpu(struct raid5_percpu); if (!conf->percpu) return -ENOMEM; -#ifdef CONFIG_HOTPLUG_CPU - conf->cpu_notify.notifier_call = raid456_cpu_notify; - conf->cpu_notify.priority = 0; - err = register_cpu_notifier(&conf->cpu_notify); - if (err) - return err; -#endif - - get_online_cpus(); - for_each_present_cpu(cpu) { - err = alloc_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); - if (err) { - pr_err("%s: failed memory allocation for cpu%ld\n", - __func__, cpu); - break; - } - } - put_online_cpus(); - + err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); if (!err) { conf->scribble_disks = max(conf->raid_disks, conf->previous_raid_disks); @@ -6620,6 +6601,16 @@ static struct r5conf *setup_conf(struct mddev *mddev) } conf->min_nr_stripes = NR_STRIPES; + if (mddev->reshape_position != MaxSector) { + int stripes = max_t(int, + ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4, + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4); + conf->min_nr_stripes = max(NR_STRIPES, stripes); + if (conf->min_nr_stripes != NR_STRIPES) + printk(KERN_INFO + "md/raid:%s: force stripe size %d for reshape\n", + mdname(mddev), conf->min_nr_stripes); + } memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); @@ -6641,7 +6632,12 @@ static struct r5conf *setup_conf(struct mddev *mddev) conf->shrinker.count_objects = raid5_cache_count; conf->shrinker.batch = 128; conf->shrinker.flags = 0; - register_shrinker(&conf->shrinker); + if (register_shrinker(&conf->shrinker)) { + printk(KERN_ERR + "md/raid:%s: couldn't register shrinker.\n", + mdname(mddev)); + goto abort; + } sprintf(pers_name, "raid%d", mddev->new_level); conf->thread = md_register_thread(raid5d, mddev, pers_name); @@ -6826,11 +6822,14 @@ static int raid5_run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) { - printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n", - mdname(mddev)); - mddev->ro = 1; - set_disk_ro(mddev->gendisk, 1); + if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { + if (!journal_dev) { + pr_err("md/raid:%s: journal disk is missing, force array readonly\n", + mdname(mddev)); + mddev->ro = 1; + set_disk_ro(mddev->gendisk, 1); + } else if (mddev->recovery_cp == MaxSector) + set_bit(MD_JOURNAL_CLEAN, &mddev->flags); } conf->min_offset_diff = min_offset_diff; @@ -7034,6 +7033,8 @@ static int raid5_run(struct mddev *mddev) else queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + + blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); } if (journal_dev) { @@ -7953,10 +7954,21 @@ static struct md_personality raid4_personality = static int __init raid5_init(void) { + int ret; + raid5_wq = alloc_workqueue("raid5wq", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0); if (!raid5_wq) return -ENOMEM; + + ret = cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE, + "md/raid5:prepare", + raid456_cpu_up_prepare, + raid456_cpu_dead); + if (ret) { + destroy_workqueue(raid5_wq); + return ret; + } register_md_personality(&raid6_personality); register_md_personality(&raid5_personality); register_md_personality(&raid4_personality); @@ -7968,6 +7980,7 @@ static void raid5_exit(void) unregister_md_personality(&raid6_personality); unregister_md_personality(&raid5_personality); unregister_md_personality(&raid4_personality); + cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE); destroy_workqueue(raid5_wq); } |