diff options
Diffstat (limited to 'drivers/md')
-rw-r--r-- | drivers/md/bcache/btree.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/debug.c | 6 | ||||
-rw-r--r-- | drivers/md/bcache/movinggc.c | 5 | ||||
-rw-r--r-- | drivers/md/bcache/request.c | 9 | ||||
-rw-r--r-- | drivers/md/bcache/writeback.c | 5 | ||||
-rw-r--r-- | drivers/md/bitmap.c | 51 | ||||
-rw-r--r-- | drivers/md/dm-bufio.c | 2 | ||||
-rw-r--r-- | drivers/md/dm-crypt.c | 9 | ||||
-rw-r--r-- | drivers/md/dm-log-writes.c | 16 | ||||
-rw-r--r-- | drivers/md/dm-rq.c | 2 | ||||
-rw-r--r-- | drivers/md/md-cluster.c | 111 | ||||
-rw-r--r-- | drivers/md/md.c | 84 | ||||
-rw-r--r-- | drivers/md/md.h | 5 | ||||
-rw-r--r-- | drivers/md/raid1.c | 8 | ||||
-rw-r--r-- | drivers/md/raid10.c | 13 | ||||
-rw-r--r-- | drivers/md/raid5-cache.c | 46 | ||||
-rw-r--r-- | drivers/md/raid5.c | 169 | ||||
-rw-r--r-- | drivers/md/raid5.h | 4 |
18 files changed, 307 insertions, 244 deletions
diff --git a/drivers/md/bcache/btree.c b/drivers/md/bcache/btree.c index 76f7534d1dd1..81d3db40cd7b 100644 --- a/drivers/md/bcache/btree.c +++ b/drivers/md/bcache/btree.c @@ -361,12 +361,8 @@ static void __btree_node_write_done(struct closure *cl) static void btree_node_write_done(struct closure *cl) { struct btree *b = container_of(cl, struct btree, io); - struct bio_vec *bv; - int n; - - bio_for_each_segment_all(bv, b->bio, n) - __free_page(bv->bv_page); + bio_free_pages(b->bio); __btree_node_write_done(cl); } diff --git a/drivers/md/bcache/debug.c b/drivers/md/bcache/debug.c index c28df164701e..333a1e5f6ae6 100644 --- a/drivers/md/bcache/debug.c +++ b/drivers/md/bcache/debug.c @@ -107,9 +107,8 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) { char name[BDEVNAME_SIZE]; struct bio *check; - struct bio_vec bv, *bv2; + struct bio_vec bv; struct bvec_iter iter; - int i; check = bio_clone(bio, GFP_NOIO); if (!check) @@ -136,8 +135,7 @@ void bch_data_verify(struct cached_dev *dc, struct bio *bio) kunmap_atomic(p1); } - bio_for_each_segment_all(bv2, check, i) - __free_page(bv2->bv_page); + bio_free_pages(check); out_put: bio_put(check); } diff --git a/drivers/md/bcache/movinggc.c b/drivers/md/bcache/movinggc.c index 1881319f2298..5c4bddecfaf0 100644 --- a/drivers/md/bcache/movinggc.c +++ b/drivers/md/bcache/movinggc.c @@ -44,11 +44,8 @@ static void write_moving_finish(struct closure *cl) { struct moving_io *io = container_of(cl, struct moving_io, cl); struct bio *bio = &io->bio.bio; - struct bio_vec *bv; - int i; - bio_for_each_segment_all(bv, bio, i) - __free_page(bv->bv_page); + bio_free_pages(bio); if (io->op.replace_collision) trace_bcache_gc_copy_collision(&io->w->key); diff --git a/drivers/md/bcache/request.c b/drivers/md/bcache/request.c index 4b177fe11ebb..40ffe5e424b3 100644 --- a/drivers/md/bcache/request.c +++ b/drivers/md/bcache/request.c @@ -694,13 +694,8 @@ static void cached_dev_cache_miss_done(struct closure *cl) if (s->iop.replace_collision) bch_mark_cache_miss_collision(s->iop.c, s->d); - if (s->iop.bio) { - int i; - struct bio_vec *bv; - - bio_for_each_segment_all(bv, s->iop.bio, i) - __free_page(bv->bv_page); - } + if (s->iop.bio) + bio_free_pages(s->iop.bio); cached_dev_bio_complete(cl); } diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c index d9fd2a62e5f6..e51644e503a5 100644 --- a/drivers/md/bcache/writeback.c +++ b/drivers/md/bcache/writeback.c @@ -128,11 +128,8 @@ static void write_dirty_finish(struct closure *cl) struct dirty_io *io = container_of(cl, struct dirty_io, cl); struct keybuf_key *w = io->bio.bi_private; struct cached_dev *dc = io->dc; - struct bio_vec *bv; - int i; - bio_for_each_segment_all(bv, &io->bio, i) - __free_page(bv->bv_page); + bio_free_pages(&io->bio); /* This is kind of a dumb way of signalling errors. */ if (KEY_DIRTY(&w->key)) { diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c index 6fff794e0c72..2d826927a3bf 100644 --- a/drivers/md/bitmap.c +++ b/drivers/md/bitmap.c @@ -1903,10 +1903,8 @@ int bitmap_copy_from_slot(struct mddev *mddev, int slot, struct bitmap_counts *counts; struct bitmap *bitmap = bitmap_create(mddev, slot); - if (IS_ERR(bitmap)) { - bitmap_free(bitmap); + if (IS_ERR(bitmap)) return PTR_ERR(bitmap); - } rv = bitmap_init_from_disk(bitmap, 0); if (rv) @@ -2183,19 +2181,29 @@ location_show(struct mddev *mddev, char *page) static ssize_t location_store(struct mddev *mddev, const char *buf, size_t len) { + int rv; + rv = mddev_lock(mddev); + if (rv) + return rv; if (mddev->pers) { - if (!mddev->pers->quiesce) - return -EBUSY; - if (mddev->recovery || mddev->sync_thread) - return -EBUSY; + if (!mddev->pers->quiesce) { + rv = -EBUSY; + goto out; + } + if (mddev->recovery || mddev->sync_thread) { + rv = -EBUSY; + goto out; + } } if (mddev->bitmap || mddev->bitmap_info.file || mddev->bitmap_info.offset) { /* bitmap already configured. Only option is to clear it */ - if (strncmp(buf, "none", 4) != 0) - return -EBUSY; + if (strncmp(buf, "none", 4) != 0) { + rv = -EBUSY; + goto out; + } if (mddev->pers) { mddev->pers->quiesce(mddev, 1); bitmap_destroy(mddev); @@ -2214,21 +2222,25 @@ location_store(struct mddev *mddev, const char *buf, size_t len) /* nothing to be done */; else if (strncmp(buf, "file:", 5) == 0) { /* Not supported yet */ - return -EINVAL; + rv = -EINVAL; + goto out; } else { - int rv; if (buf[0] == '+') rv = kstrtoll(buf+1, 10, &offset); else rv = kstrtoll(buf, 10, &offset); if (rv) - return rv; - if (offset == 0) - return -EINVAL; + goto out; + if (offset == 0) { + rv = -EINVAL; + goto out; + } if (mddev->bitmap_info.external == 0 && mddev->major_version == 0 && - offset != mddev->bitmap_info.default_offset) - return -EINVAL; + offset != mddev->bitmap_info.default_offset) { + rv = -EINVAL; + goto out; + } mddev->bitmap_info.offset = offset; if (mddev->pers) { struct bitmap *bitmap; @@ -2245,7 +2257,7 @@ location_store(struct mddev *mddev, const char *buf, size_t len) mddev->pers->quiesce(mddev, 0); if (rv) { bitmap_destroy(mddev); - return rv; + goto out; } } } @@ -2257,6 +2269,11 @@ location_store(struct mddev *mddev, const char *buf, size_t len) set_bit(MD_CHANGE_DEVS, &mddev->flags); md_wakeup_thread(mddev->thread); } + rv = 0; +out: + mddev_unlock(mddev); + if (rv) + return rv; return len; } diff --git a/drivers/md/dm-bufio.c b/drivers/md/dm-bufio.c index 27f4a802f83c..125aedc3875f 100644 --- a/drivers/md/dm-bufio.c +++ b/drivers/md/dm-bufio.c @@ -1866,7 +1866,7 @@ static int __init dm_bufio_init(void) __cache_size_refresh(); mutex_unlock(&dm_bufio_clients_lock); - dm_bufio_wq = create_singlethread_workqueue("dm_bufio_cache"); + dm_bufio_wq = alloc_workqueue("dm_bufio_cache", WQ_MEM_RECLAIM, 0); if (!dm_bufio_wq) return -ENOMEM; diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c index bcba462a7d14..a2768835d394 100644 --- a/drivers/md/dm-crypt.c +++ b/drivers/md/dm-crypt.c @@ -1454,7 +1454,7 @@ static int crypt_alloc_tfms(struct crypt_config *cc, char *ciphermode) unsigned i; int err; - cc->tfms = kmalloc(cc->tfms_count * sizeof(struct crypto_skcipher *), + cc->tfms = kzalloc(cc->tfms_count * sizeof(struct crypto_skcipher *), GFP_KERNEL); if (!cc->tfms) return -ENOMEM; @@ -1920,6 +1920,13 @@ static int crypt_map(struct dm_target *ti, struct bio *bio) return DM_MAPIO_REMAPPED; } + /* + * Check if bio is too large, split as needed. + */ + if (unlikely(bio->bi_iter.bi_size > (BIO_MAX_PAGES << PAGE_SHIFT)) && + bio_data_dir(bio) == WRITE) + dm_accept_partial_bio(bio, ((BIO_MAX_PAGES << PAGE_SHIFT) >> SECTOR_SHIFT)); + io = dm_per_bio_data(bio, cc->per_bio_data_size); crypt_io_init(io, cc, bio, dm_target_offset(ti, bio->bi_iter.bi_sector)); io->ctx.req = (struct skcipher_request *)(io + 1); diff --git a/drivers/md/dm-log-writes.c b/drivers/md/dm-log-writes.c index 4ab68033f9d1..4dfe38655a49 100644 --- a/drivers/md/dm-log-writes.c +++ b/drivers/md/dm-log-writes.c @@ -149,8 +149,6 @@ static void put_io_block(struct log_writes_c *lc) static void log_end_io(struct bio *bio) { struct log_writes_c *lc = bio->bi_private; - struct bio_vec *bvec; - int i; if (bio->bi_error) { unsigned long flags; @@ -161,9 +159,7 @@ static void log_end_io(struct bio *bio) spin_unlock_irqrestore(&lc->blocks_lock, flags); } - bio_for_each_segment_all(bvec, bio, i) - __free_page(bvec->bv_page); - + bio_free_pages(bio); put_io_block(lc); bio_put(bio); } @@ -259,12 +255,12 @@ static int log_one_block(struct log_writes_c *lc, goto out; sector++; - bio = bio_alloc(GFP_KERNEL, block->vec_cnt); + atomic_inc(&lc->io_blocks); + bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt, BIO_MAX_PAGES)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; } - atomic_inc(&lc->io_blocks); bio->bi_iter.bi_size = 0; bio->bi_iter.bi_sector = sector; bio->bi_bdev = lc->logdev->bdev; @@ -282,7 +278,7 @@ static int log_one_block(struct log_writes_c *lc, if (ret != block->vecs[i].bv_len) { atomic_inc(&lc->io_blocks); submit_bio(bio); - bio = bio_alloc(GFP_KERNEL, block->vec_cnt - i); + bio = bio_alloc(GFP_KERNEL, min(block->vec_cnt - i, BIO_MAX_PAGES)); if (!bio) { DMERR("Couldn't alloc log bio"); goto error; @@ -459,9 +455,9 @@ static int log_writes_ctr(struct dm_target *ti, unsigned int argc, char **argv) goto bad; } - ret = -EINVAL; lc->log_kthread = kthread_run(log_writes_kthread, lc, "log-write"); - if (!lc->log_kthread) { + if (IS_ERR(lc->log_kthread)) { + ret = PTR_ERR(lc->log_kthread); ti->error = "Couldn't alloc kthread"; dm_put_device(ti, lc->dev); dm_put_device(ti, lc->logdev); diff --git a/drivers/md/dm-rq.c b/drivers/md/dm-rq.c index 877b8f33620e..182b67947dad 100644 --- a/drivers/md/dm-rq.c +++ b/drivers/md/dm-rq.c @@ -985,7 +985,7 @@ int dm_mq_init_request_queue(struct mapped_device *md, struct dm_table *t) dm_init_md_queue(md); /* backfill 'mq' sysfs registration normally done in blk_register_queue */ - blk_mq_register_disk(md->disk); + blk_mq_register_dev(disk_to_dev(md->disk), q); return 0; diff --git a/drivers/md/md-cluster.c b/drivers/md/md-cluster.c index 41573f1f626f..2b13117fb918 100644 --- a/drivers/md/md-cluster.c +++ b/drivers/md/md-cluster.c @@ -10,6 +10,7 @@ #include <linux/module.h> +#include <linux/kthread.h> #include <linux/dlm.h> #include <linux/sched.h> #include <linux/raid/md_p.h> @@ -25,7 +26,8 @@ struct dlm_lock_resource { struct dlm_lksb lksb; char *name; /* lock name. */ uint32_t flags; /* flags to pass to dlm_lock() */ - struct completion completion; /* completion for synchronized locking */ + wait_queue_head_t sync_locking; /* wait queue for synchronized locking */ + bool sync_locking_done; void (*bast)(void *arg, int mode); /* blocking AST function pointer*/ struct mddev *mddev; /* pointing back to mddev. */ int mode; @@ -118,7 +120,8 @@ static void sync_ast(void *arg) struct dlm_lock_resource *res; res = arg; - complete(&res->completion); + res->sync_locking_done = true; + wake_up(&res->sync_locking); } static int dlm_lock_sync(struct dlm_lock_resource *res, int mode) @@ -130,7 +133,8 @@ static int dlm_lock_sync(struct dlm_lock_resource *res, int mode) 0, sync_ast, res, res->bast); if (ret) return ret; - wait_for_completion(&res->completion); + wait_event(res->sync_locking, res->sync_locking_done); + res->sync_locking_done = false; if (res->lksb.sb_status == 0) res->mode = mode; return res->lksb.sb_status; @@ -141,6 +145,44 @@ static int dlm_unlock_sync(struct dlm_lock_resource *res) return dlm_lock_sync(res, DLM_LOCK_NL); } +/* + * An variation of dlm_lock_sync, which make lock request could + * be interrupted + */ +static int dlm_lock_sync_interruptible(struct dlm_lock_resource *res, int mode, + struct mddev *mddev) +{ + int ret = 0; + + ret = dlm_lock(res->ls, mode, &res->lksb, + res->flags, res->name, strlen(res->name), + 0, sync_ast, res, res->bast); + if (ret) + return ret; + + wait_event(res->sync_locking, res->sync_locking_done + || kthread_should_stop() + || test_bit(MD_CLOSING, &mddev->flags)); + if (!res->sync_locking_done) { + /* + * the convert queue contains the lock request when request is + * interrupted, and sync_ast could still be run, so need to + * cancel the request and reset completion + */ + ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_CANCEL, + &res->lksb, res); + res->sync_locking_done = false; + if (unlikely(ret != 0)) + pr_info("failed to cancel previous lock request " + "%s return %d\n", res->name, ret); + return -EPERM; + } else + res->sync_locking_done = false; + if (res->lksb.sb_status == 0) + res->mode = mode; + return res->lksb.sb_status; +} + static struct dlm_lock_resource *lockres_init(struct mddev *mddev, char *name, void (*bastfn)(void *arg, int mode), int with_lvb) { @@ -151,7 +193,8 @@ static struct dlm_lock_resource *lockres_init(struct mddev *mddev, res = kzalloc(sizeof(struct dlm_lock_resource), GFP_KERNEL); if (!res) return NULL; - init_completion(&res->completion); + init_waitqueue_head(&res->sync_locking); + res->sync_locking_done = false; res->ls = cinfo->lockspace; res->mddev = mddev; res->mode = DLM_LOCK_IV; @@ -194,25 +237,21 @@ out_err: static void lockres_free(struct dlm_lock_resource *res) { - int ret; + int ret = 0; if (!res) return; - /* cancel a lock request or a conversion request that is blocked */ - res->flags |= DLM_LKF_CANCEL; -retry: - ret = dlm_unlock(res->ls, res->lksb.sb_lkid, 0, &res->lksb, res); - if (unlikely(ret != 0)) { - pr_info("%s: failed to unlock %s return %d\n", __func__, res->name, ret); - - /* if a lock conversion is cancelled, then the lock is put - * back to grant queue, need to ensure it is unlocked */ - if (ret == -DLM_ECANCEL) - goto retry; - } - res->flags &= ~DLM_LKF_CANCEL; - wait_for_completion(&res->completion); + /* + * use FORCEUNLOCK flag, so we can unlock even the lock is on the + * waiting or convert queue + */ + ret = dlm_unlock(res->ls, res->lksb.sb_lkid, DLM_LKF_FORCEUNLOCK, + &res->lksb, res); + if (unlikely(ret != 0)) + pr_err("failed to unlock %s return %d\n", res->name, ret); + else + wait_event(res->sync_locking, res->sync_locking_done); kfree(res->name); kfree(res->lksb.sb_lvbptr); @@ -279,7 +318,7 @@ static void recover_bitmaps(struct md_thread *thread) goto clear_bit; } - ret = dlm_lock_sync(bm_lockres, DLM_LOCK_PW); + ret = dlm_lock_sync_interruptible(bm_lockres, DLM_LOCK_PW, mddev); if (ret) { pr_err("md-cluster: Could not DLM lock %s: %d\n", str, ret); @@ -288,7 +327,7 @@ static void recover_bitmaps(struct md_thread *thread) ret = bitmap_copy_from_slot(mddev, slot, &lo, &hi, true); if (ret) { pr_err("md-cluster: Could not copy data from bitmap %d\n", slot); - goto dlm_unlock; + goto clear_bit; } if (hi > 0) { if (lo < mddev->recovery_cp) @@ -300,8 +339,6 @@ static void recover_bitmaps(struct md_thread *thread) md_wakeup_thread(mddev->thread); } } -dlm_unlock: - dlm_unlock_sync(bm_lockres); clear_bit: lockres_free(bm_lockres); clear_bit(slot, &cinfo->recovery_map); @@ -495,9 +532,10 @@ static void process_metadata_update(struct mddev *mddev, struct cluster_msg *msg static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg) { - struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, - le32_to_cpu(msg->raid_slot)); + struct md_rdev *rdev; + rcu_read_lock(); + rdev = md_find_rdev_nr_rcu(mddev, le32_to_cpu(msg->raid_slot)); if (rdev) { set_bit(ClusterRemove, &rdev->flags); set_bit(MD_RECOVERY_NEEDED, &mddev->recovery); @@ -506,18 +544,21 @@ static void process_remove_disk(struct mddev *mddev, struct cluster_msg *msg) else pr_warn("%s: %d Could not find disk(%d) to REMOVE\n", __func__, __LINE__, le32_to_cpu(msg->raid_slot)); + rcu_read_unlock(); } static void process_readd_disk(struct mddev *mddev, struct cluster_msg *msg) { - struct md_rdev *rdev = md_find_rdev_nr_rcu(mddev, - le32_to_cpu(msg->raid_slot)); + struct md_rdev *rdev; + rcu_read_lock(); + rdev = md_find_rdev_nr_rcu(mddev, le32_to_cpu(msg->raid_slot)); if (rdev && test_bit(Faulty, &rdev->flags)) clear_bit(Faulty, &rdev->flags); else pr_warn("%s: %d Could not find disk(%d) which is faulty", __func__, __LINE__, le32_to_cpu(msg->raid_slot)); + rcu_read_unlock(); } static int process_recvd_msg(struct mddev *mddev, struct cluster_msg *msg) @@ -770,7 +811,6 @@ static int gather_all_resync_info(struct mddev *mddev, int total_slots) md_check_recovery(mddev); } - dlm_unlock_sync(bm_lockres); lockres_free(bm_lockres); } out: @@ -834,8 +874,10 @@ static int join(struct mddev *mddev, int nodes) goto err; } cinfo->ack_lockres = lockres_init(mddev, "ack", ack_bast, 0); - if (!cinfo->ack_lockres) + if (!cinfo->ack_lockres) { + ret = -ENOMEM; goto err; + } /* get sync CR lock on ACK. */ if (dlm_lock_sync(cinfo->ack_lockres, DLM_LOCK_CR)) pr_err("md-cluster: failed to get a sync CR lock on ACK!(%d)\n", @@ -849,8 +891,10 @@ static int join(struct mddev *mddev, int nodes) pr_info("md-cluster: Joined cluster %s slot %d\n", str, cinfo->slot_number); snprintf(str, 64, "bitmap%04d", cinfo->slot_number - 1); cinfo->bitmap_lockres = lockres_init(mddev, str, NULL, 1); - if (!cinfo->bitmap_lockres) + if (!cinfo->bitmap_lockres) { + ret = -ENOMEM; goto err; + } if (dlm_lock_sync(cinfo->bitmap_lockres, DLM_LOCK_PW)) { pr_err("Failed to get bitmap lock\n"); ret = -EINVAL; @@ -858,8 +902,10 @@ static int join(struct mddev *mddev, int nodes) } cinfo->resync_lockres = lockres_init(mddev, "resync", NULL, 0); - if (!cinfo->resync_lockres) + if (!cinfo->resync_lockres) { + ret = -ENOMEM; goto err; + } return 0; err: @@ -1000,7 +1046,7 @@ static void metadata_update_cancel(struct mddev *mddev) static int resync_start(struct mddev *mddev) { struct md_cluster_info *cinfo = mddev->cluster_info; - return dlm_lock_sync(cinfo->resync_lockres, DLM_LOCK_EX); + return dlm_lock_sync_interruptible(cinfo->resync_lockres, DLM_LOCK_EX, mddev); } static int resync_info_update(struct mddev *mddev, sector_t lo, sector_t hi) @@ -1180,7 +1226,6 @@ static void unlock_all_bitmaps(struct mddev *mddev) if (cinfo->other_bitmap_lockres) { for (i = 0; i < mddev->bitmap_info.nodes - 1; i++) { if (cinfo->other_bitmap_lockres[i]) { - dlm_unlock_sync(cinfo->other_bitmap_lockres[i]); lockres_free(cinfo->other_bitmap_lockres[i]); } } diff --git a/drivers/md/md.c b/drivers/md/md.c index d646f6e444f0..eac84d8ff724 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1604,11 +1604,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) mddev->new_chunk_sectors = mddev->chunk_sectors; } - if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) { + if (le32_to_cpu(sb->feature_map) & MD_FEATURE_JOURNAL) set_bit(MD_HAS_JOURNAL, &mddev->flags); - if (mddev->recovery_cp == MaxSector) - set_bit(MD_JOURNAL_CLEAN, &mddev->flags); - } } else if (mddev->pers == NULL) { /* Insist of good event counter while assembling, except for * spares (which don't need an event count) */ @@ -5300,6 +5297,21 @@ int md_run(struct mddev *mddev) return err; } if (mddev->queue) { + bool nonrot = true; + + rdev_for_each(rdev, mddev) { + if (rdev->raid_disk >= 0 && + !blk_queue_nonrot(bdev_get_queue(rdev->bdev))) { + nonrot = false; + break; + } + } + if (mddev->degraded) + nonrot = false; + if (nonrot) + queue_flag_set_unlocked(QUEUE_FLAG_NONROT, mddev->queue); + else + queue_flag_clear_unlocked(QUEUE_FLAG_NONROT, mddev->queue); mddev->queue->backing_dev_info.congested_data = mddev; mddev->queue->backing_dev_info.congested_fn = md_congested; } @@ -5457,12 +5469,14 @@ static void md_clean(struct mddev *mddev) mddev->degraded = 0; mddev->safemode = 0; mddev->private = NULL; + mddev->cluster_info = NULL; mddev->bitmap_info.offset = 0; mddev->bitmap_info.default_offset = 0; mddev->bitmap_info.default_space = 0; mddev->bitmap_info.chunksize = 0; mddev->bitmap_info.daemon_sleep = 0; mddev->bitmap_info.max_write_behind = 0; + mddev->bitmap_info.nodes = 0; } static void __md_stop_writes(struct mddev *mddev) @@ -5576,8 +5590,7 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev) mutex_lock(&mddev->open_mutex); if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sync_thread || - test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { printk("md: %s still in use.\n",mdname(mddev)); if (did_freeze) { clear_bit(MD_RECOVERY_FROZEN, &mddev->recovery); @@ -5639,8 +5652,7 @@ static int do_md_stop(struct mddev *mddev, int mode, if ((mddev->pers && atomic_read(&mddev->openers) > !!bdev) || mddev->sysfs_active || mddev->sync_thread || - test_bit(MD_RECOVERY_RUNNING, &mddev->recovery) || - (bdev && !test_bit(MD_STILL_CLOSED, &mddev->flags))) { + test_bit(MD_RECOVERY_RUNNING, &mddev->recovery)) { printk("md: %s still in use.\n",mdname(mddev)); mutex_unlock(&mddev->open_mutex); if (did_freeze) { @@ -5851,6 +5863,9 @@ static int get_array_info(struct mddev *mddev, void __user *arg) working++; if (test_bit(In_sync, &rdev->flags)) insync++; + else if (test_bit(Journal, &rdev->flags)) + /* TODO: add journal count to md_u.h */ + ; else spare++; } @@ -6101,9 +6116,14 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) export_rdev(rdev); if (mddev_is_clustered(mddev)) { - if (info->state & (1 << MD_DISK_CANDIDATE)) - md_cluster_ops->new_disk_ack(mddev, (err == 0)); - else { + if (info->state & (1 << MD_DISK_CANDIDATE)) { + if (!err) { + err = md_cluster_ops->new_disk_ack(mddev, + err == 0); + if (err) + md_kick_rdev_from_array(rdev); + } + } else { if (err) md_cluster_ops->add_new_disk_cancel(mddev); else @@ -6821,7 +6841,7 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode, err = -EBUSY; goto out; } - set_bit(MD_STILL_CLOSED, &mddev->flags); + set_bit(MD_CLOSING, &mddev->flags); mutex_unlock(&mddev->open_mutex); sync_blockdev(bdev); } @@ -7070,9 +7090,13 @@ static int md_open(struct block_device *bdev, fmode_t mode) if ((err = mutex_lock_interruptible(&mddev->open_mutex))) goto out; + if (test_bit(MD_CLOSING, &mddev->flags)) { + mutex_unlock(&mddev->open_mutex); + return -ENODEV; + } + err = 0; atomic_inc(&mddev->openers); - clear_bit(MD_STILL_CLOSED, &mddev->flags); mutex_unlock(&mddev->open_mutex); check_disk_change(bdev); @@ -7610,16 +7634,12 @@ EXPORT_SYMBOL(unregister_md_cluster_operations); int md_setup_cluster(struct mddev *mddev, int nodes) { - int err; - - err = request_module("md-cluster"); - if (err) { - pr_err("md-cluster module not found.\n"); - return -ENOENT; - } - + if (!md_cluster_ops) + request_module("md-cluster"); spin_lock(&pers_lock); + /* ensure module won't be unloaded */ if (!md_cluster_ops || !try_module_get(md_cluster_mod)) { + pr_err("can't find md-cluster module or get it's reference.\n"); spin_unlock(&pers_lock); return -ENOENT; } @@ -7862,6 +7882,7 @@ void md_do_sync(struct md_thread *thread) */ do { + int mddev2_minor = -1; mddev->curr_resync = 2; try_again: @@ -7891,10 +7912,14 @@ void md_do_sync(struct md_thread *thread) prepare_to_wait(&resync_wait, &wq, TASK_INTERRUPTIBLE); if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery) && mddev2->curr_resync >= mddev->curr_resync) { - printk(KERN_INFO "md: delaying %s of %s" - " until %s has finished (they" - " share one or more physical units)\n", - desc, mdname(mddev), mdname(mddev2)); + if (mddev2_minor != mddev2->md_minor) { + mddev2_minor = mddev2->md_minor; + printk(KERN_INFO "md: delaying %s of %s" + " until %s has finished (they" + " share one or more physical units)\n", + desc, mdname(mddev), + mdname(mddev2)); + } mddev_put(mddev2); if (signal_pending(current)) flush_signals(current); @@ -8275,16 +8300,13 @@ no_add: static void md_start_sync(struct work_struct *ws) { struct mddev *mddev = container_of(ws, struct mddev, del_work); - int ret = 0; mddev->sync_thread = md_register_thread(md_do_sync, mddev, "resync"); if (!mddev->sync_thread) { - if (!(mddev_is_clustered(mddev) && ret == -EAGAIN)) - printk(KERN_ERR "%s: could not start resync" - " thread...\n", - mdname(mddev)); + printk(KERN_ERR "%s: could not start resync thread...\n", + mdname(mddev)); /* leave the spares where they are, it shouldn't hurt */ clear_bit(MD_RECOVERY_SYNC, &mddev->recovery); clear_bit(MD_RECOVERY_RESHAPE, &mddev->recovery); @@ -8875,7 +8897,9 @@ static void autostart_arrays(int part) list_del(&node_detected_dev->list); dev = node_detected_dev->dev; kfree(node_detected_dev); + mutex_unlock(&detected_devices_mutex); rdev = md_import_device(dev,0, 90); + mutex_lock(&detected_devices_mutex); if (IS_ERR(rdev)) continue; diff --git a/drivers/md/md.h b/drivers/md/md.h index 20c667579ede..2b2041773e79 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -201,9 +201,8 @@ struct mddev { #define MD_CHANGE_PENDING 2 /* switch from 'clean' to 'active' in progress */ #define MD_UPDATE_SB_FLAGS (1 | 2 | 4) /* If these are set, md_update_sb needed */ #define MD_ARRAY_FIRST_USE 3 /* First use of array, needs initialization */ -#define MD_STILL_CLOSED 4 /* If set, then array has not been opened since - * md_ioctl checked on it. - */ +#define MD_CLOSING 4 /* If set, we are closing the array, do not open + * it then */ #define MD_JOURNAL_CLEAN 5 /* A raid with journal is already clean */ #define MD_HAS_JOURNAL 6 /* The raid array has journal feature set */ #define MD_RELOAD_SB 7 /* Reload the superblock because another node diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c index 21dc00eb1989..1961d827dbd1 100644 --- a/drivers/md/raid1.c +++ b/drivers/md/raid1.c @@ -145,12 +145,8 @@ static void * r1buf_pool_alloc(gfp_t gfp_flags, void *data) return r1_bio; out_free_pages: - while (--j >= 0) { - struct bio_vec *bv; - - bio_for_each_segment_all(bv, r1_bio->bios[j], i) - __free_page(bv->bv_page); - } + while (--j >= 0) + bio_free_pages(r1_bio->bios[j]); out_free_bio: while (++j < pi->raid_disks) diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c index 0e4efcd10795..be1a9fca3b2d 100644 --- a/drivers/md/raid10.c +++ b/drivers/md/raid10.c @@ -1064,6 +1064,8 @@ static void __make_request(struct mddev *mddev, struct bio *bio) int max_sectors; int sectors; + md_write_start(mddev, bio); + /* * Register the new request and wait if the reconstruction * thread has put up a bar for new requests. @@ -1445,8 +1447,6 @@ static void raid10_make_request(struct mddev *mddev, struct bio *bio) return; } - md_write_start(mddev, bio); - do { /* @@ -2465,20 +2465,21 @@ static int narrow_write_error(struct r10bio *r10_bio, int i) while (sect_to_write) { struct bio *wbio; + sector_t wsector; if (sectors > sect_to_write) sectors = sect_to_write; /* Write at 'sector' for 'sectors' */ wbio = bio_clone_mddev(bio, GFP_NOIO, mddev); bio_trim(wbio, sector - bio->bi_iter.bi_sector, sectors); - wbio->bi_iter.bi_sector = (r10_bio->devs[i].addr+ - choose_data_offset(r10_bio, rdev) + - (sector - r10_bio->sector)); + wsector = r10_bio->devs[i].addr + (sector - r10_bio->sector); + wbio->bi_iter.bi_sector = wsector + + choose_data_offset(r10_bio, rdev); wbio->bi_bdev = rdev->bdev; bio_set_op_attrs(wbio, REQ_OP_WRITE, 0); if (submit_bio_wait(wbio) < 0) /* Failure! */ - ok = rdev_set_badblocks(rdev, sector, + ok = rdev_set_badblocks(rdev, wsector, sectors, 0) && ok; diff --git a/drivers/md/raid5-cache.c b/drivers/md/raid5-cache.c index 51f76ddbe265..1b1ab4a1d132 100644 --- a/drivers/md/raid5-cache.c +++ b/drivers/md/raid5-cache.c @@ -96,7 +96,6 @@ struct r5l_log { spinlock_t no_space_stripes_lock; bool need_cache_flush; - bool in_teardown; }; /* @@ -704,31 +703,22 @@ static void r5l_write_super_and_discard_space(struct r5l_log *log, mddev = log->rdev->mddev; /* - * This is to avoid a deadlock. r5l_quiesce holds reconfig_mutex and - * wait for this thread to finish. This thread waits for - * MD_CHANGE_PENDING clear, which is supposed to be done in - * md_check_recovery(). md_check_recovery() tries to get - * reconfig_mutex. Since r5l_quiesce already holds the mutex, - * md_check_recovery() fails, so the PENDING never get cleared. The - * in_teardown check workaround this issue. + * Discard could zero data, so before discard we must make sure + * superblock is updated to new log tail. Updating superblock (either + * directly call md_update_sb() or depend on md thread) must hold + * reconfig mutex. On the other hand, raid5_quiesce is called with + * reconfig_mutex hold. The first step of raid5_quiesce() is waitting + * for all IO finish, hence waitting for reclaim thread, while reclaim + * thread is calling this function and waitting for reconfig mutex. So + * there is a deadlock. We workaround this issue with a trylock. + * FIXME: we could miss discard if we can't take reconfig mutex */ - if (!log->in_teardown) { - set_mask_bits(&mddev->flags, 0, - BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); - md_wakeup_thread(mddev->thread); - wait_event(mddev->sb_wait, - !test_bit(MD_CHANGE_PENDING, &mddev->flags) || - log->in_teardown); - /* - * r5l_quiesce could run after in_teardown check and hold - * mutex first. Superblock might get updated twice. - */ - if (log->in_teardown) - md_update_sb(mddev, 1); - } else { - WARN_ON(!mddev_is_locked(mddev)); - md_update_sb(mddev, 1); - } + set_mask_bits(&mddev->flags, 0, + BIT(MD_CHANGE_DEVS) | BIT(MD_CHANGE_PENDING)); + if (!mddev_trylock(mddev)) + return; + md_update_sb(mddev, 1); + mddev_unlock(mddev); /* discard IO error really doesn't matter, ignore it */ if (log->last_checkpoint < end) { @@ -827,7 +817,6 @@ void r5l_quiesce(struct r5l_log *log, int state) if (!log || state == 2) return; if (state == 0) { - log->in_teardown = 0; /* * This is a special case for hotadd. In suspend, the array has * no journal. In resume, journal is initialized as well as the @@ -838,11 +827,6 @@ void r5l_quiesce(struct r5l_log *log, int state) log->reclaim_thread = md_register_thread(r5l_reclaim_thread, log->rdev->mddev, "reclaim"); } else if (state == 1) { - /* - * at this point all stripes are finished, so io_unit is at - * least in STRIPE_END state - */ - log->in_teardown = 1; /* make sure r5l_write_super_and_discard_space exits */ mddev = log->rdev->mddev; wake_up(&mddev->sb_wait); diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 8912407a4dd0..92ac251e91e6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -659,6 +659,7 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, { struct stripe_head *sh; int hash = stripe_hash_locks_hash(sector); + int inc_empty_inactive_list_flag; pr_debug("get_stripe, sector %llu\n", (unsigned long long)sector); @@ -703,7 +704,12 @@ raid5_get_active_stripe(struct r5conf *conf, sector_t sector, atomic_inc(&conf->active_stripes); BUG_ON(list_empty(&sh->lru) && !test_bit(STRIPE_EXPANDING, &sh->state)); + inc_empty_inactive_list_flag = 0; + if (!list_empty(conf->inactive_list + hash)) + inc_empty_inactive_list_flag = 1; list_del_init(&sh->lru); + if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) + atomic_inc(&conf->empty_inactive_list_nr); if (sh->group) { sh->group->stripes_cnt--; sh->group = NULL; @@ -762,6 +768,7 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh sector_t head_sector, tmp_sec; int hash; int dd_idx; + int inc_empty_inactive_list_flag; /* Don't cross chunks, so stripe pd_idx/qd_idx is the same */ tmp_sec = sh->sector; @@ -779,7 +786,12 @@ static void stripe_add_to_batch_list(struct r5conf *conf, struct stripe_head *sh atomic_inc(&conf->active_stripes); BUG_ON(list_empty(&head->lru) && !test_bit(STRIPE_EXPANDING, &head->state)); + inc_empty_inactive_list_flag = 0; + if (!list_empty(conf->inactive_list + hash)) + inc_empty_inactive_list_flag = 1; list_del_init(&head->lru); + if (list_empty(conf->inactive_list + hash) && inc_empty_inactive_list_flag) + atomic_inc(&conf->empty_inactive_list_nr); if (head->group) { head->group->stripes_cnt--; head->group = NULL; @@ -993,7 +1005,6 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_reset(bi); bi->bi_bdev = rdev->bdev; bio_set_op_attrs(bi, op, op_flags); bi->bi_end_io = op_is_write(op) @@ -1045,7 +1056,6 @@ again: set_bit(STRIPE_IO_STARTED, &sh->state); - bio_reset(rbi); rbi->bi_bdev = rrdev->bdev; bio_set_op_attrs(rbi, op, op_flags); BUG_ON(!op_is_write(op)); @@ -1978,9 +1988,11 @@ static void raid_run_ops(struct stripe_head *sh, unsigned long ops_request) put_cpu(); } -static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) +static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp, + int disks) { struct stripe_head *sh; + int i; sh = kmem_cache_zalloc(sc, gfp); if (sh) { @@ -1989,6 +2001,17 @@ static struct stripe_head *alloc_stripe(struct kmem_cache *sc, gfp_t gfp) INIT_LIST_HEAD(&sh->batch_list); INIT_LIST_HEAD(&sh->lru); atomic_set(&sh->count, 1); + for (i = 0; i < disks; i++) { + struct r5dev *dev = &sh->dev[i]; + + bio_init(&dev->req); + dev->req.bi_io_vec = &dev->vec; + dev->req.bi_max_vecs = 1; + + bio_init(&dev->rreq); + dev->rreq.bi_io_vec = &dev->rvec; + dev->rreq.bi_max_vecs = 1; + } } return sh; } @@ -1996,7 +2019,7 @@ static int grow_one_stripe(struct r5conf *conf, gfp_t gfp) { struct stripe_head *sh; - sh = alloc_stripe(conf->slab_cache, gfp); + sh = alloc_stripe(conf->slab_cache, gfp, conf->pool_size); if (!sh) return 0; @@ -2167,7 +2190,7 @@ static int resize_stripes(struct r5conf *conf, int newsize) mutex_lock(&conf->cache_size_mutex); for (i = conf->max_nr_stripes; i; i--) { - nsh = alloc_stripe(sc, GFP_KERNEL); + nsh = alloc_stripe(sc, GFP_KERNEL, newsize); if (!nsh) break; @@ -2299,6 +2322,7 @@ static void raid5_end_read_request(struct bio * bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_error); if (i == disks) { + bio_reset(bi); BUG(); return; } @@ -2399,6 +2423,7 @@ static void raid5_end_read_request(struct bio * bi) } } rdev_dec_pending(rdev, conf->mddev); + bio_reset(bi); clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); raid5_release_stripe(sh); @@ -2436,6 +2461,7 @@ static void raid5_end_write_request(struct bio *bi) (unsigned long long)sh->sector, i, atomic_read(&sh->count), bi->bi_error); if (i == disks) { + bio_reset(bi); BUG(); return; } @@ -2472,6 +2498,7 @@ static void raid5_end_write_request(struct bio *bi) if (sh->batch_head && bi->bi_error && !replacement) set_bit(STRIPE_BATCH_ERR, &sh->batch_head->state); + bio_reset(bi); if (!test_and_clear_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags)) clear_bit(R5_LOCKED, &sh->dev[i].flags); set_bit(STRIPE_HANDLE, &sh->state); @@ -2485,16 +2512,6 @@ static void raid5_build_block(struct stripe_head *sh, int i, int previous) { struct r5dev *dev = &sh->dev[i]; - bio_init(&dev->req); - dev->req.bi_io_vec = &dev->vec; - dev->req.bi_max_vecs = 1; - dev->req.bi_private = sh; - - bio_init(&dev->rreq); - dev->rreq.bi_io_vec = &dev->rvec; - dev->rreq.bi_max_vecs = 1; - dev->rreq.bi_private = sh; - dev->flags = 0; dev->sector = raid5_compute_blocknr(sh, i, previous); } @@ -4628,7 +4645,9 @@ finish: } if (!bio_list_empty(&s.return_bi)) { - if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags)) { + if (test_bit(MD_CHANGE_PENDING, &conf->mddev->flags) && + (s.failed <= conf->max_degraded || + conf->mddev->external == 0)) { spin_lock_irq(&conf->device_lock); bio_list_merge(&conf->return_bi, &s.return_bi); spin_unlock_irq(&conf->device_lock); @@ -6330,22 +6349,20 @@ static int alloc_scratch_buffer(struct r5conf *conf, struct raid5_percpu *percpu return 0; } -static void raid5_free_percpu(struct r5conf *conf) +static int raid456_cpu_dead(unsigned int cpu, struct hlist_node *node) { - unsigned long cpu; + struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); + + free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); + return 0; +} +static void raid5_free_percpu(struct r5conf *conf) +{ if (!conf->percpu) return; -#ifdef CONFIG_HOTPLUG_CPU - unregister_cpu_notifier(&conf->cpu_notify); -#endif - - get_online_cpus(); - for_each_possible_cpu(cpu) - free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); - put_online_cpus(); - + cpuhp_state_remove_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); free_percpu(conf->percpu); } @@ -6353,7 +6370,7 @@ static void free_conf(struct r5conf *conf) { if (conf->log) r5l_exit_log(conf->log); - if (conf->shrinker.seeks) + if (conf->shrinker.nr_deferred) unregister_shrinker(&conf->shrinker); free_thread_groups(conf); @@ -6364,64 +6381,28 @@ static void free_conf(struct r5conf *conf) kfree(conf); } -#ifdef CONFIG_HOTPLUG_CPU -static int raid456_cpu_notify(struct notifier_block *nfb, unsigned long action, - void *hcpu) +static int raid456_cpu_up_prepare(unsigned int cpu, struct hlist_node *node) { - struct r5conf *conf = container_of(nfb, struct r5conf, cpu_notify); - long cpu = (long)hcpu; + struct r5conf *conf = hlist_entry_safe(node, struct r5conf, node); struct raid5_percpu *percpu = per_cpu_ptr(conf->percpu, cpu); - switch (action) { - case CPU_UP_PREPARE: - case CPU_UP_PREPARE_FROZEN: - if (alloc_scratch_buffer(conf, percpu)) { - pr_err("%s: failed memory allocation for cpu%ld\n", - __func__, cpu); - return notifier_from_errno(-ENOMEM); - } - break; - case CPU_DEAD: - case CPU_DEAD_FROZEN: - case CPU_UP_CANCELED: - case CPU_UP_CANCELED_FROZEN: - free_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); - break; - default: - break; + if (alloc_scratch_buffer(conf, percpu)) { + pr_err("%s: failed memory allocation for cpu%u\n", + __func__, cpu); + return -ENOMEM; } - return NOTIFY_OK; + return 0; } -#endif static int raid5_alloc_percpu(struct r5conf *conf) { - unsigned long cpu; int err = 0; conf->percpu = alloc_percpu(struct raid5_percpu); if (!conf->percpu) return -ENOMEM; -#ifdef CONFIG_HOTPLUG_CPU - conf->cpu_notify.notifier_call = raid456_cpu_notify; - conf->cpu_notify.priority = 0; - err = register_cpu_notifier(&conf->cpu_notify); - if (err) - return err; -#endif - - get_online_cpus(); - for_each_present_cpu(cpu) { - err = alloc_scratch_buffer(conf, per_cpu_ptr(conf->percpu, cpu)); - if (err) { - pr_err("%s: failed memory allocation for cpu%ld\n", - __func__, cpu); - break; - } - } - put_online_cpus(); - + err = cpuhp_state_add_instance(CPUHP_MD_RAID5_PREPARE, &conf->node); if (!err) { conf->scribble_disks = max(conf->raid_disks, conf->previous_raid_disks); @@ -6620,6 +6601,16 @@ static struct r5conf *setup_conf(struct mddev *mddev) } conf->min_nr_stripes = NR_STRIPES; + if (mddev->reshape_position != MaxSector) { + int stripes = max_t(int, + ((mddev->chunk_sectors << 9) / STRIPE_SIZE) * 4, + ((mddev->new_chunk_sectors << 9) / STRIPE_SIZE) * 4); + conf->min_nr_stripes = max(NR_STRIPES, stripes); + if (conf->min_nr_stripes != NR_STRIPES) + printk(KERN_INFO + "md/raid:%s: force stripe size %d for reshape\n", + mdname(mddev), conf->min_nr_stripes); + } memory = conf->min_nr_stripes * (sizeof(struct stripe_head) + max_disks * ((sizeof(struct bio) + PAGE_SIZE))) / 1024; atomic_set(&conf->empty_inactive_list_nr, NR_STRIPE_HASH_LOCKS); @@ -6641,7 +6632,12 @@ static struct r5conf *setup_conf(struct mddev *mddev) conf->shrinker.count_objects = raid5_cache_count; conf->shrinker.batch = 128; conf->shrinker.flags = 0; - register_shrinker(&conf->shrinker); + if (register_shrinker(&conf->shrinker)) { + printk(KERN_ERR + "md/raid:%s: couldn't register shrinker.\n", + mdname(mddev)); + goto abort; + } sprintf(pers_name, "raid%d", mddev->new_level); conf->thread = md_register_thread(raid5d, mddev, pers_name); @@ -6826,11 +6822,14 @@ static int raid5_run(struct mddev *mddev) if (IS_ERR(conf)) return PTR_ERR(conf); - if (test_bit(MD_HAS_JOURNAL, &mddev->flags) && !journal_dev) { - printk(KERN_ERR "md/raid:%s: journal disk is missing, force array readonly\n", - mdname(mddev)); - mddev->ro = 1; - set_disk_ro(mddev->gendisk, 1); + if (test_bit(MD_HAS_JOURNAL, &mddev->flags)) { + if (!journal_dev) { + pr_err("md/raid:%s: journal disk is missing, force array readonly\n", + mdname(mddev)); + mddev->ro = 1; + set_disk_ro(mddev->gendisk, 1); + } else if (mddev->recovery_cp == MaxSector) + set_bit(MD_JOURNAL_CLEAN, &mddev->flags); } conf->min_offset_diff = min_offset_diff; @@ -7034,6 +7033,8 @@ static int raid5_run(struct mddev *mddev) else queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue); + + blk_queue_max_hw_sectors(mddev->queue, UINT_MAX); } if (journal_dev) { @@ -7953,10 +7954,21 @@ static struct md_personality raid4_personality = static int __init raid5_init(void) { + int ret; + raid5_wq = alloc_workqueue("raid5wq", WQ_UNBOUND|WQ_MEM_RECLAIM|WQ_CPU_INTENSIVE|WQ_SYSFS, 0); if (!raid5_wq) return -ENOMEM; + + ret = cpuhp_setup_state_multi(CPUHP_MD_RAID5_PREPARE, + "md/raid5:prepare", + raid456_cpu_up_prepare, + raid456_cpu_dead); + if (ret) { + destroy_workqueue(raid5_wq); + return ret; + } register_md_personality(&raid6_personality); register_md_personality(&raid5_personality); register_md_personality(&raid4_personality); @@ -7968,6 +7980,7 @@ static void raid5_exit(void) unregister_md_personality(&raid6_personality); unregister_md_personality(&raid5_personality); unregister_md_personality(&raid4_personality); + cpuhp_remove_multi_state(CPUHP_MD_RAID5_PREPARE); destroy_workqueue(raid5_wq); } diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 517d4b68a1be..57ec49f0839e 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -512,9 +512,7 @@ struct r5conf { } __percpu *percpu; int scribble_disks; int scribble_sectors; -#ifdef CONFIG_HOTPLUG_CPU - struct notifier_block cpu_notify; -#endif + struct hlist_node node; /* * Free stripes pool |