summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bcache/Kconfig1
-rw-r--r--drivers/md/bcache/bcache.h2
-rw-r--r--drivers/md/bcache/stats.c34
-rw-r--r--drivers/md/bcache/super.c185
-rw-r--r--drivers/md/bcache/writeback.c2
-rw-r--r--drivers/md/bitmap.c8
-rw-r--r--drivers/md/dm-raid.c76
-rw-r--r--drivers/md/md.c55
-rw-r--r--drivers/md/md.h8
-rw-r--r--drivers/md/raid0.c1
-rw-r--r--drivers/md/raid1.c45
-rw-r--r--drivers/md/raid10.c112
-rw-r--r--drivers/md/raid5.c12
13 files changed, 328 insertions, 213 deletions
diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig
index 05c220d05e23..f950c9d29f3e 100644
--- a/drivers/md/bcache/Kconfig
+++ b/drivers/md/bcache/Kconfig
@@ -1,7 +1,6 @@
config BCACHE
tristate "Block device as cache"
- select CLOSURES
---help---
Allows a block device to be used as cache for other devices; uses
a btree for indexing and the layout is optimized for SSDs.
diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h
index 340146d7c17f..d3e15b42a4ab 100644
--- a/drivers/md/bcache/bcache.h
+++ b/drivers/md/bcache/bcache.h
@@ -1241,7 +1241,7 @@ void bch_cache_set_stop(struct cache_set *);
struct cache_set *bch_cache_set_alloc(struct cache_sb *);
void bch_btree_cache_free(struct cache_set *);
int bch_btree_cache_alloc(struct cache_set *);
-void bch_writeback_init_cached_dev(struct cached_dev *);
+void bch_cached_dev_writeback_init(struct cached_dev *);
void bch_moving_init_cache_set(struct cache_set *);
void bch_cache_allocator_exit(struct cache *ca);
diff --git a/drivers/md/bcache/stats.c b/drivers/md/bcache/stats.c
index 64e679449c2a..b8730e714d69 100644
--- a/drivers/md/bcache/stats.c
+++ b/drivers/md/bcache/stats.c
@@ -93,24 +93,6 @@ static struct attribute *bch_stats_files[] = {
};
static KTYPE(bch_stats);
-static void scale_accounting(unsigned long data);
-
-void bch_cache_accounting_init(struct cache_accounting *acc,
- struct closure *parent)
-{
- kobject_init(&acc->total.kobj, &bch_stats_ktype);
- kobject_init(&acc->five_minute.kobj, &bch_stats_ktype);
- kobject_init(&acc->hour.kobj, &bch_stats_ktype);
- kobject_init(&acc->day.kobj, &bch_stats_ktype);
-
- closure_init(&acc->cl, parent);
- init_timer(&acc->timer);
- acc->timer.expires = jiffies + accounting_delay;
- acc->timer.data = (unsigned long) acc;
- acc->timer.function = scale_accounting;
- add_timer(&acc->timer);
-}
-
int bch_cache_accounting_add_kobjs(struct cache_accounting *acc,
struct kobject *parent)
{
@@ -244,3 +226,19 @@ void bch_mark_sectors_bypassed(struct search *s, int sectors)
atomic_add(sectors, &dc->accounting.collector.sectors_bypassed);
atomic_add(sectors, &s->op.c->accounting.collector.sectors_bypassed);
}
+
+void bch_cache_accounting_init(struct cache_accounting *acc,
+ struct closure *parent)
+{
+ kobject_init(&acc->total.kobj, &bch_stats_ktype);
+ kobject_init(&acc->five_minute.kobj, &bch_stats_ktype);
+ kobject_init(&acc->hour.kobj, &bch_stats_ktype);
+ kobject_init(&acc->day.kobj, &bch_stats_ktype);
+
+ closure_init(&acc->cl, parent);
+ init_timer(&acc->timer);
+ acc->timer.expires = jiffies + accounting_delay;
+ acc->timer.data = (unsigned long) acc;
+ acc->timer.function = scale_accounting;
+ add_timer(&acc->timer);
+}
diff --git a/drivers/md/bcache/super.c b/drivers/md/bcache/super.c
index c8046bc4aa57..f88e2b653a3f 100644
--- a/drivers/md/bcache/super.c
+++ b/drivers/md/bcache/super.c
@@ -634,11 +634,10 @@ static int open_dev(struct block_device *b, fmode_t mode)
return 0;
}
-static int release_dev(struct gendisk *b, fmode_t mode)
+static void release_dev(struct gendisk *b, fmode_t mode)
{
struct bcache_device *d = b->private_data;
closure_put(&d->cl);
- return 0;
}
static int ioctl_dev(struct block_device *b, fmode_t mode,
@@ -732,8 +731,7 @@ static void bcache_device_free(struct bcache_device *d)
if (d->c)
bcache_device_detach(d);
-
- if (d->disk)
+ if (d->disk && d->disk->flags & GENHD_FL_UP)
del_gendisk(d->disk);
if (d->disk && d->disk->queue)
blk_cleanup_queue(d->disk->queue);
@@ -756,12 +754,9 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
if (!(d->bio_split = bioset_create(4, offsetof(struct bbio, bio))) ||
!(d->unaligned_bvec = mempool_create_kmalloc_pool(1,
sizeof(struct bio_vec) * BIO_MAX_PAGES)) ||
- bio_split_pool_init(&d->bio_split_hook))
-
- return -ENOMEM;
-
- d->disk = alloc_disk(1);
- if (!d->disk)
+ bio_split_pool_init(&d->bio_split_hook) ||
+ !(d->disk = alloc_disk(1)) ||
+ !(q = blk_alloc_queue(GFP_KERNEL)))
return -ENOMEM;
snprintf(d->disk->disk_name, DISK_NAME_LEN, "bcache%i", bcache_minor);
@@ -771,10 +766,6 @@ static int bcache_device_init(struct bcache_device *d, unsigned block_size)
d->disk->fops = &bcache_ops;
d->disk->private_data = d;
- q = blk_alloc_queue(GFP_KERNEL);
- if (!q)
- return -ENOMEM;
-
blk_queue_make_request(q, NULL);
d->disk->queue = q;
q->queuedata = d;
@@ -999,14 +990,17 @@ static void cached_dev_free(struct closure *cl)
mutex_lock(&bch_register_lock);
- bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
+ if (atomic_read(&dc->running))
+ bd_unlink_disk_holder(dc->bdev, dc->disk.disk);
bcache_device_free(&dc->disk);
list_del(&dc->list);
mutex_unlock(&bch_register_lock);
if (!IS_ERR_OR_NULL(dc->bdev)) {
- blk_sync_queue(bdev_get_queue(dc->bdev));
+ if (dc->bdev->bd_disk)
+ blk_sync_queue(bdev_get_queue(dc->bdev));
+
blkdev_put(dc->bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
}
@@ -1028,73 +1022,67 @@ static void cached_dev_flush(struct closure *cl)
static int cached_dev_init(struct cached_dev *dc, unsigned block_size)
{
- int err;
+ int ret;
struct io *io;
-
- closure_init(&dc->disk.cl, NULL);
- set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
+ struct request_queue *q = bdev_get_queue(dc->bdev);
__module_get(THIS_MODULE);
INIT_LIST_HEAD(&dc->list);
+ closure_init(&dc->disk.cl, NULL);
+ set_closure_fn(&dc->disk.cl, cached_dev_flush, system_wq);
kobject_init(&dc->disk.kobj, &bch_cached_dev_ktype);
-
- bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
-
- err = bcache_device_init(&dc->disk, block_size);
- if (err)
- goto err;
-
- spin_lock_init(&dc->io_lock);
- closure_init_unlocked(&dc->sb_write);
INIT_WORK(&dc->detach, cached_dev_detach_finish);
+ closure_init_unlocked(&dc->sb_write);
+ INIT_LIST_HEAD(&dc->io_lru);
+ spin_lock_init(&dc->io_lock);
+ bch_cache_accounting_init(&dc->accounting, &dc->disk.cl);
dc->sequential_merge = true;
dc->sequential_cutoff = 4 << 20;
- INIT_LIST_HEAD(&dc->io_lru);
- dc->sb_bio.bi_max_vecs = 1;
- dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs;
-
for (io = dc->io; io < dc->io + RECENT_IO; io++) {
list_add(&io->lru, &dc->io_lru);
hlist_add_head(&io->hash, dc->io_hash + RECENT_IO);
}
- bch_writeback_init_cached_dev(dc);
+ ret = bcache_device_init(&dc->disk, block_size);
+ if (ret)
+ return ret;
+
+ set_capacity(dc->disk.disk,
+ dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
+
+ dc->disk.disk->queue->backing_dev_info.ra_pages =
+ max(dc->disk.disk->queue->backing_dev_info.ra_pages,
+ q->backing_dev_info.ra_pages);
+
+ bch_cached_dev_request_init(dc);
+ bch_cached_dev_writeback_init(dc);
return 0;
-err:
- bcache_device_stop(&dc->disk);
- return err;
}
/* Cached device - bcache superblock */
-static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
+static void register_bdev(struct cache_sb *sb, struct page *sb_page,
struct block_device *bdev,
struct cached_dev *dc)
{
char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory";
- struct gendisk *g;
struct cache_set *c;
- if (!dc || cached_dev_init(dc, sb->block_size << 9) != 0)
- return err;
-
memcpy(&dc->sb, sb, sizeof(struct cache_sb));
- dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
dc->bdev = bdev;
dc->bdev->bd_holder = dc;
- g = dc->disk.disk;
-
- set_capacity(g, dc->bdev->bd_part->nr_sects - dc->sb.data_offset);
-
- g->queue->backing_dev_info.ra_pages =
- max(g->queue->backing_dev_info.ra_pages,
- bdev->bd_queue->backing_dev_info.ra_pages);
+ bio_init(&dc->sb_bio);
+ dc->sb_bio.bi_max_vecs = 1;
+ dc->sb_bio.bi_io_vec = dc->sb_bio.bi_inline_vecs;
+ dc->sb_bio.bi_io_vec[0].bv_page = sb_page;
+ get_page(sb_page);
- bch_cached_dev_request_init(dc);
+ if (cached_dev_init(dc, sb->block_size << 9))
+ goto err;
err = "error creating kobject";
if (kobject_add(&dc->disk.kobj, &part_to_dev(bdev->bd_part)->kobj,
@@ -1103,6 +1091,8 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
if (bch_cache_accounting_add_kobjs(&dc->accounting, &dc->disk.kobj))
goto err;
+ pr_info("registered backing device %s", bdevname(bdev, name));
+
list_add(&dc->list, &uncached_devices);
list_for_each_entry(c, &bch_cache_sets, list)
bch_cached_dev_attach(dc, c);
@@ -1111,15 +1101,10 @@ static const char *register_bdev(struct cache_sb *sb, struct page *sb_page,
BDEV_STATE(&dc->sb) == BDEV_STATE_STALE)
bch_cached_dev_run(dc);
- return NULL;
+ return;
err:
- kobject_put(&dc->disk.kobj);
pr_notice("error opening %s: %s", bdevname(bdev, name), err);
- /*
- * Return NULL instead of an error because kobject_put() cleans
- * everything up
- */
- return NULL;
+ bcache_device_stop(&dc->disk);
}
/* Flash only volumes */
@@ -1717,20 +1702,11 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
size_t free;
struct bucket *b;
- if (!ca)
- return -ENOMEM;
-
__module_get(THIS_MODULE);
kobject_init(&ca->kobj, &bch_cache_ktype);
- memcpy(&ca->sb, sb, sizeof(struct cache_sb));
-
INIT_LIST_HEAD(&ca->discards);
- bio_init(&ca->sb_bio);
- ca->sb_bio.bi_max_vecs = 1;
- ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs;
-
bio_init(&ca->journal.bio);
ca->journal.bio.bi_max_vecs = 8;
ca->journal.bio.bi_io_vec = ca->journal.bio.bi_inline_vecs;
@@ -1742,18 +1718,17 @@ static int cache_alloc(struct cache_sb *sb, struct cache *ca)
!init_fifo(&ca->free_inc, free << 2, GFP_KERNEL) ||
!init_fifo(&ca->unused, free << 2, GFP_KERNEL) ||
!init_heap(&ca->heap, free << 3, GFP_KERNEL) ||
- !(ca->buckets = vmalloc(sizeof(struct bucket) *
+ !(ca->buckets = vzalloc(sizeof(struct bucket) *
ca->sb.nbuckets)) ||
!(ca->prio_buckets = kzalloc(sizeof(uint64_t) * prio_buckets(ca) *
2, GFP_KERNEL)) ||
!(ca->disk_buckets = alloc_bucket_pages(GFP_KERNEL, ca)) ||
!(ca->alloc_workqueue = alloc_workqueue("bch_allocator", 0, 1)) ||
bio_split_pool_init(&ca->bio_split_hook))
- goto err;
+ return -ENOMEM;
ca->prio_last_buckets = ca->prio_buckets + prio_buckets(ca);
- memset(ca->buckets, 0, ca->sb.nbuckets * sizeof(struct bucket));
for_each_bucket(b, ca)
atomic_set(&b->pin, 0);
@@ -1766,22 +1741,28 @@ err:
return -ENOMEM;
}
-static const char *register_cache(struct cache_sb *sb, struct page *sb_page,
+static void register_cache(struct cache_sb *sb, struct page *sb_page,
struct block_device *bdev, struct cache *ca)
{
char name[BDEVNAME_SIZE];
const char *err = "cannot allocate memory";
- if (cache_alloc(sb, ca) != 0)
- return err;
-
- ca->sb_bio.bi_io_vec[0].bv_page = sb_page;
+ memcpy(&ca->sb, sb, sizeof(struct cache_sb));
ca->bdev = bdev;
ca->bdev->bd_holder = ca;
+ bio_init(&ca->sb_bio);
+ ca->sb_bio.bi_max_vecs = 1;
+ ca->sb_bio.bi_io_vec = ca->sb_bio.bi_inline_vecs;
+ ca->sb_bio.bi_io_vec[0].bv_page = sb_page;
+ get_page(sb_page);
+
if (blk_queue_discard(bdev_get_queue(ca->bdev)))
ca->discard = CACHE_DISCARD(&ca->sb);
+ if (cache_alloc(sb, ca) != 0)
+ goto err;
+
err = "error creating kobject";
if (kobject_add(&ca->kobj, &part_to_dev(bdev->bd_part)->kobj, "bcache"))
goto err;
@@ -1791,15 +1772,10 @@ static const char *register_cache(struct cache_sb *sb, struct page *sb_page,
goto err;
pr_info("registered cache device %s", bdevname(bdev, name));
-
- return NULL;
+ return;
err:
+ pr_notice("error opening %s: %s", bdevname(bdev, name), err);
kobject_put(&ca->kobj);
- pr_info("error opening %s: %s", bdevname(bdev, name), err);
- /* Return NULL instead of an error because kobject_put() cleans
- * everything up
- */
- return NULL;
}
/* Global interfaces/init */
@@ -1833,12 +1809,15 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
bdev = blkdev_get_by_path(strim(path),
FMODE_READ|FMODE_WRITE|FMODE_EXCL,
sb);
- if (bdev == ERR_PTR(-EBUSY))
- err = "device busy";
-
- if (IS_ERR(bdev) ||
- set_blocksize(bdev, 4096))
+ if (IS_ERR(bdev)) {
+ if (bdev == ERR_PTR(-EBUSY))
+ err = "device busy";
goto err;
+ }
+
+ err = "failed to set blocksize";
+ if (set_blocksize(bdev, 4096))
+ goto err_close;
err = read_super(sb, bdev, &sb_page);
if (err)
@@ -1846,33 +1825,33 @@ static ssize_t register_bcache(struct kobject *k, struct kobj_attribute *attr,
if (SB_IS_BDEV(sb)) {
struct cached_dev *dc = kzalloc(sizeof(*dc), GFP_KERNEL);
+ if (!dc)
+ goto err_close;
- err = register_bdev(sb, sb_page, bdev, dc);
+ register_bdev(sb, sb_page, bdev, dc);
} else {
struct cache *ca = kzalloc(sizeof(*ca), GFP_KERNEL);
+ if (!ca)
+ goto err_close;
- err = register_cache(sb, sb_page, bdev, ca);
+ register_cache(sb, sb_page, bdev, ca);
}
-
- if (err) {
- /* register_(bdev|cache) will only return an error if they
- * didn't get far enough to create the kobject - if they did,
- * the kobject destructor will do this cleanup.
- */
+out:
+ if (sb_page)
put_page(sb_page);
-err_close:
- blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
-err:
- if (attr != &ksysfs_register_quiet)
- pr_info("error opening %s: %s", path, err);
- ret = -EINVAL;
- }
-
kfree(sb);
kfree(path);
mutex_unlock(&bch_register_lock);
module_put(THIS_MODULE);
return ret;
+
+err_close:
+ blkdev_put(bdev, FMODE_READ|FMODE_WRITE|FMODE_EXCL);
+err:
+ if (attr != &ksysfs_register_quiet)
+ pr_info("error opening %s: %s", path, err);
+ ret = -EINVAL;
+ goto out;
}
static int bcache_reboot(struct notifier_block *n, unsigned long code, void *x)
diff --git a/drivers/md/bcache/writeback.c b/drivers/md/bcache/writeback.c
index 93e7e31a4bd3..2714ed3991d1 100644
--- a/drivers/md/bcache/writeback.c
+++ b/drivers/md/bcache/writeback.c
@@ -375,7 +375,7 @@ err:
refill_dirty(cl);
}
-void bch_writeback_init_cached_dev(struct cached_dev *dc)
+void bch_cached_dev_writeback_init(struct cached_dev *dc)
{
closure_init_unlocked(&dc->writeback);
init_rwsem(&dc->writeback_lock);
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index 5a2c75499824..a7fd82133b12 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -2002,9 +2002,9 @@ location_store(struct mddev *mddev, const char *buf, size_t len)
} else {
int rv;
if (buf[0] == '+')
- rv = strict_strtoll(buf+1, 10, &offset);
+ rv = kstrtoll(buf+1, 10, &offset);
else
- rv = strict_strtoll(buf, 10, &offset);
+ rv = kstrtoll(buf, 10, &offset);
if (rv)
return rv;
if (offset == 0)
@@ -2139,7 +2139,7 @@ static ssize_t
backlog_store(struct mddev *mddev, const char *buf, size_t len)
{
unsigned long backlog;
- int rv = strict_strtoul(buf, 10, &backlog);
+ int rv = kstrtoul(buf, 10, &backlog);
if (rv)
return rv;
if (backlog > COUNTER_MAX)
@@ -2165,7 +2165,7 @@ chunksize_store(struct mddev *mddev, const char *buf, size_t len)
unsigned long csize;
if (mddev->bitmap)
return -EBUSY;
- rv = strict_strtoul(buf, 10, &csize);
+ rv = kstrtoul(buf, 10, &csize);
if (rv)
return rv;
if (csize < 512 ||
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 1d3fe1a40a9b..4880b69e2e9e 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -380,7 +380,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
static int validate_raid_redundancy(struct raid_set *rs)
{
unsigned i, rebuild_cnt = 0;
- unsigned rebuilds_per_group, copies, d;
+ unsigned rebuilds_per_group = 0, copies, d;
unsigned group_size, last_group_start;
for (i = 0; i < rs->md.raid_disks; i++)
@@ -504,7 +504,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
* First, parse the in-order required arguments
* "chunk_size" is the only argument of this type.
*/
- if ((strict_strtoul(argv[0], 10, &value) < 0)) {
+ if ((kstrtoul(argv[0], 10, &value) < 0)) {
rs->ti->error = "Bad chunk size";
return -EINVAL;
} else if (rs->raid_type->level == 1) {
@@ -585,7 +585,7 @@ static int parse_raid_params(struct raid_set *rs, char **argv,
continue;
}
- if (strict_strtoul(argv[i], 10, &value) < 0) {
+ if (kstrtoul(argv[i], 10, &value) < 0) {
rs->ti->error = "Bad numerical argument given in raid params";
return -EINVAL;
}
@@ -1181,7 +1181,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
argv++;
/* number of RAID parameters */
- if (strict_strtoul(argv[0], 10, &num_raid_params) < 0) {
+ if (kstrtoul(argv[0], 10, &num_raid_params) < 0) {
ti->error = "Cannot understand number of RAID parameters";
return -EINVAL;
}
@@ -1194,7 +1194,7 @@ static int raid_ctr(struct dm_target *ti, unsigned argc, char **argv)
return -EINVAL;
}
- if ((strict_strtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
+ if ((kstrtoul(argv[num_raid_params], 10, &num_raid_devs) < 0) ||
(num_raid_devs >= INT_MAX)) {
ti->error = "Cannot understand number of raid devices";
return -EINVAL;
@@ -1388,6 +1388,7 @@ static void raid_status(struct dm_target *ti, status_type_t type,
* performing a "check" of the array.
*/
DMEMIT(" %llu",
+ (strcmp(rs->md.last_sync_action, "check")) ? 0 :
(unsigned long long)
atomic64_read(&rs->md.resync_mismatches));
break;
@@ -1572,6 +1573,62 @@ static void raid_postsuspend(struct dm_target *ti)
mddev_suspend(&rs->md);
}
+static void attempt_restore_of_faulty_devices(struct raid_set *rs)
+{
+ int i;
+ uint64_t failed_devices, cleared_failed_devices = 0;
+ unsigned long flags;
+ struct dm_raid_superblock *sb;
+ struct md_rdev *r;
+
+ for (i = 0; i < rs->md.raid_disks; i++) {
+ r = &rs->dev[i].rdev;
+ if (test_bit(Faulty, &r->flags) && r->sb_page &&
+ sync_page_io(r, 0, r->sb_size, r->sb_page, READ, 1)) {
+ DMINFO("Faulty %s device #%d has readable super block."
+ " Attempting to revive it.",
+ rs->raid_type->name, i);
+
+ /*
+ * Faulty bit may be set, but sometimes the array can
+ * be suspended before the personalities can respond
+ * by removing the device from the array (i.e. calling
+ * 'hot_remove_disk'). If they haven't yet removed
+ * the failed device, its 'raid_disk' number will be
+ * '>= 0' - meaning we must call this function
+ * ourselves.
+ */
+ if ((r->raid_disk >= 0) &&
+ (r->mddev->pers->hot_remove_disk(r->mddev, r) != 0))
+ /* Failed to revive this device, try next */
+ continue;
+
+ r->raid_disk = i;
+ r->saved_raid_disk = i;
+ flags = r->flags;
+ clear_bit(Faulty, &r->flags);
+ clear_bit(WriteErrorSeen, &r->flags);
+ clear_bit(In_sync, &r->flags);
+ if (r->mddev->pers->hot_add_disk(r->mddev, r)) {
+ r->raid_disk = -1;
+ r->saved_raid_disk = -1;
+ r->flags = flags;
+ } else {
+ r->recovery_offset = 0;
+ cleared_failed_devices |= 1 << i;
+ }
+ }
+ }
+ if (cleared_failed_devices) {
+ rdev_for_each(r, &rs->md) {
+ sb = page_address(r->sb_page);
+ failed_devices = le64_to_cpu(sb->failed_devices);
+ failed_devices &= ~cleared_failed_devices;
+ sb->failed_devices = cpu_to_le64(failed_devices);
+ }
+ }
+}
+
static void raid_resume(struct dm_target *ti)
{
struct raid_set *rs = ti->private;
@@ -1580,6 +1637,13 @@ static void raid_resume(struct dm_target *ti)
if (!rs->bitmap_loaded) {
bitmap_load(&rs->md);
rs->bitmap_loaded = 1;
+ } else {
+ /*
+ * A secondary resume while the device is active.
+ * Take this opportunity to check whether any failed
+ * devices are reachable again.
+ */
+ attempt_restore_of_faulty_devices(rs);
}
clear_bit(MD_RECOVERY_FROZEN, &rs->md.recovery);
@@ -1588,7 +1652,7 @@ static void raid_resume(struct dm_target *ti)
static struct target_type raid_target = {
.name = "raid",
- .version = {1, 5, 0},
+ .version = {1, 5, 2},
.module = THIS_MODULE,
.ctr = raid_ctr,
.dtr = raid_dtr,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 681d1099a2d5..dddc87bcf64a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -521,6 +521,7 @@ void mddev_init(struct mddev *mddev)
init_waitqueue_head(&mddev->recovery_wait);
mddev->reshape_position = MaxSector;
mddev->reshape_backwards = 0;
+ mddev->last_sync_action = "none";
mddev->resync_min = 0;
mddev->resync_max = MaxSector;
mddev->level = LEVEL_NONE;
@@ -2867,7 +2868,7 @@ static ssize_t
offset_store(struct md_rdev *rdev, const char *buf, size_t len)
{
unsigned long long offset;
- if (strict_strtoull(buf, 10, &offset) < 0)
+ if (kstrtoull(buf, 10, &offset) < 0)
return -EINVAL;
if (rdev->mddev->pers && rdev->raid_disk >= 0)
return -EBUSY;
@@ -2895,7 +2896,7 @@ static ssize_t new_offset_store(struct md_rdev *rdev,
unsigned long long new_offset;
struct mddev *mddev = rdev->mddev;
- if (strict_strtoull(buf, 10, &new_offset) < 0)
+ if (kstrtoull(buf, 10, &new_offset) < 0)
return -EINVAL;
if (mddev->sync_thread)
@@ -2961,7 +2962,7 @@ static int strict_blocks_to_sectors(const char *buf, sector_t *sectors)
unsigned long long blocks;
sector_t new;
- if (strict_strtoull(buf, 10, &blocks) < 0)
+ if (kstrtoull(buf, 10, &blocks) < 0)
return -EINVAL;
if (blocks & 1ULL << (8 * sizeof(blocks) - 1))
@@ -3069,7 +3070,7 @@ static ssize_t recovery_start_store(struct md_rdev *rdev, const char *buf, size_
if (cmd_match(buf, "none"))
recovery_start = MaxSector;
- else if (strict_strtoull(buf, 10, &recovery_start))
+ else if (kstrtoull(buf, 10, &recovery_start))
return -EINVAL;
if (rdev->mddev->pers &&
@@ -3497,7 +3498,7 @@ level_store(struct mddev *mddev, const char *buf, size_t len)
if (clevel[len-1] == '\n')
len--;
clevel[len] = 0;
- if (strict_strtol(clevel, 10, &level))
+ if (kstrtol(clevel, 10, &level))
level = LEVEL_NONE;
if (request_module("md-%s", clevel) != 0)
@@ -4272,6 +4273,17 @@ action_store(struct mddev *mddev, const char *page, size_t len)
return len;
}
+static struct md_sysfs_entry md_scan_mode =
+__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
+
+static ssize_t
+last_sync_action_show(struct mddev *mddev, char *page)
+{
+ return sprintf(page, "%s\n", mddev->last_sync_action);
+}
+
+static struct md_sysfs_entry md_last_scan_mode = __ATTR_RO(last_sync_action);
+
static ssize_t
mismatch_cnt_show(struct mddev *mddev, char *page)
{
@@ -4280,10 +4292,6 @@ mismatch_cnt_show(struct mddev *mddev, char *page)
atomic64_read(&mddev->resync_mismatches));
}
-static struct md_sysfs_entry md_scan_mode =
-__ATTR(sync_action, S_IRUGO|S_IWUSR, action_show, action_store);
-
-
static struct md_sysfs_entry md_mismatches = __ATTR_RO(mismatch_cnt);
static ssize_t
@@ -4356,7 +4364,7 @@ sync_force_parallel_store(struct mddev *mddev, const char *buf, size_t len)
{
long n;
- if (strict_strtol(buf, 10, &n))
+ if (kstrtol(buf, 10, &n))
return -EINVAL;
if (n != 0 && n != 1)
@@ -4424,7 +4432,7 @@ static ssize_t
min_sync_store(struct mddev *mddev, const char *buf, size_t len)
{
unsigned long long min;
- if (strict_strtoull(buf, 10, &min))
+ if (kstrtoull(buf, 10, &min))
return -EINVAL;
if (min > mddev->resync_max)
return -EINVAL;
@@ -4461,7 +4469,7 @@ max_sync_store(struct mddev *mddev, const char *buf, size_t len)
mddev->resync_max = MaxSector;
else {
unsigned long long max;
- if (strict_strtoull(buf, 10, &max))
+ if (kstrtoull(buf, 10, &max))
return -EINVAL;
if (max < mddev->resync_min)
return -EINVAL;
@@ -4686,6 +4694,7 @@ static struct attribute *md_default_attrs[] = {
static struct attribute *md_redundancy_attrs[] = {
&md_scan_mode.attr,
+ &md_last_scan_mode.attr,
&md_mismatches.attr,
&md_sync_min.attr,
&md_sync_max.attr,
@@ -5268,8 +5277,8 @@ static void md_clean(struct mddev *mddev)
static void __md_stop_writes(struct mddev *mddev)
{
+ set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
if (mddev->sync_thread) {
- set_bit(MD_RECOVERY_FROZEN, &mddev->recovery);
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_reap_sync_thread(mddev);
}
@@ -6405,6 +6414,12 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
/* need to ensure md_delayed_delete() has completed */
flush_workqueue(md_misc_wq);
+ if (cmd == HOT_REMOVE_DISK)
+ /* need to ensure recovery thread has run */
+ wait_event_interruptible_timeout(mddev->sb_wait,
+ !test_bit(MD_RECOVERY_NEEDED,
+ &mddev->flags),
+ msecs_to_jiffies(5000));
err = mddev_lock(mddev);
if (err) {
printk(KERN_INFO
@@ -7323,7 +7338,7 @@ void md_do_sync(struct md_thread *thread)
sector_t last_check;
int skipped = 0;
struct md_rdev *rdev;
- char *desc;
+ char *desc, *action = NULL;
struct blk_plug plug;
/* just incase thread restarts... */
@@ -7333,17 +7348,21 @@ void md_do_sync(struct md_thread *thread)
return;
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
- if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery))
+ if (test_bit(MD_RECOVERY_CHECK, &mddev->recovery)) {
desc = "data-check";
- else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery))
+ action = "check";
+ } else if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
desc = "requested-resync";
- else
+ action = "repair";
+ } else
desc = "resync";
} else if (test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
desc = "reshape";
else
desc = "recovery";
+ mddev->last_sync_action = action ?: desc;
+
/* we overload curr_resync somewhat here.
* 0 == not engaged in resync at all
* 2 == checking that there is no conflict with another sync
@@ -7892,6 +7911,8 @@ void md_check_recovery(struct mddev *mddev)
md_new_event(mddev);
}
unlock:
+ wake_up(&mddev->sb_wait);
+
if (!mddev->sync_thread) {
clear_bit(MD_RECOVERY_RUNNING, &mddev->recovery);
if (test_and_clear_bit(MD_RECOVERY_RECOVER,
diff --git a/drivers/md/md.h b/drivers/md/md.h
index 653f992b687a..20f02c0b5f2d 100644
--- a/drivers/md/md.h
+++ b/drivers/md/md.h
@@ -268,6 +268,14 @@ struct mddev {
struct md_thread *thread; /* management thread */
struct md_thread *sync_thread; /* doing resync or reconstruct */
+
+ /* 'last_sync_action' is initialized to "none". It is set when a
+ * sync operation (i.e "data-check", "requested-resync", "resync",
+ * "recovery", or "reshape") is started. It holds this value even
+ * when the sync thread is "frozen" (interrupted) or "idle" (stopped
+ * or finished). It is overwritten when a new sync operation is begun.
+ */
+ char *last_sync_action;
sector_t curr_resync; /* last block scheduled */
/* As resync requests can complete out of order, we cannot easily track
* how much resync has been completed. So we occasionally pause until
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index fcf65e512cf5..c4d420b7d2f4 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -597,6 +597,7 @@ static void *raid0_takeover_raid45(struct mddev *mddev)
mdname(mddev));
return ERR_PTR(-EINVAL);
}
+ rdev->sectors = mddev->dev_sectors;
}
/* Set new parameters */
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 55951182af73..ec734588a1c6 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -417,7 +417,17 @@ static void raid1_end_write_request(struct bio *bio, int error)
r1_bio->bios[mirror] = NULL;
to_put = bio;
- set_bit(R1BIO_Uptodate, &r1_bio->state);
+ /*
+ * Do not set R1BIO_Uptodate if the current device is
+ * rebuilding or Faulty. This is because we cannot use
+ * such device for properly reading the data back (we could
+ * potentially use it, if the current write would have felt
+ * before rdev->recovery_offset, but for simplicity we don't
+ * check this here.
+ */
+ if (test_bit(In_sync, &conf->mirrors[mirror].rdev->flags) &&
+ !test_bit(Faulty, &conf->mirrors[mirror].rdev->flags))
+ set_bit(R1BIO_Uptodate, &r1_bio->state);
/* Maybe we can clear some bad blocks. */
if (is_badblock(conf->mirrors[mirror].rdev,
@@ -870,17 +880,17 @@ static void allow_barrier(struct r1conf *conf)
wake_up(&conf->wait_barrier);
}
-static void freeze_array(struct r1conf *conf)
+static void freeze_array(struct r1conf *conf, int extra)
{
/* stop syncio and normal IO and wait for everything to
* go quite.
* We increment barrier and nr_waiting, and then
- * wait until nr_pending match nr_queued+1
+ * wait until nr_pending match nr_queued+extra
* This is called in the context of one normal IO request
* that has failed. Thus any sync request that might be pending
* will be blocked by nr_pending, and we need to wait for
* pending IO requests to complete or be queued for re-try.
- * Thus the number queued (nr_queued) plus this request (1)
+ * Thus the number queued (nr_queued) plus this request (extra)
* must match the number of pending IOs (nr_pending) before
* we continue.
*/
@@ -888,7 +898,7 @@ static void freeze_array(struct r1conf *conf)
conf->barrier++;
conf->nr_waiting++;
wait_event_lock_irq_cmd(conf->wait_barrier,
- conf->nr_pending == conf->nr_queued+1,
+ conf->nr_pending == conf->nr_queued+extra,
conf->resync_lock,
flush_pending_writes(conf));
spin_unlock_irq(&conf->resync_lock);
@@ -1509,8 +1519,9 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
p = conf->mirrors+mirror;
if (!p->rdev) {
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ if (mddev->gendisk)
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
p->head_position = 0;
rdev->raid_disk = mirror;
@@ -1544,12 +1555,12 @@ static int raid1_add_disk(struct mddev *mddev, struct md_rdev *rdev)
* we wait for all outstanding requests to complete.
*/
synchronize_sched();
- raise_barrier(conf);
- lower_barrier(conf);
+ freeze_array(conf, 0);
+ unfreeze_array(conf);
clear_bit(Unmerged, &rdev->flags);
}
md_integrity_add_rdev(rdev, mddev);
- if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+ if (mddev->queue && blk_queue_discard(bdev_get_queue(rdev->bdev)))
queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
print_conf(conf);
return err;
@@ -1595,11 +1606,11 @@ static int raid1_remove_disk(struct mddev *mddev, struct md_rdev *rdev)
*/
struct md_rdev *repl =
conf->mirrors[conf->raid_disks + number].rdev;
- raise_barrier(conf);
+ freeze_array(conf, 0);
clear_bit(Replacement, &repl->flags);
p->rdev = repl;
conf->mirrors[conf->raid_disks + number].rdev = NULL;
- lower_barrier(conf);
+ unfreeze_array(conf);
clear_bit(WantReplacement, &rdev->flags);
} else
clear_bit(WantReplacement, &rdev->flags);
@@ -2195,7 +2206,7 @@ static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
* frozen
*/
if (mddev->ro == 0) {
- freeze_array(conf);
+ freeze_array(conf, 1);
fix_read_error(conf, r1_bio->read_disk,
r1_bio->sector, r1_bio->sectors);
unfreeze_array(conf);
@@ -2780,8 +2791,8 @@ static int run(struct mddev *mddev)
return PTR_ERR(conf);
if (mddev->queue)
- blk_queue_max_write_same_sectors(mddev->queue,
- mddev->chunk_sectors);
+ blk_queue_max_write_same_sectors(mddev->queue, 0);
+
rdev_for_each(rdev, mddev) {
if (!mddev->gendisk)
continue;
@@ -2963,7 +2974,7 @@ static int raid1_reshape(struct mddev *mddev)
return -ENOMEM;
}
- raise_barrier(conf);
+ freeze_array(conf, 0);
/* ok, everything is stopped */
oldpool = conf->r1bio_pool;
@@ -2994,7 +3005,7 @@ static int raid1_reshape(struct mddev *mddev)
conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0;
- lower_barrier(conf);
+ unfreeze_array(conf);
set_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
md_wakeup_thread(mddev->thread);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 59d4daa5f4c7..cd066b63bdaf 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -97,7 +97,7 @@ static int max_queued_requests = 1024;
static void allow_barrier(struct r10conf *conf);
static void lower_barrier(struct r10conf *conf);
-static int enough(struct r10conf *conf, int ignore);
+static int _enough(struct r10conf *conf, int previous, int ignore);
static sector_t reshape_request(struct mddev *mddev, sector_t sector_nr,
int *skipped);
static void reshape_request_write(struct mddev *mddev, struct r10bio *r10_bio);
@@ -392,11 +392,9 @@ static void raid10_end_read_request(struct bio *bio, int error)
* than fail the last device. Here we redefine
* "uptodate" to mean "Don't want to retry"
*/
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
- if (!enough(conf, rdev->raid_disk))
+ if (!_enough(conf, test_bit(R10BIO_Previous, &r10_bio->state),
+ rdev->raid_disk))
uptodate = 1;
- spin_unlock_irqrestore(&conf->device_lock, flags);
}
if (uptodate) {
raid_end_bio_io(r10_bio);
@@ -490,7 +488,17 @@ static void raid10_end_write_request(struct bio *bio, int error)
sector_t first_bad;
int bad_sectors;
- set_bit(R10BIO_Uptodate, &r10_bio->state);
+ /*
+ * Do not set R10BIO_Uptodate if the current device is
+ * rebuilding or Faulty. This is because we cannot use
+ * such device for properly reading the data back (we could
+ * potentially use it, if the current write would have felt
+ * before rdev->recovery_offset, but for simplicity we don't
+ * check this here.
+ */
+ if (test_bit(In_sync, &rdev->flags) &&
+ !test_bit(Faulty, &rdev->flags))
+ set_bit(R10BIO_Uptodate, &r10_bio->state);
/* Maybe we can clear some bad blocks. */
if (is_badblock(rdev,
@@ -1055,17 +1063,17 @@ static void allow_barrier(struct r10conf *conf)
wake_up(&conf->wait_barrier);
}
-static void freeze_array(struct r10conf *conf)
+static void freeze_array(struct r10conf *conf, int extra)
{
/* stop syncio and normal IO and wait for everything to
* go quiet.
* We increment barrier and nr_waiting, and then
- * wait until nr_pending match nr_queued+1
+ * wait until nr_pending match nr_queued+extra
* This is called in the context of one normal IO request
* that has failed. Thus any sync request that might be pending
* will be blocked by nr_pending, and we need to wait for
* pending IO requests to complete or be queued for re-try.
- * Thus the number queued (nr_queued) plus this request (1)
+ * Thus the number queued (nr_queued) plus this request (extra)
* must match the number of pending IOs (nr_pending) before
* we continue.
*/
@@ -1073,7 +1081,7 @@ static void freeze_array(struct r10conf *conf)
conf->barrier++;
conf->nr_waiting++;
wait_event_lock_irq_cmd(conf->wait_barrier,
- conf->nr_pending == conf->nr_queued+1,
+ conf->nr_pending == conf->nr_queued+extra,
conf->resync_lock,
flush_pending_writes(conf));
@@ -1622,37 +1630,58 @@ static void status(struct seq_file *seq, struct mddev *mddev)
* Don't consider the device numbered 'ignore'
* as we might be about to remove it.
*/
-static int _enough(struct r10conf *conf, struct geom *geo, int ignore)
+static int _enough(struct r10conf *conf, int previous, int ignore)
{
int first = 0;
+ int has_enough = 0;
+ int disks, ncopies;
+ if (previous) {
+ disks = conf->prev.raid_disks;
+ ncopies = conf->prev.near_copies;
+ } else {
+ disks = conf->geo.raid_disks;
+ ncopies = conf->geo.near_copies;
+ }
+ rcu_read_lock();
do {
int n = conf->copies;
int cnt = 0;
int this = first;
while (n--) {
- if (conf->mirrors[this].rdev &&
- this != ignore)
+ struct md_rdev *rdev;
+ if (this != ignore &&
+ (rdev = rcu_dereference(conf->mirrors[this].rdev)) &&
+ test_bit(In_sync, &rdev->flags))
cnt++;
- this = (this+1) % geo->raid_disks;
+ this = (this+1) % disks;
}
if (cnt == 0)
- return 0;
- first = (first + geo->near_copies) % geo->raid_disks;
+ goto out;
+ first = (first + ncopies) % disks;
} while (first != 0);
- return 1;
+ has_enough = 1;
+out:
+ rcu_read_unlock();
+ return has_enough;
}
static int enough(struct r10conf *conf, int ignore)
{
- return _enough(conf, &conf->geo, ignore) &&
- _enough(conf, &conf->prev, ignore);
+ /* when calling 'enough', both 'prev' and 'geo' must
+ * be stable.
+ * This is ensured if ->reconfig_mutex or ->device_lock
+ * is held.
+ */
+ return _enough(conf, 0, ignore) &&
+ _enough(conf, 1, ignore);
}
static void error(struct mddev *mddev, struct md_rdev *rdev)
{
char b[BDEVNAME_SIZE];
struct r10conf *conf = mddev->private;
+ unsigned long flags;
/*
* If it is not operational, then we have already marked it as dead
@@ -1660,18 +1689,18 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
* next level up know.
* else mark the drive as failed
*/
+ spin_lock_irqsave(&conf->device_lock, flags);
if (test_bit(In_sync, &rdev->flags)
- && !enough(conf, rdev->raid_disk))
+ && !enough(conf, rdev->raid_disk)) {
/*
* Don't fail the drive, just return an IO error.
*/
+ spin_unlock_irqrestore(&conf->device_lock, flags);
return;
+ }
if (test_and_clear_bit(In_sync, &rdev->flags)) {
- unsigned long flags;
- spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
- spin_unlock_irqrestore(&conf->device_lock, flags);
- /*
+ /*
* if recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_INTR, &mddev->recovery);
@@ -1679,6 +1708,7 @@ static void error(struct mddev *mddev, struct md_rdev *rdev)
set_bit(Blocked, &rdev->flags);
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
printk(KERN_ALERT
"md/raid10:%s: Disk failure on %s, disabling device.\n"
"md/raid10:%s: Operation continuing on %d devices.\n",
@@ -1781,7 +1811,7 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
* very different from resync
*/
return -EBUSY;
- if (rdev->saved_raid_disk < 0 && !_enough(conf, &conf->prev, -1))
+ if (rdev->saved_raid_disk < 0 && !_enough(conf, 1, -1))
return -EINVAL;
if (rdev->raid_disk >= 0)
@@ -1809,15 +1839,17 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
set_bit(Replacement, &rdev->flags);
rdev->raid_disk = mirror;
err = 0;
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ if (mddev->gendisk)
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
conf->fullsync = 1;
rcu_assign_pointer(p->replacement, rdev);
break;
}
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
+ if (mddev->gendisk)
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
p->head_position = 0;
p->recovery_disabled = mddev->recovery_disabled - 1;
@@ -1837,8 +1869,8 @@ static int raid10_add_disk(struct mddev *mddev, struct md_rdev *rdev)
* we wait for all outstanding requests to complete.
*/
synchronize_sched();
- raise_barrier(conf, 0);
- lower_barrier(conf);
+ freeze_array(conf, 0);
+ unfreeze_array(conf);
clear_bit(Unmerged, &rdev->flags);
}
md_integrity_add_rdev(rdev, mddev);
@@ -2612,7 +2644,7 @@ static void handle_read_error(struct mddev *mddev, struct r10bio *r10_bio)
r10_bio->devs[slot].bio = NULL;
if (mddev->ro == 0) {
- freeze_array(conf);
+ freeze_array(conf, 1);
fix_read_error(conf, mddev, r10_bio);
unfreeze_array(conf);
} else
@@ -2899,14 +2931,13 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
*/
if (mddev->bitmap == NULL &&
mddev->recovery_cp == MaxSector &&
+ mddev->reshape_position == MaxSector &&
+ !test_bit(MD_RECOVERY_SYNC, &mddev->recovery) &&
!test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery) &&
+ !test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery) &&
conf->fullsync == 0) {
*skipped = 1;
- max_sector = mddev->dev_sectors;
- if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery) ||
- test_bit(MD_RECOVERY_RESHAPE, &mddev->recovery))
- max_sector = mddev->resync_max_sectors;
- return max_sector - sector_nr;
+ return mddev->dev_sectors - sector_nr;
}
skipped:
@@ -3522,7 +3553,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
/* FIXME calc properly */
conf->mirrors = kzalloc(sizeof(struct raid10_info)*(mddev->raid_disks +
- max(0,mddev->delta_disks)),
+ max(0,-mddev->delta_disks)),
GFP_KERNEL);
if (!conf->mirrors)
goto out;
@@ -3609,8 +3640,7 @@ static int run(struct mddev *mddev)
if (mddev->queue) {
blk_queue_max_discard_sectors(mddev->queue,
mddev->chunk_sectors);
- blk_queue_max_write_same_sectors(mddev->queue,
- mddev->chunk_sectors);
+ blk_queue_max_write_same_sectors(mddev->queue, 0);
blk_queue_io_min(mddev->queue, chunk_size);
if (conf->geo.raid_disks % conf->geo.near_copies)
blk_queue_io_opt(mddev->queue, chunk_size * conf->geo.raid_disks);
@@ -3682,7 +3712,7 @@ static int run(struct mddev *mddev)
conf->geo.far_offset == 0)
goto out_free_conf;
if (conf->prev.far_copies != 1 &&
- conf->geo.far_offset == 0)
+ conf->prev.far_offset == 0)
goto out_free_conf;
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 9359828ffe26..2bf094a587cb 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -664,6 +664,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
if (test_bit(R5_ReadNoMerge, &sh->dev[i].flags))
bi->bi_rw |= REQ_FLUSH;
+ bi->bi_vcnt = 1;
bi->bi_io_vec[0].bv_len = STRIPE_SIZE;
bi->bi_io_vec[0].bv_offset = 0;
bi->bi_size = STRIPE_SIZE;
@@ -701,6 +702,7 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
else
rbi->bi_sector = (sh->sector
+ rrdev->data_offset);
+ rbi->bi_vcnt = 1;
rbi->bi_io_vec[0].bv_len = STRIPE_SIZE;
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_size = STRIPE_SIZE;
@@ -4922,7 +4924,7 @@ raid5_store_stripe_cache_size(struct mddev *mddev, const char *page, size_t len)
if (!conf)
return -ENODEV;
- if (strict_strtoul(page, 10, &new))
+ if (kstrtoul(page, 10, &new))
return -EINVAL;
err = raid5_set_cache_size(mddev, new);
if (err)
@@ -4955,7 +4957,7 @@ raid5_store_preread_threshold(struct mddev *mddev, const char *page, size_t len)
if (!conf)
return -ENODEV;
- if (strict_strtoul(page, 10, &new))
+ if (kstrtoul(page, 10, &new))
return -EINVAL;
if (new > conf->max_nr_stripes)
return -EINVAL;
@@ -5464,7 +5466,7 @@ static int run(struct mddev *mddev)
if (mddev->major_version == 0 &&
mddev->minor_version > 90)
rdev->recovery_offset = reshape_offset;
-
+
if (rdev->recovery_offset < reshape_offset) {
/* We need to check old and new layout */
if (!only_parity(rdev->raid_disk,
@@ -5587,6 +5589,8 @@ static int run(struct mddev *mddev)
*/
mddev->queue->limits.discard_zeroes_data = 0;
+ blk_queue_max_write_same_sectors(mddev->queue, 0);
+
rdev_for_each(rdev, mddev) {
disk_stack_limits(mddev->gendisk, rdev->bdev,
rdev->data_offset << 9);
@@ -5910,7 +5914,7 @@ static int check_reshape(struct mddev *mddev)
return 0; /* nothing to do */
if (has_failed(conf))
return -EINVAL;
- if (mddev->delta_disks < 0) {
+ if (mddev->delta_disks < 0 && mddev->reshape_position == MaxSector) {
/* We might be able to shrink, but the devices must
* be made bigger first.
* For raid6, 4 is the minimum size.