summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bitmap.c3
-rw-r--r--drivers/md/dm-cache-metadata.c2
-rw-r--r--drivers/md/dm-cache-policy-cleaner.c2
-rw-r--r--drivers/md/dm-crypt.c17
-rw-r--r--drivers/md/dm-exception-store.c6
-rw-r--r--drivers/md/dm-exception-store.h5
-rw-r--r--drivers/md/dm-raid.c3
-rw-r--r--drivers/md/dm-snap-persistent.c30
-rw-r--r--drivers/md/dm-snap-transient.c3
-rw-r--r--drivers/md/dm-snap.c14
-rw-r--r--drivers/md/dm-thin.c6
-rw-r--r--drivers/md/dm.c11
-rw-r--r--drivers/md/md.c8
-rw-r--r--drivers/md/multipath.c3
-rw-r--r--drivers/md/persistent-data/dm-btree-remove.c17
-rw-r--r--drivers/md/persistent-data/dm-btree.c2
-rw-r--r--drivers/md/raid0.c12
-rw-r--r--drivers/md/raid1.c28
-rw-r--r--drivers/md/raid10.c52
-rw-r--r--drivers/md/raid5.c17
20 files changed, 159 insertions, 82 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index e51de52eeb94..48b5890c28e3 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -1997,7 +1997,8 @@ int bitmap_resize(struct bitmap *bitmap, sector_t blocks,
if (bitmap->mddev->bitmap_info.offset || bitmap->mddev->bitmap_info.file)
ret = bitmap_storage_alloc(&store, chunks,
!bitmap->mddev->bitmap_info.external,
- bitmap->cluster_slot);
+ mddev_is_clustered(bitmap->mddev)
+ ? bitmap->cluster_slot : 0);
if (ret)
goto err;
diff --git a/drivers/md/dm-cache-metadata.c b/drivers/md/dm-cache-metadata.c
index 20cc36b01b77..0a17d1b91a81 100644
--- a/drivers/md/dm-cache-metadata.c
+++ b/drivers/md/dm-cache-metadata.c
@@ -634,10 +634,10 @@ static int __commit_transaction(struct dm_cache_metadata *cmd,
disk_super = dm_block_data(sblock);
+ disk_super->flags = cpu_to_le32(cmd->flags);
if (mutator)
update_flags(disk_super, mutator);
- disk_super->flags = cpu_to_le32(cmd->flags);
disk_super->mapping_root = cpu_to_le64(cmd->root);
disk_super->hint_root = cpu_to_le64(cmd->hint_root);
disk_super->discard_root = cpu_to_le64(cmd->discard_root);
diff --git a/drivers/md/dm-cache-policy-cleaner.c b/drivers/md/dm-cache-policy-cleaner.c
index 240c9f0e85e7..8a096456579b 100644
--- a/drivers/md/dm-cache-policy-cleaner.c
+++ b/drivers/md/dm-cache-policy-cleaner.c
@@ -436,7 +436,7 @@ static struct dm_cache_policy *wb_create(dm_cblock_t cache_size,
static struct dm_cache_policy_type wb_policy_type = {
.name = "cleaner",
.version = {1, 0, 0},
- .hint_size = 0,
+ .hint_size = 4,
.owner = THIS_MODULE,
.create = wb_create
};
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index d60c88df5234..4b3b6f8aff0c 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -968,7 +968,8 @@ static void crypt_free_buffer_pages(struct crypt_config *cc, struct bio *clone);
/*
* Generate a new unfragmented bio with the given size
- * This should never violate the device limitations
+ * This should never violate the device limitations (but only because
+ * max_segment_size is being constrained to PAGE_SIZE).
*
* This function may be called concurrently. If we allocate from the mempool
* concurrently, there is a possibility of deadlock. For example, if we have
@@ -2045,9 +2046,20 @@ static int crypt_iterate_devices(struct dm_target *ti,
return fn(ti, cc->dev, cc->start, ti->len, data);
}
+static void crypt_io_hints(struct dm_target *ti, struct queue_limits *limits)
+{
+ /*
+ * Unfortunate constraint that is required to avoid the potential
+ * for exceeding underlying device's max_segments limits -- due to
+ * crypt_alloc_buffer() possibly allocating pages for the encryption
+ * bio that are not as physically contiguous as the original bio.
+ */
+ limits->max_segment_size = PAGE_SIZE;
+}
+
static struct target_type crypt_target = {
.name = "crypt",
- .version = {1, 14, 0},
+ .version = {1, 14, 1},
.module = THIS_MODULE,
.ctr = crypt_ctr,
.dtr = crypt_dtr,
@@ -2058,6 +2070,7 @@ static struct target_type crypt_target = {
.resume = crypt_resume,
.message = crypt_message,
.iterate_devices = crypt_iterate_devices,
+ .io_hints = crypt_io_hints,
};
static int __init dm_crypt_init(void)
diff --git a/drivers/md/dm-exception-store.c b/drivers/md/dm-exception-store.c
index ebaa4f803eec..192bb8beeb6b 100644
--- a/drivers/md/dm-exception-store.c
+++ b/drivers/md/dm-exception-store.c
@@ -203,7 +203,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
return -EINVAL;
}
- tmp_store = kmalloc(sizeof(*tmp_store), GFP_KERNEL);
+ tmp_store = kzalloc(sizeof(*tmp_store), GFP_KERNEL);
if (!tmp_store) {
ti->error = "Exception store allocation failed";
return -ENOMEM;
@@ -215,7 +215,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
else if (persistent == 'N')
type = get_type("N");
else {
- ti->error = "Persistent flag is not P or N";
+ ti->error = "Exception store type is not P or N";
r = -EINVAL;
goto bad_type;
}
@@ -233,7 +233,7 @@ int dm_exception_store_create(struct dm_target *ti, int argc, char **argv,
if (r)
goto bad;
- r = type->ctr(tmp_store, 0, NULL);
+ r = type->ctr(tmp_store, (strlen(argv[0]) > 1 ? &argv[0][1] : NULL));
if (r) {
ti->error = "Exception store type constructor failed";
goto bad;
diff --git a/drivers/md/dm-exception-store.h b/drivers/md/dm-exception-store.h
index 0b2536247cf5..fae34e7a0b1e 100644
--- a/drivers/md/dm-exception-store.h
+++ b/drivers/md/dm-exception-store.h
@@ -42,8 +42,7 @@ struct dm_exception_store_type {
const char *name;
struct module *module;
- int (*ctr) (struct dm_exception_store *store,
- unsigned argc, char **argv);
+ int (*ctr) (struct dm_exception_store *store, char *options);
/*
* Destroys this object when you've finished with it.
@@ -123,6 +122,8 @@ struct dm_exception_store {
unsigned chunk_shift;
void *context;
+
+ bool userspace_supports_overflow;
};
/*
diff --git a/drivers/md/dm-raid.c b/drivers/md/dm-raid.c
index 97e165183e79..a0901214aef5 100644
--- a/drivers/md/dm-raid.c
+++ b/drivers/md/dm-raid.c
@@ -329,8 +329,7 @@ static int validate_region_size(struct raid_set *rs, unsigned long region_size)
*/
if (min_region_size > (1 << 13)) {
/* If not a power of 2, make it the next power of 2 */
- if (min_region_size & (min_region_size - 1))
- region_size = 1 << fls(region_size);
+ region_size = roundup_pow_of_two(min_region_size);
DMINFO("Choosing default region size of %lu sectors",
region_size);
} else {
diff --git a/drivers/md/dm-snap-persistent.c b/drivers/md/dm-snap-persistent.c
index bf71583296f7..117a05e40090 100644
--- a/drivers/md/dm-snap-persistent.c
+++ b/drivers/md/dm-snap-persistent.c
@@ -7,6 +7,7 @@
#include "dm-exception-store.h"
+#include <linux/ctype.h>
#include <linux/mm.h>
#include <linux/pagemap.h>
#include <linux/vmalloc.h>
@@ -843,10 +844,10 @@ static void persistent_drop_snapshot(struct dm_exception_store *store)
DMWARN("write header failed");
}
-static int persistent_ctr(struct dm_exception_store *store,
- unsigned argc, char **argv)
+static int persistent_ctr(struct dm_exception_store *store, char *options)
{
struct pstore *ps;
+ int r;
/* allocate the pstore */
ps = kzalloc(sizeof(*ps), GFP_KERNEL);
@@ -868,14 +869,32 @@ static int persistent_ctr(struct dm_exception_store *store,
ps->metadata_wq = alloc_workqueue("ksnaphd", WQ_MEM_RECLAIM, 0);
if (!ps->metadata_wq) {
- kfree(ps);
DMERR("couldn't start header metadata update thread");
- return -ENOMEM;
+ r = -ENOMEM;
+ goto err_workqueue;
+ }
+
+ if (options) {
+ char overflow = toupper(options[0]);
+ if (overflow == 'O')
+ store->userspace_supports_overflow = true;
+ else {
+ DMERR("Unsupported persistent store option: %s", options);
+ r = -EINVAL;
+ goto err_options;
+ }
}
store->context = ps;
return 0;
+
+err_options:
+ destroy_workqueue(ps->metadata_wq);
+err_workqueue:
+ kfree(ps);
+
+ return r;
}
static unsigned persistent_status(struct dm_exception_store *store,
@@ -888,7 +907,8 @@ static unsigned persistent_status(struct dm_exception_store *store,
case STATUSTYPE_INFO:
break;
case STATUSTYPE_TABLE:
- DMEMIT(" P %llu", (unsigned long long)store->chunk_size);
+ DMEMIT(" %s %llu", store->userspace_supports_overflow ? "PO" : "P",
+ (unsigned long long)store->chunk_size);
}
return sz;
diff --git a/drivers/md/dm-snap-transient.c b/drivers/md/dm-snap-transient.c
index 1ce9a2586e41..9b7c8c8049d6 100644
--- a/drivers/md/dm-snap-transient.c
+++ b/drivers/md/dm-snap-transient.c
@@ -70,8 +70,7 @@ static void transient_usage(struct dm_exception_store *store,
*metadata_sectors = 0;
}
-static int transient_ctr(struct dm_exception_store *store,
- unsigned argc, char **argv)
+static int transient_ctr(struct dm_exception_store *store, char *options)
{
struct transient_c *tc;
diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c
index c0bcd6516dfe..c06b74e91cd6 100644
--- a/drivers/md/dm-snap.c
+++ b/drivers/md/dm-snap.c
@@ -1098,7 +1098,7 @@ static void stop_merge(struct dm_snapshot *s)
}
/*
- * Construct a snapshot mapping: <origin_dev> <COW-dev> <p/n> <chunk-size>
+ * Construct a snapshot mapping: <origin_dev> <COW-dev> <p|po|n> <chunk-size>
*/
static int snapshot_ctr(struct dm_target *ti, unsigned int argc, char **argv)
{
@@ -1302,6 +1302,7 @@ static void __handover_exceptions(struct dm_snapshot *snap_src,
u.store_swap = snap_dest->store;
snap_dest->store = snap_src->store;
+ snap_dest->store->userspace_supports_overflow = u.store_swap->userspace_supports_overflow;
snap_src->store = u.store_swap;
snap_dest->store->snap = snap_dest;
@@ -1739,8 +1740,11 @@ static int snapshot_map(struct dm_target *ti, struct bio *bio)
pe = __find_pending_exception(s, pe, chunk);
if (!pe) {
- s->snapshot_overflowed = 1;
- DMERR("Snapshot overflowed: Unable to allocate exception.");
+ if (s->store->userspace_supports_overflow) {
+ s->snapshot_overflowed = 1;
+ DMERR("Snapshot overflowed: Unable to allocate exception.");
+ } else
+ __invalidate_snapshot(s, -ENOMEM);
r = -EIO;
goto out_unlock;
}
@@ -2365,7 +2369,7 @@ static struct target_type origin_target = {
static struct target_type snapshot_target = {
.name = "snapshot",
- .version = {1, 14, 0},
+ .version = {1, 15, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
@@ -2379,7 +2383,7 @@ static struct target_type snapshot_target = {
static struct target_type merge_target = {
.name = dm_snapshot_merge_target_name,
- .version = {1, 3, 0},
+ .version = {1, 4, 0},
.module = THIS_MODULE,
.ctr = snapshot_ctr,
.dtr = snapshot_dtr,
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 6578b7bc1fbb..3897b90bd462 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -3201,7 +3201,7 @@ static int pool_ctr(struct dm_target *ti, unsigned argc, char **argv)
metadata_low_callback,
pool);
if (r)
- goto out_free_pt;
+ goto out_flags_changed;
pt->callbacks.congested_fn = pool_is_congested;
dm_table_add_target_callbacks(ti->table, &pt->callbacks);
@@ -4249,6 +4249,10 @@ static void thin_io_hints(struct dm_target *ti, struct queue_limits *limits)
{
struct thin_c *tc = ti->private;
struct pool *pool = tc->pool;
+ struct queue_limits *pool_limits = dm_get_queue_limits(pool->pool_md);
+
+ if (!pool_limits->discard_granularity)
+ return; /* pool's discard support is disabled */
limits->discard_granularity = pool->sectors_per_block << SECTOR_SHIFT;
limits->max_discard_sectors = 2048 * 1024 * 16; /* 16G */
diff --git a/drivers/md/dm.c b/drivers/md/dm.c
index 6264781dc69a..1b5c6047e4f1 100644
--- a/drivers/md/dm.c
+++ b/drivers/md/dm.c
@@ -1001,6 +1001,7 @@ static void end_clone_bio(struct bio *clone)
struct dm_rq_target_io *tio = info->tio;
struct bio *bio = info->orig;
unsigned int nr_bytes = info->orig->bi_iter.bi_size;
+ int error = clone->bi_error;
bio_put(clone);
@@ -1011,13 +1012,13 @@ static void end_clone_bio(struct bio *clone)
* the remainder.
*/
return;
- else if (bio->bi_error) {
+ else if (error) {
/*
* Don't notice the error to the upper layer yet.
* The error handling decision is made by the target driver,
* when the request is completed.
*/
- tio->error = bio->bi_error;
+ tio->error = error;
return;
}
@@ -2837,8 +2838,6 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
might_sleep();
- map = dm_get_live_table(md, &srcu_idx);
-
spin_lock(&_minor_lock);
idr_replace(&_minor_idr, MINOR_ALLOCED, MINOR(disk_devt(dm_disk(md))));
set_bit(DMF_FREEING, &md->flags);
@@ -2852,14 +2851,14 @@ static void __dm_destroy(struct mapped_device *md, bool wait)
* do not race with internal suspend.
*/
mutex_lock(&md->suspend_lock);
+ map = dm_get_live_table(md, &srcu_idx);
if (!dm_suspended_md(md)) {
dm_table_presuspend_targets(map);
dm_table_postsuspend_targets(map);
}
- mutex_unlock(&md->suspend_lock);
-
/* dm_put_live_table must be before msleep, otherwise deadlock is possible */
dm_put_live_table(md, srcu_idx);
+ mutex_unlock(&md->suspend_lock);
/*
* Rare, but there may be I/O requests still going to complete,
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 4f5ecbe94ccb..3fe3d04a968a 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -5409,9 +5409,13 @@ static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
* which will now never happen */
wake_up_process(mddev->sync_thread->tsk);
+ if (mddev->external && test_bit(MD_CHANGE_PENDING, &mddev->flags))
+ return -EBUSY;
mddev_unlock(mddev);
wait_event(resync_wait, !test_bit(MD_RECOVERY_RUNNING,
&mddev->recovery));
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
mddev_lock_nointr(mddev);
mutex_lock(&mddev->open_mutex);
@@ -8036,8 +8040,7 @@ static int remove_and_add_spares(struct mddev *mddev,
!test_bit(Bitmap_sync, &rdev->flags)))
continue;
- if (rdev->saved_raid_disk < 0)
- rdev->recovery_offset = 0;
+ rdev->recovery_offset = 0;
if (mddev->pers->
hot_add_disk(mddev, rdev) == 0) {
if (sysfs_link_rdev(mddev, rdev))
@@ -8160,6 +8163,7 @@ void md_check_recovery(struct mddev *mddev)
md_reap_sync_thread(mddev);
clear_bit(MD_RECOVERY_RECOVER, &mddev->recovery);
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
+ clear_bit(MD_CHANGE_PENDING, &mddev->flags);
goto unlock;
}
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index d222522c52e0..d132f06afdd1 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -470,8 +470,7 @@ static int multipath_run (struct mddev *mddev)
return 0;
out_free_conf:
- if (conf->pool)
- mempool_destroy(conf->pool);
+ mempool_destroy(conf->pool);
kfree(conf->multipaths);
kfree(conf);
mddev->private = NULL;
diff --git a/drivers/md/persistent-data/dm-btree-remove.c b/drivers/md/persistent-data/dm-btree-remove.c
index 421a36c593e3..2e4c4cb79e4d 100644
--- a/drivers/md/persistent-data/dm-btree-remove.c
+++ b/drivers/md/persistent-data/dm-btree-remove.c
@@ -301,11 +301,16 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
{
int s;
uint32_t max_entries = le32_to_cpu(left->header.max_entries);
- unsigned target = (nr_left + nr_center + nr_right) / 3;
- BUG_ON(target > max_entries);
+ unsigned total = nr_left + nr_center + nr_right;
+ unsigned target_right = total / 3;
+ unsigned remainder = (target_right * 3) != total;
+ unsigned target_left = target_right + remainder;
+
+ BUG_ON(target_left > max_entries);
+ BUG_ON(target_right > max_entries);
if (nr_left < nr_right) {
- s = nr_left - target;
+ s = nr_left - target_left;
if (s < 0 && nr_center < -s) {
/* not enough in central node */
@@ -316,10 +321,10 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
} else
shift(left, center, s);
- shift(center, right, target - nr_right);
+ shift(center, right, target_right - nr_right);
} else {
- s = target - nr_right;
+ s = target_right - nr_right;
if (s > 0 && nr_center < s) {
/* not enough in central node */
shift(center, right, nr_center);
@@ -329,7 +334,7 @@ static void redistribute3(struct dm_btree_info *info, struct btree_node *parent,
} else
shift(center, right, s);
- shift(left, center, nr_left - target);
+ shift(left, center, nr_left - target_left);
}
*key_ptr(parent, c->index) = center->keys[0];
diff --git a/drivers/md/persistent-data/dm-btree.c b/drivers/md/persistent-data/dm-btree.c
index b6cec258cc21..0e09aef43998 100644
--- a/drivers/md/persistent-data/dm-btree.c
+++ b/drivers/md/persistent-data/dm-btree.c
@@ -523,7 +523,7 @@ static int btree_split_beneath(struct shadow_spine *s, uint64_t key)
r = new_block(s->info, &right);
if (r < 0) {
- /* FIXME: put left */
+ unlock_block(s->info, left);
return r;
}
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 63e619b2f44e..f8e5db0cb5aa 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -376,12 +376,6 @@ static int raid0_run(struct mddev *mddev)
struct md_rdev *rdev;
bool discard_supported = false;
- rdev_for_each(rdev, mddev) {
- disk_stack_limits(mddev->gendisk, rdev->bdev,
- rdev->data_offset << 9);
- if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
- discard_supported = true;
- }
blk_queue_max_hw_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_write_same_sectors(mddev->queue, mddev->chunk_sectors);
blk_queue_max_discard_sectors(mddev->queue, mddev->chunk_sectors);
@@ -390,6 +384,12 @@ static int raid0_run(struct mddev *mddev)
blk_queue_io_opt(mddev->queue,
(mddev->chunk_sectors << 9) * mddev->raid_disks);
+ rdev_for_each(rdev, mddev) {
+ disk_stack_limits(mddev->gendisk, rdev->bdev,
+ rdev->data_offset << 9);
+ if (blk_queue_discard(bdev_get_queue(rdev->bdev)))
+ discard_supported = true;
+ }
if (!discard_supported)
queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, mddev->queue);
else
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 4517f06c41ba..d9d031ede4bf 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -881,8 +881,7 @@ static sector_t wait_barrier(struct r1conf *conf, struct bio *bio)
}
if (bio && bio_data_dir(bio) == WRITE) {
- if (bio->bi_iter.bi_sector >=
- conf->mddev->curr_resync_completed) {
+ if (bio->bi_iter.bi_sector >= conf->next_resync) {
if (conf->start_next_window == MaxSector)
conf->start_next_window =
conf->next_resync +
@@ -1516,7 +1515,7 @@ static void close_sync(struct r1conf *conf)
conf->r1buf_pool = NULL;
spin_lock_irq(&conf->resync_lock);
- conf->next_resync = 0;
+ conf->next_resync = MaxSector - 2 * NEXT_NORMALIO_DISTANCE;
conf->start_next_window = MaxSector;
conf->current_window_requests +=
conf->next_window_requests;
@@ -2196,7 +2195,7 @@ static int narrow_write_error(struct r1bio *r1_bio, int i)
bio_trim(wbio, sector - r1_bio->sector, sectors);
wbio->bi_iter.bi_sector += rdev->data_offset;
wbio->bi_bdev = rdev->bdev;
- if (submit_bio_wait(WRITE, wbio) == 0)
+ if (submit_bio_wait(WRITE, wbio) < 0)
/* failure! */
ok = rdev_set_badblocks(rdev, sector,
sectors, 0)
@@ -2259,15 +2258,16 @@ static void handle_write_finished(struct r1conf *conf, struct r1bio *r1_bio)
rdev_dec_pending(conf->mirrors[m].rdev,
conf->mddev);
}
- if (test_bit(R1BIO_WriteError, &r1_bio->state))
- close_write(r1_bio);
if (fail) {
spin_lock_irq(&conf->device_lock);
list_add(&r1_bio->retry_list, &conf->bio_end_io_list);
spin_unlock_irq(&conf->device_lock);
md_wakeup_thread(conf->mddev->thread);
- } else
+ } else {
+ if (test_bit(R1BIO_WriteError, &r1_bio->state))
+ close_write(r1_bio);
raid_end_bio_io(r1_bio);
+ }
}
static void handle_read_error(struct r1conf *conf, struct r1bio *r1_bio)
@@ -2383,9 +2383,13 @@ static void raid1d(struct md_thread *thread)
}
spin_unlock_irqrestore(&conf->device_lock, flags);
while (!list_empty(&tmp)) {
- r1_bio = list_first_entry(&conf->bio_end_io_list,
- struct r1bio, retry_list);
+ r1_bio = list_first_entry(&tmp, struct r1bio,
+ retry_list);
list_del(&r1_bio->retry_list);
+ if (mddev->degraded)
+ set_bit(R1BIO_Degraded, &r1_bio->state);
+ if (test_bit(R1BIO_WriteError, &r1_bio->state))
+ close_write(r1_bio);
raid_end_bio_io(r1_bio);
}
}
@@ -2843,8 +2847,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
abort:
if (conf) {
- if (conf->r1bio_pool)
- mempool_destroy(conf->r1bio_pool);
+ mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
@@ -2946,8 +2949,7 @@ static void raid1_free(struct mddev *mddev, void *priv)
{
struct r1conf *conf = priv;
- if (conf->r1bio_pool)
- mempool_destroy(conf->r1bio_pool);
+ mempool_destroy(conf->r1bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
kfree(conf->poolinfo);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 0fc33eb88855..96f365968306 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -39,6 +39,7 @@
* far_copies (stored in second byte of layout)
* far_offset (stored in bit 16 of layout )
* use_far_sets (stored in bit 17 of layout )
+ * use_far_sets_bugfixed (stored in bit 18 of layout )
*
* The data to be stored is divided into chunks using chunksize. Each device
* is divided into far_copies sections. In each section, chunks are laid out
@@ -1497,6 +1498,8 @@ static void status(struct seq_file *seq, struct mddev *mddev)
seq_printf(seq, " %d offset-copies", conf->geo.far_copies);
else
seq_printf(seq, " %d far-copies", conf->geo.far_copies);
+ if (conf->geo.far_set_size != conf->geo.raid_disks)
+ seq_printf(seq, " %d devices per set", conf->geo.far_set_size);
}
seq_printf(seq, " [%d/%d] [", conf->geo.raid_disks,
conf->geo.raid_disks - mddev->degraded);
@@ -2467,7 +2470,7 @@ static int narrow_write_error(struct r10bio *r10_bio, int i)
choose_data_offset(r10_bio, rdev) +
(sector - r10_bio->sector));
wbio->bi_bdev = rdev->bdev;
- if (submit_bio_wait(WRITE, wbio) == 0)
+ if (submit_bio_wait(WRITE, wbio) < 0)
/* Failure! */
ok = rdev_set_badblocks(rdev, sector,
sectors, 0)
@@ -2654,16 +2657,17 @@ static void handle_write_completed(struct r10conf *conf, struct r10bio *r10_bio)
rdev_dec_pending(rdev, conf->mddev);
}
}
- if (test_bit(R10BIO_WriteError,
- &r10_bio->state))
- close_write(r10_bio);
if (fail) {
spin_lock_irq(&conf->device_lock);
list_add(&r10_bio->retry_list, &conf->bio_end_io_list);
spin_unlock_irq(&conf->device_lock);
md_wakeup_thread(conf->mddev->thread);
- } else
+ } else {
+ if (test_bit(R10BIO_WriteError,
+ &r10_bio->state))
+ close_write(r10_bio);
raid_end_bio_io(r10_bio);
+ }
}
}
@@ -2688,9 +2692,15 @@ static void raid10d(struct md_thread *thread)
}
spin_unlock_irqrestore(&conf->device_lock, flags);
while (!list_empty(&tmp)) {
- r10_bio = list_first_entry(&conf->bio_end_io_list,
- struct r10bio, retry_list);
+ r10_bio = list_first_entry(&tmp, struct r10bio,
+ retry_list);
list_del(&r10_bio->retry_list);
+ if (mddev->degraded)
+ set_bit(R10BIO_Degraded, &r10_bio->state);
+
+ if (test_bit(R10BIO_WriteError,
+ &r10_bio->state))
+ close_write(r10_bio);
raid_end_bio_io(r10_bio);
}
}
@@ -3387,7 +3397,7 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
disks = mddev->raid_disks + mddev->delta_disks;
break;
}
- if (layout >> 18)
+ if (layout >> 19)
return -1;
if (chunk < (PAGE_SIZE >> 9) ||
!is_power_of_2(chunk))
@@ -3399,7 +3409,22 @@ static int setup_geo(struct geom *geo, struct mddev *mddev, enum geo_type new)
geo->near_copies = nc;
geo->far_copies = fc;
geo->far_offset = fo;
- geo->far_set_size = (layout & (1<<17)) ? disks / fc : disks;
+ switch (layout >> 17) {
+ case 0: /* original layout. simple but not always optimal */
+ geo->far_set_size = disks;
+ break;
+ case 1: /* "improved" layout which was buggy. Hopefully no-one is
+ * actually using this, but leave code here just in case.*/
+ geo->far_set_size = disks/fc;
+ WARN(geo->far_set_size < fc,
+ "This RAID10 layout does not provide data safety - please backup and create new array\n");
+ break;
+ case 2: /* "improved" layout fixed to match documentation */
+ geo->far_set_size = fc * nc;
+ break;
+ default: /* Not a valid layout */
+ return -1;
+ }
geo->chunk_mask = chunk - 1;
geo->chunk_shift = ffz(~chunk);
return nc*fc;
@@ -3486,8 +3511,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
printk(KERN_ERR "md/raid10:%s: couldn't allocate memory.\n",
mdname(mddev));
if (conf) {
- if (conf->r10bio_pool)
- mempool_destroy(conf->r10bio_pool);
+ mempool_destroy(conf->r10bio_pool);
kfree(conf->mirrors);
safe_put_page(conf->tmppage);
kfree(conf);
@@ -3682,8 +3706,7 @@ static int run(struct mddev *mddev)
out_free_conf:
md_unregister_thread(&mddev->thread);
- if (conf->r10bio_pool)
- mempool_destroy(conf->r10bio_pool);
+ mempool_destroy(conf->r10bio_pool);
safe_put_page(conf->tmppage);
kfree(conf->mirrors);
kfree(conf);
@@ -3696,8 +3719,7 @@ static void raid10_free(struct mddev *mddev, void *priv)
{
struct r10conf *conf = priv;
- if (conf->r10bio_pool)
- mempool_destroy(conf->r10bio_pool);
+ mempool_destroy(conf->r10bio_pool);
safe_put_page(conf->tmppage);
kfree(conf->mirrors);
kfree(conf->mirrors_old);
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 15ef2c641b2b..45933c160697 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -2271,8 +2271,7 @@ static void shrink_stripes(struct r5conf *conf)
drop_one_stripe(conf))
;
- if (conf->slab_cache)
- kmem_cache_destroy(conf->slab_cache);
+ kmem_cache_destroy(conf->slab_cache);
conf->slab_cache = NULL;
}
@@ -3150,6 +3149,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
spin_unlock_irq(&sh->stripe_lock);
if (test_and_clear_bit(R5_Overlap, &sh->dev[i].flags))
wake_up(&conf->wait_for_overlap);
+ if (bi)
+ s->to_read--;
while (bi && bi->bi_iter.bi_sector <
sh->dev[i].sector + STRIPE_SECTORS) {
struct bio *nextbi =
@@ -3169,6 +3170,8 @@ handle_failed_stripe(struct r5conf *conf, struct stripe_head *sh,
*/
clear_bit(R5_LOCKED, &sh->dev[i].flags);
}
+ s->to_write = 0;
+ s->written = 0;
if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -3300,7 +3303,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
*/
return 0;
- for (i = 0; i < s->failed; i++) {
+ for (i = 0; i < s->failed && i < 2; i++) {
if (fdev[i]->towrite &&
!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
!test_bit(R5_OVERWRITE, &fdev[i]->flags))
@@ -3324,7 +3327,7 @@ static int need_this_block(struct stripe_head *sh, struct stripe_head_state *s,
sh->sector < sh->raid_conf->mddev->recovery_cp)
/* reconstruct-write isn't being forced */
return 0;
- for (i = 0; i < s->failed; i++) {
+ for (i = 0; i < s->failed && i < 2; i++) {
if (s->failed_num[i] != sh->pd_idx &&
s->failed_num[i] != sh->qd_idx &&
!test_bit(R5_UPTODATE, &fdev[i]->flags) &&
@@ -3496,6 +3499,7 @@ returnbi:
}
if (!discard_pending &&
test_bit(R5_Discard, &sh->dev[sh->pd_idx].flags)) {
+ int hash;
clear_bit(R5_Discard, &sh->dev[sh->pd_idx].flags);
clear_bit(R5_UPTODATE, &sh->dev[sh->pd_idx].flags);
if (sh->qd_idx >= 0) {
@@ -3509,16 +3513,17 @@ returnbi:
* no updated data, so remove it from hash list and the stripe
* will be reinitialized
*/
- spin_lock_irq(&conf->device_lock);
unhash:
+ hash = sh->hash_lock_index;
+ spin_lock_irq(conf->hash_locks + hash);
remove_hash(sh);
+ spin_unlock_irq(conf->hash_locks + hash);
if (head_sh->batch_head) {
sh = list_first_entry(&sh->batch_list,
struct stripe_head, batch_list);
if (sh != head_sh)
goto unhash;
}
- spin_unlock_irq(&conf->device_lock);
sh = head_sh;
if (test_bit(STRIPE_SYNC_REQUESTED, &sh->state))