summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/dm-raid1.c3
-rw-r--r--drivers/md/dm-region-hash.c5
-rw-r--r--drivers/md/dm-thin.c13
-rw-r--r--drivers/md/md.c45
-rw-r--r--drivers/md/multipath.c3
-rw-r--r--drivers/md/persistent-data/dm-space-map-checker.c54
-rw-r--r--drivers/md/persistent-data/dm-space-map-disk.c11
-rw-r--r--drivers/md/persistent-data/dm-transaction-manager.c11
-rw-r--r--drivers/md/raid1.c26
-rw-r--r--drivers/md/raid10.c26
-rw-r--r--drivers/md/raid5.c67
11 files changed, 177 insertions, 87 deletions
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index d039de8322f0..b58b7a33914a 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -1084,6 +1084,7 @@ static int mirror_ctr(struct dm_target *ti, unsigned int argc, char **argv)
ti->split_io = dm_rh_get_region_size(ms->rh);
ti->num_flush_requests = 1;
ti->num_discard_requests = 1;
+ ti->discard_zeroes_data_unsupported = 1;
ms->kmirrord_wq = alloc_workqueue("kmirrord",
WQ_NON_REENTRANT | WQ_MEM_RECLAIM, 0);
@@ -1214,7 +1215,7 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
* We need to dec pending if this was a write.
*/
if (rw == WRITE) {
- if (!(bio->bi_rw & REQ_FLUSH))
+ if (!(bio->bi_rw & (REQ_FLUSH | REQ_DISCARD)))
dm_rh_dec(ms->rh, map_context->ll);
return error;
}
diff --git a/drivers/md/dm-region-hash.c b/drivers/md/dm-region-hash.c
index 7771ed212182..69732e03eb34 100644
--- a/drivers/md/dm-region-hash.c
+++ b/drivers/md/dm-region-hash.c
@@ -404,6 +404,9 @@ void dm_rh_mark_nosync(struct dm_region_hash *rh, struct bio *bio)
return;
}
+ if (bio->bi_rw & REQ_DISCARD)
+ return;
+
/* We must inform the log that the sync count has changed. */
log->type->set_region_sync(log, region, 0);
@@ -524,7 +527,7 @@ void dm_rh_inc_pending(struct dm_region_hash *rh, struct bio_list *bios)
struct bio *bio;
for (bio = bios->head; bio; bio = bio->bi_next) {
- if (bio->bi_rw & REQ_FLUSH)
+ if (bio->bi_rw & (REQ_FLUSH | REQ_DISCARD))
continue;
rh_inc(rh, dm_rh_bio_to_region(rh, bio));
}
diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c
index 37fdaf81bd1f..68694da0d21d 100644
--- a/drivers/md/dm-thin.c
+++ b/drivers/md/dm-thin.c
@@ -1245,7 +1245,10 @@ static void process_discard(struct thin_c *tc, struct bio *bio)
cell_release_singleton(cell, bio);
cell_release_singleton(cell2, bio);
- remap_and_issue(tc, bio, lookup_result.block);
+ if ((!lookup_result.shared) && pool->pf.discard_passdown)
+ remap_and_issue(tc, bio, lookup_result.block);
+ else
+ bio_endio(bio, 0);
}
break;
@@ -2292,6 +2295,13 @@ static int process_reserve_metadata_snap_mesg(unsigned argc, char **argv, struct
if (r)
return r;
+ r = dm_pool_commit_metadata(pool->pmd);
+ if (r) {
+ DMERR("%s: dm_pool_commit_metadata() failed, error = %d",
+ __func__, r);
+ return r;
+ }
+
r = dm_pool_reserve_metadata_snap(pool->pmd);
if (r)
DMWARN("reserve_metadata_snap message failed.");
@@ -2621,6 +2631,7 @@ static int thin_ctr(struct dm_target *ti, unsigned argc, char **argv)
if (tc->pool->pf.discard_enabled) {
ti->discards_supported = 1;
ti->num_discard_requests = 1;
+ ti->discard_zeroes_data_unsupported = 1;
}
dm_put(pool_md);
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 1c2f9048e1ae..d5ab4493c8be 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -2931,6 +2931,7 @@ offset_store(struct md_rdev *rdev, const char *buf, size_t len)
* can be sane */
return -EBUSY;
rdev->data_offset = offset;
+ rdev->new_data_offset = offset;
return len;
}
@@ -3926,8 +3927,8 @@ array_state_show(struct mddev *mddev, char *page)
return sprintf(page, "%s\n", array_states[st]);
}
-static int do_md_stop(struct mddev * mddev, int ro, int is_open);
-static int md_set_readonly(struct mddev * mddev, int is_open);
+static int do_md_stop(struct mddev * mddev, int ro, struct block_device *bdev);
+static int md_set_readonly(struct mddev * mddev, struct block_device *bdev);
static int do_md_run(struct mddev * mddev);
static int restart_array(struct mddev *mddev);
@@ -3943,14 +3944,14 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
/* stopping an active array */
if (atomic_read(&mddev->openers) > 0)
return -EBUSY;
- err = do_md_stop(mddev, 0, 0);
+ err = do_md_stop(mddev, 0, NULL);
break;
case inactive:
/* stopping an active array */
if (mddev->pers) {
if (atomic_read(&mddev->openers) > 0)
return -EBUSY;
- err = do_md_stop(mddev, 2, 0);
+ err = do_md_stop(mddev, 2, NULL);
} else
err = 0; /* already inactive */
break;
@@ -3958,7 +3959,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
break; /* not supported yet */
case readonly:
if (mddev->pers)
- err = md_set_readonly(mddev, 0);
+ err = md_set_readonly(mddev, NULL);
else {
mddev->ro = 1;
set_disk_ro(mddev->gendisk, 1);
@@ -3968,7 +3969,7 @@ array_state_store(struct mddev *mddev, const char *buf, size_t len)
case read_auto:
if (mddev->pers) {
if (mddev->ro == 0)
- err = md_set_readonly(mddev, 0);
+ err = md_set_readonly(mddev, NULL);
else if (mddev->ro == 1)
err = restart_array(mddev);
if (err == 0) {
@@ -5351,15 +5352,17 @@ void md_stop(struct mddev *mddev)
}
EXPORT_SYMBOL_GPL(md_stop);
-static int md_set_readonly(struct mddev *mddev, int is_open)
+static int md_set_readonly(struct mddev *mddev, struct block_device *bdev)
{
int err = 0;
mutex_lock(&mddev->open_mutex);
- if (atomic_read(&mddev->openers) > is_open) {
+ if (atomic_read(&mddev->openers) > !!bdev) {
printk("md: %s still in use.\n",mdname(mddev));
err = -EBUSY;
goto out;
}
+ if (bdev)
+ sync_blockdev(bdev);
if (mddev->pers) {
__md_stop_writes(mddev);
@@ -5381,18 +5384,26 @@ out:
* 0 - completely stop and dis-assemble array
* 2 - stop but do not disassemble array
*/
-static int do_md_stop(struct mddev * mddev, int mode, int is_open)
+static int do_md_stop(struct mddev * mddev, int mode,
+ struct block_device *bdev)
{
struct gendisk *disk = mddev->gendisk;
struct md_rdev *rdev;
mutex_lock(&mddev->open_mutex);
- if (atomic_read(&mddev->openers) > is_open ||
+ if (atomic_read(&mddev->openers) > !!bdev ||
mddev->sysfs_active) {
printk("md: %s still in use.\n",mdname(mddev));
mutex_unlock(&mddev->open_mutex);
return -EBUSY;
}
+ if (bdev)
+ /* It is possible IO was issued on some other
+ * open file which was closed before we took ->open_mutex.
+ * As that was not the last close __blkdev_put will not
+ * have called sync_blockdev, so we must.
+ */
+ sync_blockdev(bdev);
if (mddev->pers) {
if (mddev->ro)
@@ -5466,7 +5477,7 @@ static void autorun_array(struct mddev *mddev)
err = do_md_run(mddev);
if (err) {
printk(KERN_WARNING "md: do_md_run() returned %d\n", err);
- do_md_stop(mddev, 0, 0);
+ do_md_stop(mddev, 0, NULL);
}
}
@@ -5784,8 +5795,7 @@ static int add_new_disk(struct mddev * mddev, mdu_disk_info_t *info)
super_types[mddev->major_version].
validate_super(mddev, rdev);
if ((info->state & (1<<MD_DISK_SYNC)) &&
- (!test_bit(In_sync, &rdev->flags) ||
- rdev->raid_disk != info->raid_disk)) {
+ rdev->raid_disk != info->raid_disk) {
/* This was a hot-add request, but events doesn't
* match, so reject it.
*/
@@ -6482,11 +6492,11 @@ static int md_ioctl(struct block_device *bdev, fmode_t mode,
goto done_unlock;
case STOP_ARRAY:
- err = do_md_stop(mddev, 0, 1);
+ err = do_md_stop(mddev, 0, bdev);
goto done_unlock;
case STOP_ARRAY_RO:
- err = md_set_readonly(mddev, 1);
+ err = md_set_readonly(mddev, bdev);
goto done_unlock;
case BLKROSET:
@@ -6751,7 +6761,7 @@ struct md_thread *md_register_thread(void (*run) (struct mddev *), struct mddev
thread->tsk = kthread_run(md_thread, thread,
"%s_%s",
mdname(thread->mddev),
- name ?: mddev->pers->name);
+ name);
if (IS_ERR(thread->tsk)) {
kfree(thread);
return NULL;
@@ -7298,6 +7308,7 @@ void md_do_sync(struct mddev *mddev)
int skipped = 0;
struct md_rdev *rdev;
char *desc;
+ struct blk_plug plug;
/* just incase thread restarts... */
if (test_bit(MD_RECOVERY_DONE, &mddev->recovery))
@@ -7447,6 +7458,7 @@ void md_do_sync(struct mddev *mddev)
}
mddev->curr_resync_completed = j;
+ blk_start_plug(&plug);
while (j < max_sectors) {
sector_t sectors;
@@ -7552,6 +7564,7 @@ void md_do_sync(struct mddev *mddev)
* this also signals 'finished resyncing' to md_stop
*/
out:
+ blk_finish_plug(&plug);
wait_event(mddev->recovery_wait, !atomic_read(&mddev->recovery_active));
/* tell personality that we are finished */
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 9339e67fcc79..61a1833ebaf3 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -474,7 +474,8 @@ static int multipath_run (struct mddev *mddev)
}
{
- mddev->thread = md_register_thread(multipathd, mddev, NULL);
+ mddev->thread = md_register_thread(multipathd, mddev,
+ "multipath");
if (!mddev->thread) {
printk(KERN_ERR "multipath: couldn't allocate thread"
" for %s\n", mdname(mddev));
diff --git a/drivers/md/persistent-data/dm-space-map-checker.c b/drivers/md/persistent-data/dm-space-map-checker.c
index 50ed53bf4aa2..fc90c11620ad 100644
--- a/drivers/md/persistent-data/dm-space-map-checker.c
+++ b/drivers/md/persistent-data/dm-space-map-checker.c
@@ -8,6 +8,7 @@
#include <linux/device-mapper.h>
#include <linux/export.h>
+#include <linux/vmalloc.h>
#ifdef CONFIG_DM_DEBUG_SPACE_MAPS
@@ -89,13 +90,23 @@ static int ca_create(struct count_array *ca, struct dm_space_map *sm)
ca->nr = nr_blocks;
ca->nr_free = nr_blocks;
- ca->counts = kzalloc(sizeof(*ca->counts) * nr_blocks, GFP_KERNEL);
- if (!ca->counts)
- return -ENOMEM;
+
+ if (!nr_blocks)
+ ca->counts = NULL;
+ else {
+ ca->counts = vzalloc(sizeof(*ca->counts) * nr_blocks);
+ if (!ca->counts)
+ return -ENOMEM;
+ }
return 0;
}
+static void ca_destroy(struct count_array *ca)
+{
+ vfree(ca->counts);
+}
+
static int ca_load(struct count_array *ca, struct dm_space_map *sm)
{
int r;
@@ -126,12 +137,14 @@ static int ca_load(struct count_array *ca, struct dm_space_map *sm)
static int ca_extend(struct count_array *ca, dm_block_t extra_blocks)
{
dm_block_t nr_blocks = ca->nr + extra_blocks;
- uint32_t *counts = kzalloc(sizeof(*counts) * nr_blocks, GFP_KERNEL);
+ uint32_t *counts = vzalloc(sizeof(*counts) * nr_blocks);
if (!counts)
return -ENOMEM;
- memcpy(counts, ca->counts, sizeof(*counts) * ca->nr);
- kfree(ca->counts);
+ if (ca->counts) {
+ memcpy(counts, ca->counts, sizeof(*counts) * ca->nr);
+ ca_destroy(ca);
+ }
ca->nr = nr_blocks;
ca->nr_free += extra_blocks;
ca->counts = counts;
@@ -151,11 +164,6 @@ static int ca_commit(struct count_array *old, struct count_array *new)
return 0;
}
-static void ca_destroy(struct count_array *ca)
-{
- kfree(ca->counts);
-}
-
/*----------------------------------------------------------------*/
struct sm_checker {
@@ -343,25 +351,25 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
int r;
struct sm_checker *smc;
- if (!sm)
- return NULL;
+ if (IS_ERR_OR_NULL(sm))
+ return ERR_PTR(-EINVAL);
smc = kmalloc(sizeof(*smc), GFP_KERNEL);
if (!smc)
- return NULL;
+ return ERR_PTR(-ENOMEM);
memcpy(&smc->sm, &ops_, sizeof(smc->sm));
r = ca_create(&smc->old_counts, sm);
if (r) {
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
r = ca_create(&smc->counts, sm);
if (r) {
ca_destroy(&smc->old_counts);
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
smc->real_sm = sm;
@@ -371,7 +379,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
ca_destroy(&smc->counts);
ca_destroy(&smc->old_counts);
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
r = ca_commit(&smc->old_counts, &smc->counts);
@@ -379,7 +387,7 @@ struct dm_space_map *dm_sm_checker_create(struct dm_space_map *sm)
ca_destroy(&smc->counts);
ca_destroy(&smc->old_counts);
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
return &smc->sm;
@@ -391,25 +399,25 @@ struct dm_space_map *dm_sm_checker_create_fresh(struct dm_space_map *sm)
int r;
struct sm_checker *smc;
- if (!sm)
- return NULL;
+ if (IS_ERR_OR_NULL(sm))
+ return ERR_PTR(-EINVAL);
smc = kmalloc(sizeof(*smc), GFP_KERNEL);
if (!smc)
- return NULL;
+ return ERR_PTR(-ENOMEM);
memcpy(&smc->sm, &ops_, sizeof(smc->sm));
r = ca_create(&smc->old_counts, sm);
if (r) {
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
r = ca_create(&smc->counts, sm);
if (r) {
ca_destroy(&smc->old_counts);
kfree(smc);
- return NULL;
+ return ERR_PTR(r);
}
smc->real_sm = sm;
diff --git a/drivers/md/persistent-data/dm-space-map-disk.c b/drivers/md/persistent-data/dm-space-map-disk.c
index fc469ba9f627..3d0ed5332883 100644
--- a/drivers/md/persistent-data/dm-space-map-disk.c
+++ b/drivers/md/persistent-data/dm-space-map-disk.c
@@ -290,7 +290,16 @@ struct dm_space_map *dm_sm_disk_create(struct dm_transaction_manager *tm,
dm_block_t nr_blocks)
{
struct dm_space_map *sm = dm_sm_disk_create_real(tm, nr_blocks);
- return dm_sm_checker_create_fresh(sm);
+ struct dm_space_map *smc;
+
+ if (IS_ERR_OR_NULL(sm))
+ return sm;
+
+ smc = dm_sm_checker_create_fresh(sm);
+ if (IS_ERR(smc))
+ dm_sm_destroy(sm);
+
+ return smc;
}
EXPORT_SYMBOL_GPL(dm_sm_disk_create);
diff --git a/drivers/md/persistent-data/dm-transaction-manager.c b/drivers/md/persistent-data/dm-transaction-manager.c
index 400fe144c0cd..e5604b32d91f 100644
--- a/drivers/md/persistent-data/dm-transaction-manager.c
+++ b/drivers/md/persistent-data/dm-transaction-manager.c
@@ -138,6 +138,9 @@ EXPORT_SYMBOL_GPL(dm_tm_create_non_blocking_clone);
void dm_tm_destroy(struct dm_transaction_manager *tm)
{
+ if (!tm->is_clone)
+ wipe_shadow_table(tm);
+
kfree(tm);
}
EXPORT_SYMBOL_GPL(dm_tm_destroy);
@@ -344,8 +347,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm,
}
*sm = dm_sm_checker_create(inner);
- if (!*sm)
+ if (IS_ERR(*sm)) {
+ r = PTR_ERR(*sm);
goto bad2;
+ }
} else {
r = dm_bm_write_lock(dm_tm_get_bm(*tm), sb_location,
@@ -364,8 +369,10 @@ static int dm_tm_create_internal(struct dm_block_manager *bm,
}
*sm = dm_sm_checker_create(inner);
- if (!*sm)
+ if (IS_ERR(*sm)) {
+ r = PTR_ERR(*sm);
goto bad2;
+ }
}
return 0;
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index a9c7981ddd24..cacd008d6864 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -517,8 +517,8 @@ static int read_balance(struct r1conf *conf, struct r1bio *r1_bio, int *max_sect
int bad_sectors;
int disk = start_disk + i;
- if (disk >= conf->raid_disks)
- disk -= conf->raid_disks;
+ if (disk >= conf->raid_disks * 2)
+ disk -= conf->raid_disks * 2;
rdev = rcu_dereference(conf->mirrors[disk].rdev);
if (r1_bio->bios[disk] == IO_BLOCKED
@@ -883,7 +883,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
const unsigned long do_sync = (bio->bi_rw & REQ_SYNC);
const unsigned long do_flush_fua = (bio->bi_rw & (REQ_FLUSH | REQ_FUA));
struct md_rdev *blocked_rdev;
- int plugged;
int first_clone;
int sectors_handled;
int max_sectors;
@@ -1034,7 +1033,6 @@ read_again:
* the bad blocks. Each set of writes gets it's own r1bio
* with a set of bios attached.
*/
- plugged = mddev_check_plugged(mddev);
disks = conf->raid_disks * 2;
retry_write:
@@ -1191,6 +1189,8 @@ read_again:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (!mddev_check_plugged(mddev))
+ md_wakeup_thread(mddev->thread);
}
/* Mustn't call r1_bio_write_done before this next test,
* as it could result in the bio being freed.
@@ -1213,9 +1213,6 @@ read_again:
/* In case raid1d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
-
- if (do_sync || !bitmap || !plugged)
- md_wakeup_thread(mddev->thread);
}
static void status(struct seq_file *seq, struct mddev *mddev)
@@ -1821,8 +1818,14 @@ static void sync_request_write(struct mddev *mddev, struct r1bio *r1_bio)
if (atomic_dec_and_test(&r1_bio->remaining)) {
/* if we're here, all write(s) have completed, so clean up */
- md_done_sync(mddev, r1_bio->sectors, 1);
- put_buf(r1_bio);
+ int s = r1_bio->sectors;
+ if (test_bit(R1BIO_MadeGood, &r1_bio->state) ||
+ test_bit(R1BIO_WriteError, &r1_bio->state))
+ reschedule_retry(r1_bio);
+ else {
+ put_buf(r1_bio);
+ md_done_sync(mddev, s, 1);
+ }
}
}
@@ -2488,9 +2491,10 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr, int *skipp
*/
if (test_bit(MD_RECOVERY_REQUESTED, &mddev->recovery)) {
atomic_set(&r1_bio->remaining, read_targets);
- for (i = 0; i < conf->raid_disks * 2; i++) {
+ for (i = 0; i < conf->raid_disks * 2 && read_targets; i++) {
bio = r1_bio->bios[i];
if (bio->bi_end_io == end_sync_read) {
+ read_targets--;
md_sync_acct(bio->bi_bdev, nr_sectors);
generic_make_request(bio);
}
@@ -2621,7 +2625,7 @@ static struct r1conf *setup_conf(struct mddev *mddev)
goto abort;
}
err = -ENOMEM;
- conf->thread = md_register_thread(raid1d, mddev, NULL);
+ conf->thread = md_register_thread(raid1d, mddev, "raid1");
if (!conf->thread) {
printk(KERN_ERR
"md/raid1:%s: couldn't allocate thread\n",
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index 99ae6068e456..8da6282254c3 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1039,7 +1039,6 @@ static void make_request(struct mddev *mddev, struct bio * bio)
const unsigned long do_fua = (bio->bi_rw & REQ_FUA);
unsigned long flags;
struct md_rdev *blocked_rdev;
- int plugged;
int sectors_handled;
int max_sectors;
int sectors;
@@ -1239,7 +1238,6 @@ read_again:
* of r10_bios is recored in bio->bi_phys_segments just as with
* the read case.
*/
- plugged = mddev_check_plugged(mddev);
r10_bio->read_slot = -1; /* make sure repl_bio gets freed */
raid10_find_phys(conf, r10_bio);
@@ -1396,6 +1394,8 @@ retry_write:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (!mddev_check_plugged(mddev))
+ md_wakeup_thread(mddev->thread);
if (!r10_bio->devs[i].repl_bio)
continue;
@@ -1423,6 +1423,8 @@ retry_write:
bio_list_add(&conf->pending_bio_list, mbio);
conf->pending_count++;
spin_unlock_irqrestore(&conf->device_lock, flags);
+ if (!mddev_check_plugged(mddev))
+ md_wakeup_thread(mddev->thread);
}
/* Don't remove the bias on 'remaining' (one_write_done) until
@@ -1448,9 +1450,6 @@ retry_write:
/* In case raid10d snuck in to freeze_array */
wake_up(&conf->wait_barrier);
-
- if (do_sync || !mddev->bitmap || !plugged)
- md_wakeup_thread(mddev->thread);
}
static void status(struct seq_file *seq, struct mddev *mddev)
@@ -2310,7 +2309,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
if (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
- s<<9, conf->tmppage, WRITE)
+ s, conf->tmppage, WRITE)
== 0) {
/* Well, this device is dead */
printk(KERN_NOTICE
@@ -2349,7 +2348,7 @@ static void fix_read_error(struct r10conf *conf, struct mddev *mddev, struct r10
switch (r10_sync_page_io(rdev,
r10_bio->devs[sl].addr +
sect,
- s<<9, conf->tmppage,
+ s, conf->tmppage,
READ)) {
case 0:
/* Well, this device is dead */
@@ -2512,7 +2511,7 @@ read_more:
slot = r10_bio->read_slot;
printk_ratelimited(
KERN_ERR
- "md/raid10:%s: %s: redirecting"
+ "md/raid10:%s: %s: redirecting "
"sector %llu to another mirror\n",
mdname(mddev),
bdevname(rdev->bdev, b),
@@ -2661,7 +2660,8 @@ static void raid10d(struct mddev *mddev)
blk_start_plug(&plug);
for (;;) {
- flush_pending_writes(conf);
+ if (atomic_read(&mddev->plug_cnt) == 0)
+ flush_pending_writes(conf);
spin_lock_irqsave(&conf->device_lock, flags);
if (list_empty(head)) {
@@ -2890,6 +2890,12 @@ static sector_t sync_request(struct mddev *mddev, sector_t sector_nr,
/* want to reconstruct this device */
rb2 = r10_bio;
sect = raid10_find_virt(conf, sector_nr, i);
+ if (sect >= mddev->resync_max_sectors) {
+ /* last stripe is not complete - don't
+ * try to recover this sector.
+ */
+ continue;
+ }
/* Unless we are doing a full sync, or a replacement
* we only need to recover the block if it is set in
* the bitmap
@@ -3421,7 +3427,7 @@ static struct r10conf *setup_conf(struct mddev *mddev)
spin_lock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
- conf->thread = md_register_thread(raid10d, mddev, NULL);
+ conf->thread = md_register_thread(raid10d, mddev, "raid10");
if (!conf->thread)
goto out;
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index d26767246d26..04348d76bb30 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -196,12 +196,14 @@ static void __release_stripe(struct r5conf *conf, struct stripe_head *sh)
BUG_ON(!list_empty(&sh->lru));
BUG_ON(atomic_read(&conf->active_stripes)==0);
if (test_bit(STRIPE_HANDLE, &sh->state)) {
- if (test_bit(STRIPE_DELAYED, &sh->state))
+ if (test_bit(STRIPE_DELAYED, &sh->state) &&
+ !test_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
list_add_tail(&sh->lru, &conf->delayed_list);
else if (test_bit(STRIPE_BIT_DELAY, &sh->state) &&
sh->bm_seq - conf->seq_write > 0)
list_add_tail(&sh->lru, &conf->bitmap_list);
else {
+ clear_bit(STRIPE_DELAYED, &sh->state);
clear_bit(STRIPE_BIT_DELAY, &sh->state);
list_add_tail(&sh->lru, &conf->handle_list);
}
@@ -606,6 +608,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
* a chance*/
md_check_recovery(conf->mddev);
}
+ /*
+ * Because md_wait_for_blocked_rdev
+ * will dec nr_pending, we must
+ * increment it first.
+ */
+ atomic_inc(&rdev->nr_pending);
md_wait_for_blocked_rdev(rdev, conf->mddev);
} else {
/* Acknowledged bad block - skip the write */
@@ -1737,6 +1745,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
} else {
const char *bdn = bdevname(rdev->bdev, b);
int retry = 0;
+ int set_bad = 0;
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
@@ -1748,7 +1757,8 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev),
(unsigned long long)s,
bdn);
- else if (conf->mddev->degraded >= conf->max_degraded)
+ else if (conf->mddev->degraded >= conf->max_degraded) {
+ set_bad = 1;
printk_ratelimited(
KERN_WARNING
"md/raid:%s: read error not correctable "
@@ -1756,8 +1766,9 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev),
(unsigned long long)s,
bdn);
- else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
+ } else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) {
/* Oh, no!!! */
+ set_bad = 1;
printk_ratelimited(
KERN_WARNING
"md/raid:%s: read error NOT corrected!! "
@@ -1765,7 +1776,7 @@ static void raid5_end_read_request(struct bio * bi, int error)
mdname(conf->mddev),
(unsigned long long)s,
bdn);
- else if (atomic_read(&rdev->read_errors)
+ } else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
printk(KERN_WARNING
"md/raid:%s: Too many read errors, failing device %s.\n",
@@ -1777,7 +1788,11 @@ static void raid5_end_read_request(struct bio * bi, int error)
else {
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
- md_error(conf->mddev, rdev);
+ if (!(set_bad
+ && test_bit(In_sync, &rdev->flags)
+ && rdev_set_badblocks(
+ rdev, sh->sector, STRIPE_SECTORS, 0)))
+ md_error(conf->mddev, rdev);
}
}
rdev_dec_pending(rdev, conf->mddev);
@@ -3582,8 +3597,18 @@ static void handle_stripe(struct stripe_head *sh)
finish:
/* wait for this device to become unblocked */
- if (conf->mddev->external && unlikely(s.blocked_rdev))
- md_wait_for_blocked_rdev(s.blocked_rdev, conf->mddev);
+ if (unlikely(s.blocked_rdev)) {
+ if (conf->mddev->external)
+ md_wait_for_blocked_rdev(s.blocked_rdev,
+ conf->mddev);
+ else
+ /* Internal metadata will immediately
+ * be written by raid5d, so we don't
+ * need to wait here.
+ */
+ rdev_dec_pending(s.blocked_rdev,
+ conf->mddev);
+ }
if (s.handle_bad_blocks)
for (i = disks; i--; ) {
@@ -3881,8 +3906,6 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
raid_bio->bi_next = (void*)rdev;
align_bi->bi_bdev = rdev->bdev;
align_bi->bi_flags &= ~(1 << BIO_SEG_VALID);
- /* No reshape active, so we can trust rdev->data_offset */
- align_bi->bi_sector += rdev->data_offset;
if (!bio_fits_rdev(align_bi) ||
is_badblock(rdev, align_bi->bi_sector, align_bi->bi_size>>9,
@@ -3893,6 +3916,9 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
return 0;
}
+ /* No reshape active, so we can trust rdev->data_offset */
+ align_bi->bi_sector += rdev->data_offset;
+
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0,
@@ -3971,7 +3997,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
struct stripe_head *sh;
const int rw = bio_data_dir(bi);
int remaining;
- int plugged;
if (unlikely(bi->bi_rw & REQ_FLUSH)) {
md_flush_request(mddev, bi);
@@ -3990,7 +4015,6 @@ static void make_request(struct mddev *mddev, struct bio * bi)
bi->bi_next = NULL;
bi->bi_phys_segments = 1; /* over-loaded to count active stripes */
- plugged = mddev_check_plugged(mddev);
for (;logical_sector < last_sector; logical_sector += STRIPE_SECTORS) {
DEFINE_WAIT(w);
int previous;
@@ -4092,6 +4116,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
if ((bi->bi_rw & REQ_SYNC) &&
!test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state))
atomic_inc(&conf->preread_active_stripes);
+ mddev_check_plugged(mddev);
release_stripe(sh);
} else {
/* cannot get stripe for read-ahead, just give-up */
@@ -4099,10 +4124,7 @@ static void make_request(struct mddev *mddev, struct bio * bi)
finish_wait(&conf->wait_for_overlap, &w);
break;
}
-
}
- if (!plugged)
- md_wakeup_thread(mddev->thread);
spin_lock_irq(&conf->device_lock);
remaining = raid5_dec_bi_phys_segments(bi);
@@ -4823,6 +4845,7 @@ static struct r5conf *setup_conf(struct mddev *mddev)
int raid_disk, memory, max_disks;
struct md_rdev *rdev;
struct disk_info *disk;
+ char pers_name[6];
if (mddev->new_level != 5
&& mddev->new_level != 4
@@ -4946,7 +4969,8 @@ static struct r5conf *setup_conf(struct mddev *mddev)
printk(KERN_INFO "md/raid:%s: allocated %dkB\n",
mdname(mddev), memory);
- conf->thread = md_register_thread(raid5d, mddev, NULL);
+ sprintf(pers_name, "raid%d", mddev->new_level);
+ conf->thread = md_register_thread(raid5d, mddev, pers_name);
if (!conf->thread) {
printk(KERN_ERR
"md/raid:%s: couldn't allocate thread.\n",
@@ -5465,10 +5489,9 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->saved_raid_disk >= 0 &&
rdev->saved_raid_disk >= first &&
conf->disks[rdev->saved_raid_disk].rdev == NULL)
- disk = rdev->saved_raid_disk;
- else
- disk = first;
- for ( ; disk <= last ; disk++) {
+ first = rdev->saved_raid_disk;
+
+ for (disk = first; disk <= last; disk++) {
p = conf->disks + disk;
if (p->rdev == NULL) {
clear_bit(In_sync, &rdev->flags);
@@ -5477,8 +5500,11 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
if (rdev->saved_raid_disk != disk)
conf->fullsync = 1;
rcu_assign_pointer(p->rdev, rdev);
- break;
+ goto out;
}
+ }
+ for (disk = first; disk <= last; disk++) {
+ p = conf->disks + disk;
if (test_bit(WantReplacement, &p->rdev->flags) &&
p->replacement == NULL) {
clear_bit(In_sync, &rdev->flags);
@@ -5490,6 +5516,7 @@ static int raid5_add_disk(struct mddev *mddev, struct md_rdev *rdev)
break;
}
}
+out:
print_raid5_conf(conf);
return err;
}