summaryrefslogtreecommitdiff
path: root/drivers/md
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md')
-rw-r--r--drivers/md/bitmap.c17
-rw-r--r--drivers/md/dm-crypt.c1
-rw-r--r--drivers/md/linear.c1
-rw-r--r--drivers/md/md.c87
-rw-r--r--drivers/md/multipath.c4
-rw-r--r--drivers/md/raid0.c1
-rw-r--r--drivers/md/raid1.c33
-rw-r--r--drivers/md/raid10.c20
-rw-r--r--drivers/md/raid5.c87
9 files changed, 181 insertions, 70 deletions
diff --git a/drivers/md/bitmap.c b/drivers/md/bitmap.c
index c14dacdacfac..b26927ce889c 100644
--- a/drivers/md/bitmap.c
+++ b/drivers/md/bitmap.c
@@ -203,17 +203,6 @@ static void bitmap_checkfree(struct bitmap *bitmap, unsigned long page)
* bitmap file handling - read and write the bitmap file and its superblock
*/
-/* copy the pathname of a file to a buffer */
-char *file_path(struct file *file, char *buf, int count)
-{
- if (!buf)
- return NULL;
-
- buf = d_path(&file->f_path, buf, count);
-
- return IS_ERR(buf) ? NULL : buf;
-}
-
/*
* basic page I/O operations
*/
@@ -721,11 +710,13 @@ static void bitmap_file_kick(struct bitmap *bitmap)
if (bitmap->file) {
path = kmalloc(PAGE_SIZE, GFP_KERNEL);
if (path)
- ptr = file_path(bitmap->file, path, PAGE_SIZE);
+ ptr = d_path(&bitmap->file->f_path, path,
+ PAGE_SIZE);
+
printk(KERN_ALERT
"%s: kicking failed bitmap file %s from array!\n",
- bmname(bitmap), ptr ? ptr : "");
+ bmname(bitmap), IS_ERR(ptr) ? "" : ptr);
kfree(path);
} else
diff --git a/drivers/md/dm-crypt.c b/drivers/md/dm-crypt.c
index 835def11419d..ab6a61db63ce 100644
--- a/drivers/md/dm-crypt.c
+++ b/drivers/md/dm-crypt.c
@@ -432,6 +432,7 @@ static int crypt_convert(struct crypt_config *cc,
case 0:
atomic_dec(&ctx->pending);
ctx->sector++;
+ cond_resched();
continue;
/* error */
diff --git a/drivers/md/linear.c b/drivers/md/linear.c
index 0b8511776b3e..10748240cb2f 100644
--- a/drivers/md/linear.c
+++ b/drivers/md/linear.c
@@ -250,6 +250,7 @@ static int linear_run (mddev_t *mddev)
{
linear_conf_t *conf;
+ mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = linear_conf(mddev, mddev->raid_disks);
if (!conf)
diff --git a/drivers/md/md.c b/drivers/md/md.c
index 83eb78b00137..2580ac1b9b0f 100644
--- a/drivers/md/md.c
+++ b/drivers/md/md.c
@@ -74,6 +74,8 @@ static DEFINE_SPINLOCK(pers_lock);
static void md_print_devices(void);
+static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
+
#define MD_BUG(x...) { printk("md: bug in file %s, line %d\n", __FILE__, __LINE__); md_print_devices(); }
/*
@@ -274,6 +276,7 @@ static mddev_t * mddev_find(dev_t unit)
atomic_set(&new->active, 1);
spin_lock_init(&new->write_lock);
init_waitqueue_head(&new->sb_wait);
+ init_waitqueue_head(&new->recovery_wait);
new->reshape_position = MaxSector;
new->resync_max = MaxSector;
new->level = LEVEL_NONE;
@@ -3013,6 +3016,36 @@ degraded_show(mddev_t *mddev, char *page)
static struct md_sysfs_entry md_degraded = __ATTR_RO(degraded);
static ssize_t
+sync_force_parallel_show(mddev_t *mddev, char *page)
+{
+ return sprintf(page, "%d\n", mddev->parallel_resync);
+}
+
+static ssize_t
+sync_force_parallel_store(mddev_t *mddev, const char *buf, size_t len)
+{
+ long n;
+
+ if (strict_strtol(buf, 10, &n))
+ return -EINVAL;
+
+ if (n != 0 && n != 1)
+ return -EINVAL;
+
+ mddev->parallel_resync = n;
+
+ if (mddev->sync_thread)
+ wake_up(&resync_wait);
+
+ return len;
+}
+
+/* force parallel resync, even with shared block devices */
+static struct md_sysfs_entry md_sync_force_parallel =
+__ATTR(sync_force_parallel, S_IRUGO|S_IWUSR,
+ sync_force_parallel_show, sync_force_parallel_store);
+
+static ssize_t
sync_speed_show(mddev_t *mddev, char *page)
{
unsigned long resync, dt, db;
@@ -3187,6 +3220,7 @@ static struct attribute *md_redundancy_attrs[] = {
&md_sync_min.attr,
&md_sync_max.attr,
&md_sync_speed.attr,
+ &md_sync_force_parallel.attr,
&md_sync_completed.attr,
&md_max_sync.attr,
&md_suspend_lo.attr,
@@ -3691,6 +3725,8 @@ static int do_md_stop(mddev_t * mddev, int mode)
module_put(mddev->pers->owner);
mddev->pers = NULL;
+ /* tell userspace to handle 'inactive' */
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
set_capacity(disk, 0);
mddev->changed = 1;
@@ -3861,8 +3897,10 @@ static void autorun_devices(int part)
md_probe(dev, NULL, NULL);
mddev = mddev_find(dev);
- if (!mddev) {
- printk(KERN_ERR
+ if (!mddev || !mddev->gendisk) {
+ if (mddev)
+ mddev_put(mddev);
+ printk(KERN_ERR
"md: cannot allocate memory for md drive.\n");
break;
}
@@ -3987,8 +4025,8 @@ static int get_bitmap_file(mddev_t * mddev, void __user * arg)
if (!buf)
goto out;
- ptr = file_path(mddev->bitmap->file, buf, sizeof(file->pathname));
- if (!ptr)
+ ptr = d_path(&mddev->bitmap->file->f_path, buf, sizeof(file->pathname));
+ if (IS_ERR(ptr))
goto out;
strcpy(file->pathname, ptr);
@@ -5399,7 +5437,7 @@ void md_done_sync(mddev_t *mddev, int blocks, int ok)
atomic_sub(blocks, &mddev->recovery_active);
wake_up(&mddev->recovery_wait);
if (!ok) {
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
md_wakeup_thread(mddev->thread);
// stop recovery, signal do_sync ....
}
@@ -5435,8 +5473,11 @@ void md_write_start(mddev_t *mddev, struct bio *bi)
md_wakeup_thread(mddev->thread);
}
spin_unlock_irq(&mddev->write_lock);
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
}
- wait_event(mddev->sb_wait, mddev->flags==0);
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
}
void md_write_end(mddev_t *mddev)
@@ -5471,13 +5512,17 @@ void md_allow_write(mddev_t *mddev)
mddev->safemode = 1;
spin_unlock_irq(&mddev->write_lock);
md_update_sb(mddev, 0);
+
+ sysfs_notify(&mddev->kobj, NULL, "array_state");
+ /* wait for the dirty state to be recorded in the metadata */
+ wait_event(mddev->sb_wait,
+ !test_bit(MD_CHANGE_CLEAN, &mddev->flags) &&
+ !test_bit(MD_CHANGE_PENDING, &mddev->flags));
} else
spin_unlock_irq(&mddev->write_lock);
}
EXPORT_SYMBOL_GPL(md_allow_write);
-static DECLARE_WAIT_QUEUE_HEAD(resync_wait);
-
#define SYNC_MARKS 10
#define SYNC_MARK_STEP (3*HZ)
void md_do_sync(mddev_t *mddev)
@@ -5541,8 +5586,9 @@ void md_do_sync(mddev_t *mddev)
for_each_mddev(mddev2, tmp) {
if (mddev2 == mddev)
continue;
- if (mddev2->curr_resync &&
- match_mddev_units(mddev,mddev2)) {
+ if (!mddev->parallel_resync
+ && mddev2->curr_resync
+ && match_mddev_units(mddev, mddev2)) {
DEFINE_WAIT(wq);
if (mddev < mddev2 && mddev->curr_resync == 2) {
/* arbitrarily yield */
@@ -5622,7 +5668,6 @@ void md_do_sync(mddev_t *mddev)
window/2,(unsigned long long) max_sectors/2);
atomic_set(&mddev->recovery_active, 0);
- init_waitqueue_head(&mddev->recovery_wait);
last_check = 0;
if (j>2) {
@@ -5647,7 +5692,7 @@ void md_do_sync(mddev_t *mddev)
sectors = mddev->pers->sync_request(mddev, j, &skipped,
currspeed < speed_min(mddev));
if (sectors == 0) {
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
goto out;
}
@@ -5670,8 +5715,7 @@ void md_do_sync(mddev_t *mddev)
last_check = io_sectors;
- if (test_bit(MD_RECOVERY_INTR, &mddev->recovery) ||
- test_bit(MD_RECOVERY_ERR, &mddev->recovery))
+ if (test_bit(MD_RECOVERY_INTR, &mddev->recovery))
break;
repeat:
@@ -5725,8 +5769,7 @@ void md_do_sync(mddev_t *mddev)
/* tell personality that we are finished */
mddev->pers->sync_request(mddev, max_sectors, &skipped, 1);
- if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
- !test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
+ if (!test_bit(MD_RECOVERY_CHECK, &mddev->recovery) &&
mddev->curr_resync > 2) {
if (test_bit(MD_RECOVERY_SYNC, &mddev->recovery)) {
if (test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
@@ -5795,7 +5838,10 @@ static int remove_and_add_spares(mddev_t *mddev)
}
if (mddev->degraded) {
- rdev_for_each(rdev, rtmp, mddev)
+ rdev_for_each(rdev, rtmp, mddev) {
+ if (rdev->raid_disk >= 0 &&
+ !test_bit(In_sync, &rdev->flags))
+ spares++;
if (rdev->raid_disk < 0
&& !test_bit(Faulty, &rdev->flags)) {
rdev->recovery_offset = 0;
@@ -5813,6 +5859,7 @@ static int remove_and_add_spares(mddev_t *mddev)
} else
break;
}
+ }
}
return spares;
}
@@ -5826,7 +5873,7 @@ static int remove_and_add_spares(mddev_t *mddev)
* to do that as needed.
* When it is determined that resync is needed, we set MD_RECOVERY_RUNNING in
* "->recovery" and create a thread at ->sync_thread.
- * When the thread finishes it sets MD_RECOVERY_DONE (and might set MD_RECOVERY_ERR)
+ * When the thread finishes it sets MD_RECOVERY_DONE
* and wakeups up this thread which will reap the thread and finish up.
* This thread also removes any faulty devices (with nr_pending == 0).
*
@@ -5901,8 +5948,7 @@ void md_check_recovery(mddev_t *mddev)
/* resync has finished, collect result */
md_unregister_thread(mddev->sync_thread);
mddev->sync_thread = NULL;
- if (!test_bit(MD_RECOVERY_ERR, &mddev->recovery) &&
- !test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
+ if (!test_bit(MD_RECOVERY_INTR, &mddev->recovery)) {
/* success...*/
/* activate any spares */
mddev->pers->spare_active(mddev);
@@ -5926,7 +5972,6 @@ void md_check_recovery(mddev_t *mddev)
* might be left set
*/
clear_bit(MD_RECOVERY_NEEDED, &mddev->recovery);
- clear_bit(MD_RECOVERY_ERR, &mddev->recovery);
clear_bit(MD_RECOVERY_INTR, &mddev->recovery);
clear_bit(MD_RECOVERY_DONE, &mddev->recovery);
diff --git a/drivers/md/multipath.c b/drivers/md/multipath.c
index 42ee1a2dc144..e968116e0de9 100644
--- a/drivers/md/multipath.c
+++ b/drivers/md/multipath.c
@@ -327,7 +327,8 @@ static int multipath_remove_disk(mddev_t *mddev, int number)
if (rdev) {
if (test_bit(In_sync, &rdev->flags) ||
atomic_read(&rdev->nr_pending)) {
- printk(KERN_ERR "hot-remove-disk, slot %d is identified" " but is still operational!\n", number);
+ printk(KERN_ERR "hot-remove-disk, slot %d is identified"
+ " but is still operational!\n", number);
err = -EBUSY;
goto abort;
}
@@ -417,6 +418,7 @@ static int multipath_run (mddev_t *mddev)
* bookkeeping area. [whatever we allocate in multipath_run(),
* should be freed in multipath_stop()]
*/
+ mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = kzalloc(sizeof(multipath_conf_t), GFP_KERNEL);
mddev->private = conf;
diff --git a/drivers/md/raid0.c b/drivers/md/raid0.c
index 818b48284096..914c04ddec7c 100644
--- a/drivers/md/raid0.c
+++ b/drivers/md/raid0.c
@@ -280,6 +280,7 @@ static int raid0_run (mddev_t *mddev)
(mddev->chunk_size>>1)-1);
blk_queue_max_sectors(mddev->queue, mddev->chunk_size >> 9);
blk_queue_segment_boundary(mddev->queue, (mddev->chunk_size>>1) - 1);
+ mddev->queue->queue_lock = &mddev->queue->__queue_lock;
conf = kmalloc(sizeof (raid0_conf_t), GFP_KERNEL);
if (!conf)
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 6778b7cb39bd..c610b947218a 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -773,7 +773,7 @@ static int make_request(struct request_queue *q, struct bio * bio)
r1bio_t *r1_bio;
struct bio *read_bio;
int i, targets = 0, disks;
- struct bitmap *bitmap = mddev->bitmap;
+ struct bitmap *bitmap;
unsigned long flags;
struct bio_list bl;
struct page **behind_pages = NULL;
@@ -802,6 +802,8 @@ static int make_request(struct request_queue *q, struct bio * bio)
wait_barrier(conf);
+ bitmap = mddev->bitmap;
+
disk_stat_inc(mddev->gendisk, ios[rw]);
disk_stat_add(mddev->gendisk, sectors[rw], bio_sectors(bio));
@@ -1025,7 +1027,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
/*
* if recovery is running, make sure it aborts.
*/
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
} else
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1146,6 +1148,14 @@ static int raid1_remove_disk(mddev_t *mddev, int number)
err = -EBUSY;
goto abort;
}
+ /* Only remove non-faulty devices is recovery
+ * is not possible.
+ */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->degraded < conf->raid_disks) {
+ err = -EBUSY;
+ goto abort;
+ }
p->rdev = NULL;
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
@@ -1282,6 +1292,7 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
rdev_dec_pending(conf->mirrors[i].rdev, mddev);
} else {
/* fixup the bio for reuse */
+ int size;
sbio->bi_vcnt = vcnt;
sbio->bi_size = r1_bio->sectors << 9;
sbio->bi_idx = 0;
@@ -1295,10 +1306,20 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
sbio->bi_sector = r1_bio->sector +
conf->mirrors[i].rdev->data_offset;
sbio->bi_bdev = conf->mirrors[i].rdev->bdev;
- for (j = 0; j < vcnt ; j++)
- memcpy(page_address(sbio->bi_io_vec[j].bv_page),
+ size = sbio->bi_size;
+ for (j = 0; j < vcnt ; j++) {
+ struct bio_vec *bi;
+ bi = &sbio->bi_io_vec[j];
+ bi->bv_offset = 0;
+ if (size > PAGE_SIZE)
+ bi->bv_len = PAGE_SIZE;
+ else
+ bi->bv_len = size;
+ size -= PAGE_SIZE;
+ memcpy(page_address(bi->bv_page),
page_address(pbio->bi_io_vec[j].bv_page),
PAGE_SIZE);
+ }
}
}
@@ -1935,6 +1956,9 @@ static int run(mddev_t *mddev)
if (!conf->r1bio_pool)
goto out_no_mem;
+ spin_lock_init(&conf->device_lock);
+ mddev->queue->queue_lock = &conf->device_lock;
+
rdev_for_each(rdev, tmp, mddev) {
disk_idx = rdev->raid_disk;
if (disk_idx >= mddev->raid_disks
@@ -1958,7 +1982,6 @@ static int run(mddev_t *mddev)
}
conf->raid_disks = mddev->raid_disks;
conf->mddev = mddev;
- spin_lock_init(&conf->device_lock);
INIT_LIST_HEAD(&conf->retry_list);
spin_lock_init(&conf->resync_lock);
diff --git a/drivers/md/raid10.c b/drivers/md/raid10.c
index faf3d8912979..a71277b640ab 100644
--- a/drivers/md/raid10.c
+++ b/drivers/md/raid10.c
@@ -1020,7 +1020,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
/*
* if recovery is running, make sure it aborts.
*/
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
set_bit(Faulty, &rdev->flags);
set_bit(MD_CHANGE_DEVS, &mddev->flags);
@@ -1171,6 +1171,14 @@ static int raid10_remove_disk(mddev_t *mddev, int number)
err = -EBUSY;
goto abort;
}
+ /* Only remove faulty devices in recovery
+ * is not possible.
+ */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ enough(conf)) {
+ err = -EBUSY;
+ goto abort;
+ }
p->rdev = NULL;
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {
@@ -1237,6 +1245,7 @@ static void end_sync_write(struct bio *bio, int error)
if (!uptodate)
md_error(mddev, conf->mirrors[d].rdev);
+
update_head_pos(i, r10_bio);
while (atomic_dec_and_test(&r10_bio->remaining)) {
@@ -1844,7 +1853,8 @@ static sector_t sync_request(mddev_t *mddev, sector_t sector_nr, int *skipped, i
if (rb2)
atomic_dec(&rb2->remaining);
r10_bio = rb2;
- if (!test_and_set_bit(MD_RECOVERY_ERR, &mddev->recovery))
+ if (!test_and_set_bit(MD_RECOVERY_INTR,
+ &mddev->recovery))
printk(KERN_INFO "raid10: %s: insufficient working devices for recovery.\n",
mdname(mddev));
break;
@@ -2082,6 +2092,9 @@ static int run(mddev_t *mddev)
goto out_free_conf;
}
+ spin_lock_init(&conf->device_lock);
+ mddev->queue->queue_lock = &conf->device_lock;
+
rdev_for_each(rdev, tmp, mddev) {
disk_idx = rdev->raid_disk;
if (disk_idx >= mddev->raid_disks
@@ -2103,7 +2116,6 @@ static int run(mddev_t *mddev)
disk->head_position = 0;
}
- spin_lock_init(&conf->device_lock);
INIT_LIST_HEAD(&conf->retry_list);
spin_lock_init(&conf->resync_lock);
@@ -2125,6 +2137,8 @@ static int run(mddev_t *mddev)
!test_bit(In_sync, &disk->rdev->flags)) {
disk->head_position = 0;
mddev->degraded++;
+ if (disk->rdev)
+ conf->fullsync = 1;
}
}
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 087eee0cb809..54c8ee28fcc4 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -94,6 +94,8 @@
#define __inline__
#endif
+#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args)))
+
#if !RAID6_USE_EMPTY_ZERO_PAGE
/* In .bss so it's zeroed */
const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256)));
@@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error)
set_bit(R5_UPTODATE, &sh->dev[i].flags);
if (test_bit(R5_ReadError, &sh->dev[i].flags)) {
rdev = conf->disks[i].rdev;
- printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n",
- mdname(conf->mddev), STRIPE_SECTORS,
- (unsigned long long)(sh->sector + rdev->data_offset),
- bdevname(rdev->bdev, b));
+ printk_rl(KERN_INFO "raid5:%s: read error corrected"
+ " (%lu sectors at %llu on %s)\n",
+ mdname(conf->mddev), STRIPE_SECTORS,
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdevname(rdev->bdev, b));
clear_bit(R5_ReadError, &sh->dev[i].flags);
clear_bit(R5_ReWrite, &sh->dev[i].flags);
}
@@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error)
clear_bit(R5_UPTODATE, &sh->dev[i].flags);
atomic_inc(&rdev->read_errors);
if (conf->mddev->degraded)
- printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector + rdev->data_offset),
- bdn);
+ printk_rl(KERN_WARNING
+ "raid5:%s: read error not correctable "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (test_bit(R5_ReWrite, &sh->dev[i].flags))
/* Oh, no!!! */
- printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n",
- mdname(conf->mddev),
- (unsigned long long)(sh->sector + rdev->data_offset),
- bdn);
+ printk_rl(KERN_WARNING
+ "raid5:%s: read error NOT corrected!! "
+ "(sector %llu on %s).\n",
+ mdname(conf->mddev),
+ (unsigned long long)(sh->sector
+ + rdev->data_offset),
+ bdn);
else if (atomic_read(&rdev->read_errors)
> conf->max_nr_stripes)
printk(KERN_WARNING
@@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
/*
* if recovery was running, make sure it aborts.
*/
- set_bit(MD_RECOVERY_ERR, &mddev->recovery);
+ set_bit(MD_RECOVERY_INTR, &mddev->recovery);
}
set_bit(Faulty, &rdev->flags);
printk (KERN_ALERT
@@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh,
* have quiesced.
*/
if ((s->uptodate == disks - 1) &&
+ (s->failed && disk_idx == s->failed_num) &&
!test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) {
set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
set_bit(R5_Wantcompute, &dev->flags);
@@ -2077,7 +2088,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh,
/* we would like to get this block, possibly
* by computing it, but we might not be able to
*/
- if (s->uptodate == disks-1) {
+ if ((s->uptodate == disks - 1) &&
+ (s->failed && (i == r6s->failed_num[0] ||
+ i == r6s->failed_num[1]))) {
pr_debug("Computing stripe %llu block %d\n",
(unsigned long long)sh->sector, i);
compute_block_1(sh, i, 0);
@@ -2369,8 +2382,8 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
/* complete a check operation */
if (test_and_clear_bit(STRIPE_OP_CHECK, &sh->ops.complete)) {
- clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
- clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
+ clear_bit(STRIPE_OP_CHECK, &sh->ops.ack);
+ clear_bit(STRIPE_OP_CHECK, &sh->ops.pending);
if (s->failed == 0) {
if (sh->ops.zero_sum_result == 0)
/* parity is correct (on disc,
@@ -2400,16 +2413,6 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
canceled_check = 1; /* STRIPE_INSYNC is not set */
}
- /* check if we can clear a parity disk reconstruct */
- if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
- test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
-
- clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
- clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
- clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
- clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
- }
-
/* start a new check operation if there are no failures, the stripe is
* not insync, and a repair is not in flight
*/
@@ -2424,6 +2427,17 @@ static void handle_parity_checks5(raid5_conf_t *conf, struct stripe_head *sh,
}
}
+ /* check if we can clear a parity disk reconstruct */
+ if (test_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete) &&
+ test_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending)) {
+
+ clear_bit(STRIPE_OP_MOD_REPAIR_PD, &sh->ops.pending);
+ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.complete);
+ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.ack);
+ clear_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending);
+ }
+
+
/* Wait for check parity and compute block operations to complete
* before write-back. If a failure occurred while the check operation
* was in flight we need to cycle this stripe through handle_stripe
@@ -2634,6 +2648,7 @@ static void handle_stripe5(struct stripe_head *sh)
struct r5dev *dev;
unsigned long pending = 0;
mdk_rdev_t *blocked_rdev = NULL;
+ int prexor;
memset(&s, 0, sizeof(s));
pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d "
@@ -2763,9 +2778,11 @@ static void handle_stripe5(struct stripe_head *sh)
/* leave prexor set until postxor is done, allows us to distinguish
* a rmw from a rcw during biodrain
*/
+ prexor = 0;
if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) &&
test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) {
+ prexor = 1;
clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete);
clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack);
clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending);
@@ -2799,6 +2816,8 @@ static void handle_stripe5(struct stripe_head *sh)
if (!test_and_set_bit(
STRIPE_OP_IO, &sh->ops.pending))
sh->ops.count++;
+ if (prexor)
+ continue;
if (!test_bit(R5_Insync, &dev->flags) ||
(i == sh->pd_idx && s.failed == 0))
set_bit(STRIPE_INSYNC, &sh->state);
@@ -2879,6 +2898,8 @@ static void handle_stripe5(struct stripe_head *sh)
for (i = conf->raid_disks; i--; ) {
set_bit(R5_Wantwrite, &sh->dev[i].flags);
+ set_bit(R5_LOCKED, &dev->flags);
+ s.locked++;
if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending))
sh->ops.count++;
}
@@ -2892,6 +2913,7 @@ static void handle_stripe5(struct stripe_head *sh)
conf->raid_disks);
s.locked += handle_write_operations5(sh, 1, 1);
} else if (s.expanded &&
+ s.locked == 0 &&
!test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) {
clear_bit(STRIPE_EXPAND_READY, &sh->state);
atomic_dec(&conf->reshape_stripes);
@@ -4256,6 +4278,7 @@ static int run(mddev_t *mddev)
goto abort;
}
spin_lock_init(&conf->device_lock);
+ mddev->queue->queue_lock = &conf->device_lock;
init_waitqueue_head(&conf->wait_for_stripe);
init_waitqueue_head(&conf->wait_for_overlap);
INIT_LIST_HEAD(&conf->handle_list);
@@ -4285,7 +4308,9 @@ static int run(mddev_t *mddev)
" disk %d\n", bdevname(rdev->bdev,b),
raid_disk);
working_disks++;
- }
+ } else
+ /* Cannot rely on bitmap to complete recovery */
+ conf->fullsync = 1;
}
/*
@@ -4562,6 +4587,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number)
err = -EBUSY;
goto abort;
}
+ /* Only remove non-faulty devices if recovery
+ * isn't possible.
+ */
+ if (!test_bit(Faulty, &rdev->flags) &&
+ mddev->degraded <= conf->max_degraded) {
+ err = -EBUSY;
+ goto abort;
+ }
p->rdev = NULL;
synchronize_rcu();
if (atomic_read(&rdev->nr_pending)) {