summaryrefslogtreecommitdiff
path: root/drivers/md/raid1.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid1.c')
-rw-r--r--drivers/md/raid1.c247
1 files changed, 147 insertions, 100 deletions
diff --git a/drivers/md/raid1.c b/drivers/md/raid1.c
index 3b4d69c05623..dc9d2def0270 100644
--- a/drivers/md/raid1.c
+++ b/drivers/md/raid1.c
@@ -271,7 +271,7 @@ static int raid1_end_read_request(struct bio *bio, unsigned int bytes_done, int
*/
update_head_pos(mirror, r1_bio);
- if (uptodate || conf->working_disks <= 1) {
+ if (uptodate || (conf->raid_disks - conf->mddev->degraded) <= 1) {
/*
* Set R1BIO_Uptodate in our master bio, so that
* we will return a good error code for to the higher
@@ -601,6 +601,32 @@ static int raid1_issue_flush(request_queue_t *q, struct gendisk *disk,
return ret;
}
+static int raid1_congested(void *data, int bits)
+{
+ mddev_t *mddev = data;
+ conf_t *conf = mddev_to_conf(mddev);
+ int i, ret = 0;
+
+ rcu_read_lock();
+ for (i = 0; i < mddev->raid_disks; i++) {
+ mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
+ if (rdev && !test_bit(Faulty, &rdev->flags)) {
+ request_queue_t *q = bdev_get_queue(rdev->bdev);
+
+ /* Note the '|| 1' - when read_balance prefers
+ * non-congested targets, it can be removed
+ */
+ if ((bits & (1<<BDI_write_congested)) || 1)
+ ret |= bdi_congested(&q->backing_dev_info, bits);
+ else
+ ret &= bdi_congested(&q->backing_dev_info, bits);
+ }
+ }
+ rcu_read_unlock();
+ return ret;
+}
+
+
/* Barriers....
* Sometimes we need to suspend IO while we do something else,
* either some resync/recovery, or reconfigure the array.
@@ -929,7 +955,7 @@ static void status(struct seq_file *seq, mddev_t *mddev)
int i;
seq_printf(seq, " [%d/%d] [", conf->raid_disks,
- conf->working_disks);
+ conf->raid_disks - mddev->degraded);
rcu_read_lock();
for (i = 0; i < conf->raid_disks; i++) {
mdk_rdev_t *rdev = rcu_dereference(conf->mirrors[i].rdev);
@@ -953,26 +979,27 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev)
* else mark the drive as failed
*/
if (test_bit(In_sync, &rdev->flags)
- && conf->working_disks == 1)
+ && (conf->raid_disks - mddev->degraded) == 1)
/*
* Don't fail the drive, act as though we were just a
* normal single drive
*/
return;
- if (test_bit(In_sync, &rdev->flags)) {
+ if (test_and_clear_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded++;
- conf->working_disks--;
+ spin_unlock_irqrestore(&conf->device_lock, flags);
/*
* if recovery is running, make sure it aborts.
*/
set_bit(MD_RECOVERY_ERR, &mddev->recovery);
}
- clear_bit(In_sync, &rdev->flags);
set_bit(Faulty, &rdev->flags);
- mddev->sb_dirty = 1;
+ set_bit(MD_CHANGE_DEVS, &mddev->flags);
printk(KERN_ALERT "raid1: Disk failure on %s, disabling device. \n"
" Operation continuing on %d devices\n",
- bdevname(rdev->bdev,b), conf->working_disks);
+ bdevname(rdev->bdev,b), conf->raid_disks - mddev->degraded);
}
static void print_conf(conf_t *conf)
@@ -984,7 +1011,7 @@ static void print_conf(conf_t *conf)
printk("(!conf)\n");
return;
}
- printk(" --- wd:%d rd:%d\n", conf->working_disks,
+ printk(" --- wd:%d rd:%d\n", conf->raid_disks - conf->mddev->degraded,
conf->raid_disks);
rcu_read_lock();
@@ -1023,10 +1050,11 @@ static int raid1_spare_active(mddev_t *mddev)
mdk_rdev_t *rdev = conf->mirrors[i].rdev;
if (rdev
&& !test_bit(Faulty, &rdev->flags)
- && !test_bit(In_sync, &rdev->flags)) {
- conf->working_disks++;
+ && !test_and_set_bit(In_sync, &rdev->flags)) {
+ unsigned long flags;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded--;
- set_bit(In_sync, &rdev->flags);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
}
}
@@ -1368,6 +1396,95 @@ static void sync_request_write(mddev_t *mddev, r1bio_t *r1_bio)
* 3. Performs writes following reads for array syncronising.
*/
+static void fix_read_error(conf_t *conf, int read_disk,
+ sector_t sect, int sectors)
+{
+ mddev_t *mddev = conf->mddev;
+ while(sectors) {
+ int s = sectors;
+ int d = read_disk;
+ int success = 0;
+ int start;
+ mdk_rdev_t *rdev;
+
+ if (s > (PAGE_SIZE>>9))
+ s = PAGE_SIZE >> 9;
+
+ do {
+ /* Note: no rcu protection needed here
+ * as this is synchronous in the raid1d thread
+ * which is the thread that might remove
+ * a device. If raid1d ever becomes multi-threaded....
+ */
+ rdev = conf->mirrors[d].rdev;
+ if (rdev &&
+ test_bit(In_sync, &rdev->flags) &&
+ sync_page_io(rdev->bdev,
+ sect + rdev->data_offset,
+ s<<9,
+ conf->tmppage, READ))
+ success = 1;
+ else {
+ d++;
+ if (d == conf->raid_disks)
+ d = 0;
+ }
+ } while (!success && d != read_disk);
+
+ if (!success) {
+ /* Cannot read from anywhere -- bye bye array */
+ md_error(mddev, conf->mirrors[read_disk].rdev);
+ break;
+ }
+ /* write it back and re-read */
+ start = d;
+ while (d != read_disk) {
+ if (d==0)
+ d = conf->raid_disks;
+ d--;
+ rdev = conf->mirrors[d].rdev;
+ if (rdev &&
+ test_bit(In_sync, &rdev->flags)) {
+ if (sync_page_io(rdev->bdev,
+ sect + rdev->data_offset,
+ s<<9, conf->tmppage, WRITE)
+ == 0)
+ /* Well, this device is dead */
+ md_error(mddev, rdev);
+ }
+ }
+ d = start;
+ while (d != read_disk) {
+ char b[BDEVNAME_SIZE];
+ if (d==0)
+ d = conf->raid_disks;
+ d--;
+ rdev = conf->mirrors[d].rdev;
+ if (rdev &&
+ test_bit(In_sync, &rdev->flags)) {
+ if (sync_page_io(rdev->bdev,
+ sect + rdev->data_offset,
+ s<<9, conf->tmppage, READ)
+ == 0)
+ /* Well, this device is dead */
+ md_error(mddev, rdev);
+ else {
+ atomic_add(s, &rdev->corrected_errors);
+ printk(KERN_INFO
+ "raid1:%s: read error corrected "
+ "(%d sectors at %llu on %s)\n",
+ mdname(mddev), s,
+ (unsigned long long)sect +
+ rdev->data_offset,
+ bdevname(rdev->bdev, b));
+ }
+ }
+ }
+ sectors -= s;
+ sect += s;
+ }
+}
+
static void raid1d(mddev_t *mddev)
{
r1bio_t *r1_bio;
@@ -1460,86 +1577,14 @@ static void raid1d(mddev_t *mddev)
* This is all done synchronously while the array is
* frozen
*/
- sector_t sect = r1_bio->sector;
- int sectors = r1_bio->sectors;
- freeze_array(conf);
- if (mddev->ro == 0) while(sectors) {
- int s = sectors;
- int d = r1_bio->read_disk;
- int success = 0;
-
- if (s > (PAGE_SIZE>>9))
- s = PAGE_SIZE >> 9;
-
- do {
- /* Note: no rcu protection needed here
- * as this is synchronous in the raid1d thread
- * which is the thread that might remove
- * a device. If raid1d ever becomes multi-threaded....
- */
- rdev = conf->mirrors[d].rdev;
- if (rdev &&
- test_bit(In_sync, &rdev->flags) &&
- sync_page_io(rdev->bdev,
- sect + rdev->data_offset,
- s<<9,
- conf->tmppage, READ))
- success = 1;
- else {
- d++;
- if (d == conf->raid_disks)
- d = 0;
- }
- } while (!success && d != r1_bio->read_disk);
-
- if (success) {
- /* write it back and re-read */
- int start = d;
- while (d != r1_bio->read_disk) {
- if (d==0)
- d = conf->raid_disks;
- d--;
- rdev = conf->mirrors[d].rdev;
- if (rdev &&
- test_bit(In_sync, &rdev->flags)) {
- if (sync_page_io(rdev->bdev,
- sect + rdev->data_offset,
- s<<9, conf->tmppage, WRITE) == 0)
- /* Well, this device is dead */
- md_error(mddev, rdev);
- }
- }
- d = start;
- while (d != r1_bio->read_disk) {
- if (d==0)
- d = conf->raid_disks;
- d--;
- rdev = conf->mirrors[d].rdev;
- if (rdev &&
- test_bit(In_sync, &rdev->flags)) {
- if (sync_page_io(rdev->bdev,
- sect + rdev->data_offset,
- s<<9, conf->tmppage, READ) == 0)
- /* Well, this device is dead */
- md_error(mddev, rdev);
- else {
- atomic_add(s, &rdev->corrected_errors);
- printk(KERN_INFO "raid1:%s: read error corrected (%d sectors at %llu on %s)\n",
- mdname(mddev), s, (unsigned long long)(sect + rdev->data_offset), bdevname(rdev->bdev, b));
- }
- }
- }
- } else {
- /* Cannot read from anywhere -- bye bye array */
- md_error(mddev, conf->mirrors[r1_bio->read_disk].rdev);
- break;
- }
- sectors -= s;
- sect += s;
+ if (mddev->ro == 0) {
+ freeze_array(conf);
+ fix_read_error(conf, r1_bio->read_disk,
+ r1_bio->sector,
+ r1_bio->sectors);
+ unfreeze_array(conf);
}
- unfreeze_array(conf);
-
bio = r1_bio->bios[r1_bio->read_disk];
if ((disk=read_balance(conf, r1_bio)) == -1) {
printk(KERN_ALERT "raid1: %s: unrecoverable I/O"
@@ -1884,15 +1929,11 @@ static int run(mddev_t *mddev)
blk_queue_max_sectors(mddev->queue, PAGE_SIZE>>9);
disk->head_position = 0;
- if (!test_bit(Faulty, &rdev->flags) && test_bit(In_sync, &rdev->flags))
- conf->working_disks++;
}
conf->raid_disks = mddev->raid_disks;
conf->mddev = mddev;
spin_lock_init(&conf->device_lock);
INIT_LIST_HEAD(&conf->retry_list);
- if (conf->working_disks == 1)
- mddev->recovery_cp = MaxSector;
spin_lock_init(&conf->resync_lock);
init_waitqueue_head(&conf->wait_barrier);
@@ -1900,11 +1941,6 @@ static int run(mddev_t *mddev)
bio_list_init(&conf->pending_bio_list);
bio_list_init(&conf->flushing_bio_list);
- if (!conf->working_disks) {
- printk(KERN_ERR "raid1: no operational mirrors for %s\n",
- mdname(mddev));
- goto out_free_conf;
- }
mddev->degraded = 0;
for (i = 0; i < conf->raid_disks; i++) {
@@ -1917,6 +1953,13 @@ static int run(mddev_t *mddev)
mddev->degraded++;
}
}
+ if (mddev->degraded == conf->raid_disks) {
+ printk(KERN_ERR "raid1: no operational mirrors for %s\n",
+ mdname(mddev));
+ goto out_free_conf;
+ }
+ if (conf->raid_disks - mddev->degraded == 1)
+ mddev->recovery_cp = MaxSector;
/*
* find the first working one and use it as a starting point
@@ -1948,6 +1991,8 @@ static int run(mddev_t *mddev)
mddev->queue->unplug_fn = raid1_unplug;
mddev->queue->issue_flush_fn = raid1_issue_flush;
+ mddev->queue->backing_dev_info.congested_fn = raid1_congested;
+ mddev->queue->backing_dev_info.congested_data = mddev;
return 0;
@@ -2035,7 +2080,7 @@ static int raid1_reshape(mddev_t *mddev)
mirror_info_t *newmirrors;
conf_t *conf = mddev_to_conf(mddev);
int cnt, raid_disks;
-
+ unsigned long flags;
int d, d2;
/* Cannot change chunk_size, layout, or level */
@@ -2094,7 +2139,9 @@ static int raid1_reshape(mddev_t *mddev)
kfree(conf->poolinfo);
conf->poolinfo = newpoolinfo;
+ spin_lock_irqsave(&conf->device_lock, flags);
mddev->degraded += (raid_disks - conf->raid_disks);
+ spin_unlock_irqrestore(&conf->device_lock, flags);
conf->raid_disks = mddev->raid_disks = raid_disks;
mddev->delta_disks = 0;