diff options
author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2008-07-21 08:55:14 +0400 |
---|---|---|
committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2008-07-21 08:55:14 +0400 |
commit | 908cf4b925e419bc74f3297b2f0e51d6f8a81da2 (patch) | |
tree | 6c2da79366d4695a9c2560ab18259eca8a2a25b4 /drivers/md/raid5.c | |
parent | 92c49890922d54cba4b1eadeb0b185773c2c9570 (diff) | |
parent | 14b395e35d1afdd8019d11b92e28041fad591b71 (diff) | |
download | linux-908cf4b925e419bc74f3297b2f0e51d6f8a81da2.tar.xz |
Merge master.kernel.org:/pub/scm/linux/kernel/git/torvalds/linux-2.6 into next
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 78 |
1 files changed, 53 insertions, 25 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 93fde48c0f42..9ce7154845c6 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -94,6 +94,8 @@ #define __inline__ #endif +#define printk_rl(args...) ((void) (printk_ratelimit() && printk(args))) + #if !RAID6_USE_EMPTY_ZERO_PAGE /* In .bss so it's zeroed */ const char raid6_empty_zero_page[PAGE_SIZE] __attribute__((aligned(256))); @@ -1143,10 +1145,12 @@ static void raid5_end_read_request(struct bio * bi, int error) set_bit(R5_UPTODATE, &sh->dev[i].flags); if (test_bit(R5_ReadError, &sh->dev[i].flags)) { rdev = conf->disks[i].rdev; - printk(KERN_INFO "raid5:%s: read error corrected (%lu sectors at %llu on %s)\n", - mdname(conf->mddev), STRIPE_SECTORS, - (unsigned long long)(sh->sector + rdev->data_offset), - bdevname(rdev->bdev, b)); + printk_rl(KERN_INFO "raid5:%s: read error corrected" + " (%lu sectors at %llu on %s)\n", + mdname(conf->mddev), STRIPE_SECTORS, + (unsigned long long)(sh->sector + + rdev->data_offset), + bdevname(rdev->bdev, b)); clear_bit(R5_ReadError, &sh->dev[i].flags); clear_bit(R5_ReWrite, &sh->dev[i].flags); } @@ -1160,16 +1164,22 @@ static void raid5_end_read_request(struct bio * bi, int error) clear_bit(R5_UPTODATE, &sh->dev[i].flags); atomic_inc(&rdev->read_errors); if (conf->mddev->degraded) - printk(KERN_WARNING "raid5:%s: read error not correctable (sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector + rdev->data_offset), - bdn); + printk_rl(KERN_WARNING + "raid5:%s: read error not correctable " + "(sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)(sh->sector + + rdev->data_offset), + bdn); else if (test_bit(R5_ReWrite, &sh->dev[i].flags)) /* Oh, no!!! */ - printk(KERN_WARNING "raid5:%s: read error NOT corrected!! (sector %llu on %s).\n", - mdname(conf->mddev), - (unsigned long long)(sh->sector + rdev->data_offset), - bdn); + printk_rl(KERN_WARNING + "raid5:%s: read error NOT corrected!! " + "(sector %llu on %s).\n", + mdname(conf->mddev), + (unsigned long long)(sh->sector + + rdev->data_offset), + bdn); else if (atomic_read(&rdev->read_errors) > conf->max_nr_stripes) printk(KERN_WARNING @@ -1258,7 +1268,7 @@ static void error(mddev_t *mddev, mdk_rdev_t *rdev) /* * if recovery was running, make sure it aborts. */ - set_bit(MD_RECOVERY_ERR, &mddev->recovery); + set_bit(MD_RECOVERY_INTR, &mddev->recovery); } set_bit(Faulty, &rdev->flags); printk (KERN_ALERT @@ -1992,6 +2002,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, * have quiesced. */ if ((s->uptodate == disks - 1) && + (s->failed && disk_idx == s->failed_num) && !test_bit(STRIPE_OP_CHECK, &sh->ops.pending)) { set_bit(STRIPE_OP_COMPUTE_BLK, &sh->ops.pending); set_bit(R5_Wantcompute, &dev->flags); @@ -2006,12 +2017,7 @@ static int __handle_issuing_new_read_requests5(struct stripe_head *sh, */ s->uptodate++; return 0; /* uptodate + compute == disks */ - } else if ((s->uptodate < disks - 1) && - test_bit(R5_Insync, &dev->flags)) { - /* Note: we hold off compute operations while checks are - * in flight, but we still prefer 'compute' over 'read' - * hence we only read if (uptodate < * disks-1) - */ + } else if (test_bit(R5_Insync, &dev->flags)) { set_bit(R5_LOCKED, &dev->flags); set_bit(R5_Wantread, &dev->flags); if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) @@ -2077,7 +2083,9 @@ static void handle_issuing_new_read_requests6(struct stripe_head *sh, /* we would like to get this block, possibly * by computing it, but we might not be able to */ - if (s->uptodate == disks-1) { + if ((s->uptodate == disks - 1) && + (s->failed && (i == r6s->failed_num[0] || + i == r6s->failed_num[1]))) { pr_debug("Computing stripe %llu block %d\n", (unsigned long long)sh->sector, i); compute_block_1(sh, i, 0); @@ -2635,6 +2643,7 @@ static void handle_stripe5(struct stripe_head *sh) struct r5dev *dev; unsigned long pending = 0; mdk_rdev_t *blocked_rdev = NULL; + int prexor; memset(&s, 0, sizeof(s)); pr_debug("handling stripe %llu, state=%#lx cnt=%d, pd_idx=%d " @@ -2764,9 +2773,11 @@ static void handle_stripe5(struct stripe_head *sh) /* leave prexor set until postxor is done, allows us to distinguish * a rmw from a rcw during biodrain */ + prexor = 0; if (test_bit(STRIPE_OP_PREXOR, &sh->ops.complete) && test_bit(STRIPE_OP_POSTXOR, &sh->ops.complete)) { + prexor = 1; clear_bit(STRIPE_OP_PREXOR, &sh->ops.complete); clear_bit(STRIPE_OP_PREXOR, &sh->ops.ack); clear_bit(STRIPE_OP_PREXOR, &sh->ops.pending); @@ -2800,6 +2811,8 @@ static void handle_stripe5(struct stripe_head *sh) if (!test_and_set_bit( STRIPE_OP_IO, &sh->ops.pending)) sh->ops.count++; + if (prexor) + continue; if (!test_bit(R5_Insync, &dev->flags) || (i == sh->pd_idx && s.failed == 0)) set_bit(STRIPE_INSYNC, &sh->state); @@ -2880,6 +2893,8 @@ static void handle_stripe5(struct stripe_head *sh) for (i = conf->raid_disks; i--; ) { set_bit(R5_Wantwrite, &sh->dev[i].flags); + set_bit(R5_LOCKED, &dev->flags); + s.locked++; if (!test_and_set_bit(STRIPE_OP_IO, &sh->ops.pending)) sh->ops.count++; } @@ -2893,6 +2908,7 @@ static void handle_stripe5(struct stripe_head *sh) conf->raid_disks); s.locked += handle_write_operations5(sh, 1, 1); } else if (s.expanded && + s.locked == 0 && !test_bit(STRIPE_OP_POSTXOR, &sh->ops.pending)) { clear_bit(STRIPE_EXPAND_READY, &sh->state); atomic_dec(&conf->reshape_stripes); @@ -3298,15 +3314,17 @@ static int raid5_congested(void *data, int bits) /* We want read requests to align with chunks where possible, * but write requests don't need to. */ -static int raid5_mergeable_bvec(struct request_queue *q, struct bio *bio, struct bio_vec *biovec) +static int raid5_mergeable_bvec(struct request_queue *q, + struct bvec_merge_data *bvm, + struct bio_vec *biovec) { mddev_t *mddev = q->queuedata; - sector_t sector = bio->bi_sector + get_start_sect(bio->bi_bdev); + sector_t sector = bvm->bi_sector + get_start_sect(bvm->bi_bdev); int max; unsigned int chunk_sectors = mddev->chunk_size >> 9; - unsigned int bio_sectors = bio->bi_size >> 9; + unsigned int bio_sectors = bvm->bi_size >> 9; - if (bio_data_dir(bio) == WRITE) + if ((bvm->bi_rw & 1) == WRITE) return biovec->bv_len; /* always allow writes to be mergeable */ max = (chunk_sectors - ((sector & (chunk_sectors - 1)) + bio_sectors)) << 9; @@ -4287,7 +4305,9 @@ static int run(mddev_t *mddev) " disk %d\n", bdevname(rdev->bdev,b), raid_disk); working_disks++; - } + } else + /* Cannot rely on bitmap to complete recovery */ + conf->fullsync = 1; } /* @@ -4564,6 +4584,14 @@ static int raid5_remove_disk(mddev_t *mddev, int number) err = -EBUSY; goto abort; } + /* Only remove non-faulty devices if recovery + * isn't possible. + */ + if (!test_bit(Faulty, &rdev->flags) && + mddev->degraded <= conf->max_degraded) { + err = -EBUSY; + goto abort; + } p->rdev = NULL; synchronize_rcu(); if (atomic_read(&rdev->nr_pending)) { |