summaryrefslogtreecommitdiff
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r--drivers/md/raid5.c134
1 files changed, 84 insertions, 50 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 1fe7c2205353..19d77a026639 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -53,6 +53,8 @@
#include <linux/cpu.h>
#include <linux/slab.h>
#include <linux/ratelimit.h>
+#include <trace/events/block.h>
+
#include "md.h"
#include "raid5.h"
#include "raid0.h"
@@ -182,6 +184,8 @@ static void return_io(struct bio *return_bi)
return_bi = bi->bi_next;
bi->bi_next = NULL;
bi->bi_size = 0;
+ trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+ bi, 0);
bio_endio(bi, 0);
bi = return_bi;
}
@@ -466,7 +470,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
do {
wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0 || noquiesce,
- conf->device_lock, /* nothing */);
+ conf->device_lock);
sh = __find_stripe(conf, sector, conf->generation - previous);
if (!sh) {
if (!conf->inactive_blocked)
@@ -480,8 +484,7 @@ get_active_stripe(struct r5conf *conf, sector_t sector,
(atomic_read(&conf->active_stripes)
< (conf->max_nr_stripes *3/4)
|| !conf->inactive_blocked),
- conf->device_lock,
- );
+ conf->device_lock);
conf->inactive_blocked = 0;
} else
init_stripe(sh, sector, previous);
@@ -671,6 +674,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
bi->bi_next = NULL;
if (rrdev)
set_bit(R5_DOUBLE_LOCKED, &sh->dev[i].flags);
+ trace_block_bio_remap(bdev_get_queue(bi->bi_bdev),
+ bi, disk_devt(conf->mddev->gendisk),
+ sh->dev[i].sector);
generic_make_request(bi);
}
if (rrdev) {
@@ -698,6 +704,9 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s)
rbi->bi_io_vec[0].bv_offset = 0;
rbi->bi_size = STRIPE_SIZE;
rbi->bi_next = NULL;
+ trace_block_bio_remap(bdev_get_queue(rbi->bi_bdev),
+ rbi, disk_devt(conf->mddev->gendisk),
+ sh->dev[i].sector);
generic_make_request(rbi);
}
if (!rdev && !rrdev) {
@@ -1646,8 +1655,7 @@ static int resize_stripes(struct r5conf *conf, int newsize)
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
!list_empty(&conf->inactive_list),
- conf->device_lock,
- );
+ conf->device_lock);
osh = get_free_stripe(conf);
spin_unlock_irq(&conf->device_lock);
atomic_set(&nsh->count, 1);
@@ -2774,10 +2782,12 @@ static void handle_stripe_clean_event(struct r5conf *conf,
dev = &sh->dev[i];
if (!test_bit(R5_LOCKED, &dev->flags) &&
(test_bit(R5_UPTODATE, &dev->flags) ||
- test_and_clear_bit(R5_Discard, &dev->flags))) {
+ test_bit(R5_Discard, &dev->flags))) {
/* We can return any write requests */
struct bio *wbi, *wbi2;
pr_debug("Return write for disc %d\n", i);
+ if (test_and_clear_bit(R5_Discard, &dev->flags))
+ clear_bit(R5_UPTODATE, &dev->flags);
wbi = dev->written;
dev->written = NULL;
while (wbi && wbi->bi_sector <
@@ -2795,7 +2805,8 @@ static void handle_stripe_clean_event(struct r5conf *conf,
!test_bit(STRIPE_DEGRADED, &sh->state),
0);
}
- }
+ } else if (test_bit(R5_Discard, &sh->dev[i].flags))
+ clear_bit(R5_Discard, &sh->dev[i].flags);
if (test_and_clear_bit(STRIPE_FULL_WRITE, &sh->state))
if (atomic_dec_and_test(&conf->pending_full_writes))
@@ -2852,8 +2863,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
pr_debug("for sector %llu, rmw=%d rcw=%d\n",
(unsigned long long)sh->sector, rmw, rcw);
set_bit(STRIPE_HANDLE, &sh->state);
- if (rmw < rcw && rmw > 0)
+ if (rmw < rcw && rmw > 0) {
/* prefer read-modify-write, but need to get some data */
+ blk_add_trace_msg(conf->mddev->queue, "raid5 rmw %llu %d",
+ (unsigned long long)sh->sector, rmw);
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
if ((dev->towrite || i == sh->pd_idx) &&
@@ -2864,7 +2877,7 @@ static void handle_stripe_dirtying(struct r5conf *conf,
if (
test_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) {
pr_debug("Read_old block "
- "%d for r-m-w\n", i);
+ "%d for r-m-w\n", i);
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
@@ -2874,8 +2887,10 @@ static void handle_stripe_dirtying(struct r5conf *conf,
}
}
}
+ }
if (rcw <= rmw && rcw > 0) {
/* want reconstruct write, but need to get some data */
+ int qread =0;
rcw = 0;
for (i = disks; i--; ) {
struct r5dev *dev = &sh->dev[i];
@@ -2894,12 +2909,17 @@ static void handle_stripe_dirtying(struct r5conf *conf,
set_bit(R5_LOCKED, &dev->flags);
set_bit(R5_Wantread, &dev->flags);
s->locked++;
+ qread++;
} else {
set_bit(STRIPE_DELAYED, &sh->state);
set_bit(STRIPE_HANDLE, &sh->state);
}
}
}
+ if (rcw)
+ blk_add_trace_msg(conf->mddev->queue, "raid5 rcw %llu %d %d %d",
+ (unsigned long long)sh->sector,
+ rcw, qread, test_bit(STRIPE_DELAYED, &sh->state));
}
/* now if nothing is locked, and if we have enough data,
* we can start a write request
@@ -3221,10 +3241,7 @@ static void handle_stripe_expansion(struct r5conf *conf, struct stripe_head *sh)
}
/* done submitting copies, wait for them to complete */
- if (tx) {
- async_tx_ack(tx);
- dma_wait_for_async_tx(tx);
- }
+ async_tx_quiesce(&tx);
}
/*
@@ -3490,40 +3507,6 @@ static void handle_stripe(struct stripe_head *sh)
handle_failed_sync(conf, sh, &s);
}
- /*
- * might be able to return some write requests if the parity blocks
- * are safe, or on a failed drive
- */
- pdev = &sh->dev[sh->pd_idx];
- s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
- || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
- qdev = &sh->dev[sh->qd_idx];
- s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
- || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
- || conf->level < 6;
-
- if (s.written &&
- (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
- && !test_bit(R5_LOCKED, &pdev->flags)
- && (test_bit(R5_UPTODATE, &pdev->flags) ||
- test_bit(R5_Discard, &pdev->flags))))) &&
- (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
- && !test_bit(R5_LOCKED, &qdev->flags)
- && (test_bit(R5_UPTODATE, &qdev->flags) ||
- test_bit(R5_Discard, &qdev->flags))))))
- handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
-
- /* Now we might consider reading some blocks, either to check/generate
- * parity, or to satisfy requests
- * or to load a block that is being partially written.
- */
- if (s.to_read || s.non_overwrite
- || (conf->level == 6 && s.to_write && s.failed)
- || (s.syncing && (s.uptodate + s.compute < disks))
- || s.replacing
- || s.expanding)
- handle_stripe_fill(sh, &s, disks);
-
/* Now we check to see if any write operations have recently
* completed
*/
@@ -3561,6 +3544,40 @@ static void handle_stripe(struct stripe_head *sh)
s.dec_preread_active = 1;
}
+ /*
+ * might be able to return some write requests if the parity blocks
+ * are safe, or on a failed drive
+ */
+ pdev = &sh->dev[sh->pd_idx];
+ s.p_failed = (s.failed >= 1 && s.failed_num[0] == sh->pd_idx)
+ || (s.failed >= 2 && s.failed_num[1] == sh->pd_idx);
+ qdev = &sh->dev[sh->qd_idx];
+ s.q_failed = (s.failed >= 1 && s.failed_num[0] == sh->qd_idx)
+ || (s.failed >= 2 && s.failed_num[1] == sh->qd_idx)
+ || conf->level < 6;
+
+ if (s.written &&
+ (s.p_failed || ((test_bit(R5_Insync, &pdev->flags)
+ && !test_bit(R5_LOCKED, &pdev->flags)
+ && (test_bit(R5_UPTODATE, &pdev->flags) ||
+ test_bit(R5_Discard, &pdev->flags))))) &&
+ (s.q_failed || ((test_bit(R5_Insync, &qdev->flags)
+ && !test_bit(R5_LOCKED, &qdev->flags)
+ && (test_bit(R5_UPTODATE, &qdev->flags) ||
+ test_bit(R5_Discard, &qdev->flags))))))
+ handle_stripe_clean_event(conf, sh, disks, &s.return_bi);
+
+ /* Now we might consider reading some blocks, either to check/generate
+ * parity, or to satisfy requests
+ * or to load a block that is being partially written.
+ */
+ if (s.to_read || s.non_overwrite
+ || (conf->level == 6 && s.to_write && s.failed)
+ || (s.syncing && (s.uptodate + s.compute < disks))
+ || s.replacing
+ || s.expanding)
+ handle_stripe_fill(sh, &s, disks);
+
/* Now to consider new write requests and what else, if anything
* should be read. We do not handle new writes when:
* 1/ A 'write' operation (copy+xor) is already in flight.
@@ -3900,6 +3917,8 @@ static void raid5_align_endio(struct bio *bi, int error)
rdev_dec_pending(rdev, conf->mddev);
if (!error && uptodate) {
+ trace_block_bio_complete(bdev_get_queue(raid_bi->bi_bdev),
+ raid_bi, 0);
bio_endio(raid_bi, 0);
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
@@ -4000,10 +4019,13 @@ static int chunk_aligned_read(struct mddev *mddev, struct bio * raid_bio)
spin_lock_irq(&conf->device_lock);
wait_event_lock_irq(conf->wait_for_stripe,
conf->quiesce == 0,
- conf->device_lock, /* nothing */);
+ conf->device_lock);
atomic_inc(&conf->active_aligned_reads);
spin_unlock_irq(&conf->device_lock);
+ trace_block_bio_remap(bdev_get_queue(align_bi->bi_bdev),
+ align_bi, disk_devt(mddev->gendisk),
+ raid_bio->bi_sector);
generic_make_request(align_bi);
return 1;
} else {
@@ -4078,6 +4100,7 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
struct stripe_head *sh;
struct mddev *mddev = cb->cb.data;
struct r5conf *conf = mddev->private;
+ int cnt = 0;
if (cb->list.next && !list_empty(&cb->list)) {
spin_lock_irq(&conf->device_lock);
@@ -4092,9 +4115,11 @@ static void raid5_unplug(struct blk_plug_cb *blk_cb, bool from_schedule)
smp_mb__before_clear_bit();
clear_bit(STRIPE_ON_UNPLUG_LIST, &sh->state);
__release_stripe(conf, sh);
+ cnt++;
}
spin_unlock_irq(&conf->device_lock);
}
+ trace_block_unplug(mddev->queue, cnt, !from_schedule);
kfree(cb);
}
@@ -4352,6 +4377,8 @@ static void make_request(struct mddev *mddev, struct bio * bi)
if ( rw == WRITE )
md_write_end(mddev);
+ trace_block_bio_complete(bdev_get_queue(bi->bi_bdev),
+ bi, 0);
bio_endio(bi, 0);
}
}
@@ -4728,8 +4755,11 @@ static int retry_aligned_read(struct r5conf *conf, struct bio *raid_bio)
handled++;
}
remaining = raid5_dec_bi_active_stripes(raid_bio);
- if (remaining == 0)
+ if (remaining == 0) {
+ trace_block_bio_complete(bdev_get_queue(raid_bio->bi_bdev),
+ raid_bio, 0);
bio_endio(raid_bio, 0);
+ }
if (atomic_dec_and_test(&conf->active_aligned_reads))
wake_up(&conf->wait_for_stripe);
return handled;
@@ -5529,6 +5559,10 @@ static int run(struct mddev *mddev)
* discard data disk but write parity disk
*/
stripe = stripe * PAGE_SIZE;
+ /* Round up to power of 2, as discard handling
+ * currently assumes that */
+ while ((stripe-1) & stripe)
+ stripe = (stripe | (stripe-1)) + 1;
mddev->queue->limits.discard_alignment = stripe;
mddev->queue->limits.discard_granularity = stripe;
/*
@@ -6088,7 +6122,7 @@ static void raid5_quiesce(struct mddev *mddev, int state)
wait_event_lock_irq(conf->wait_for_stripe,
atomic_read(&conf->active_stripes) == 0 &&
atomic_read(&conf->active_aligned_reads) == 0,
- conf->device_lock, /* nothing */);
+ conf->device_lock);
conf->quiesce = 1;
spin_unlock_irq(&conf->device_lock);
/* allow reshape to continue */