summary | refs | log | tree | commit | diff
path: root/drivers/md/raid5.c
diff options
context:
space:
mode:
author: Linus Torvalds <torvalds@linux-foundation.org> 2023-05-06 18:28:58 +0300
committer: Linus Torvalds <torvalds@linux-foundation.org> 2023-05-06 18:28:58 +0300
commit: a3b111b046f6ce5dff168af203daf2f46f3afb29 (patch)
tree: 09e899be4433dac1cccc74883bdc671fa657b144 /drivers/md/raid5.c
parent: 7644c8231987288e7aae378d2ff3c56a980d1988 (diff)
parent: c0b79b0ff53be5b05be98e3caaa6a39de1fe9520 (diff)
download: linux-a3b111b046f6ce5dff168af203daf2f46f3afb29.tar.xz
Merge tag 'for-6.4/block-2023-05-06' of git://git.kernel.dk/linux
Pull more block updates from Jens Axboe:

 - MD pull request via Song:
     - Improve raid5 sequential IO performance on spinning disks, which fixes a regression since v6.0 (Jan Kara)
     - Fix bitmap offset types, which fixes an issue introduced in this merge window (Jonathan Derrick)

 - Cleanup of hweight type used for cgroup writeback (Maxim)

 - Fix a regression with the "has_submit_bio" changes across partitions (Ming)

 - Cleanup of QUEUE_FLAG_ADD_RANDOM clearing. We used to set this flag on queues non blk-mq queues, and hence some drivers clear it unconditionally. Since all of these have since been converted to true blk-mq drivers, drop the useless clear as the bit is not set (Chaitanya)

 - Fix the flags being set in a bio for a flush for drbd (Christoph)

 - Cleanup and deduplication of the code handling setting block device capacity (Damien)

 - Fix for ublk handling IO timeouts (Ming)

 - Fix for a regression in blk-cgroup teardown (Tao)

 - NBD documentation and code fixes (Eric)

 - Convert blk-integrity to using device_attributes rather than a second kobject to manage lifetimes (Thomas)

* tag 'for-6.4/block-2023-05-06' of git://git.kernel.dk/linux:
  ublk: add timeout handler
  drbd: correctly submit flush bio on barrier
  mailmap: add mailmap entries for Jens Axboe
  block: Skip destroyed blkg when restart in blkg_destroy_all()
  writeback: fix call of incorrect macro
  md: Fix bitmap offset type in sb writer
  md/raid5: Improve performance for sequential IO
  docs nbd: userspace NBD now favors github over sourceforge
  block nbd: use req.cookie instead of req.handle
  uapi nbd: add cookie alias to handle
  uapi nbd: improve doc links to userspace spec
  blk-integrity: register sysfs attributes on struct device
  blk-integrity: convert to struct device_attribute
  blk-integrity: use sysfs_emit
  block/drivers: remove dead clear of random flag
  block: sync part's ->bd_has_submit_bio with disk's
  block: Cleanup set_capacity()/bdev_set_nr_sectors()
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- drivers/md/raid5.c | 45
1 files changed, 44 insertions, 1 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c
index 812a12e3e41a..4739ed891e75 100644
--- a/drivers/md/raid5.c
+++ b/drivers/md/raid5.c
@@ -6079,6 +6079,38 @@ out_release:
return ret;
}
+/*
+ * If the bio covers multiple data disks, find sector within the bio that has
+ * the lowest chunk offset in the first chunk.
+ */
+static sector_t raid5_bio_lowest_chunk_sector(struct r5conf *conf,
+ struct bio *bi)
+{
+ int sectors_per_chunk = conf->chunk_sectors;
+ int raid_disks = conf->raid_disks;
+ int dd_idx;
+ struct stripe_head sh;
+ unsigned int chunk_offset;
+ sector_t r_sector = bi->bi_iter.bi_sector & ~((sector_t)RAID5_STRIPE_SECTORS(conf)-1);
+ sector_t sector;
+
+ /* We pass in fake stripe_head to get back parity disk numbers */
+ sector = raid5_compute_sector(conf, r_sector, 0, &dd_idx, &sh);
+ chunk_offset = sector_div(sector, sectors_per_chunk);
+ if (sectors_per_chunk - chunk_offset >= bio_sectors(bi))
+ return r_sector;
+ /*
+ * Bio crosses to the next data disk. Check whether it's in the same
+ * chunk.
+ */
+ dd_idx++;
+ while (dd_idx == sh.pd_idx || dd_idx == sh.qd_idx)
+ dd_idx++;
+ if (dd_idx >= raid_disks)
+ return r_sector;
+ return r_sector + sectors_per_chunk - chunk_offset;
+}
+
static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
{
DEFINE_WAIT_FUNC(wait, woken_wake_function);
@@ -6150,6 +6182,17 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
}
md_account_bio(mddev, &bi);
+ /*
+ * Lets start with the stripe with the lowest chunk offset in the first
+ * chunk. That has the best chances of creating IOs adjacent to
+ * previous IOs in case of sequential IO and thus creates the most
+ * sequential IO pattern. We don't bother with the optimization when
+ * reshaping as the performance benefit is not worth the complexity.
+ */
+ if (likely(conf->reshape_progress == MaxSector))
+ logical_sector = raid5_bio_lowest_chunk_sector(conf, bi);
+ s = (logical_sector - ctx.first_sector) >> RAID5_STRIPE_SHIFT(conf);
+
add_wait_queue(&conf->wait_for_overlap, &wait);
while (1) {
res = make_stripe_request(mddev, conf, &ctx, logical_sector,
@@ -6178,7 +6221,7 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi)
continue;
}
- s = find_first_bit(ctx.sectors_to_do, stripe_cnt);
+ s = find_next_bit_wrap(ctx.sectors_to_do, stripe_cnt, s);
if (s == stripe_cnt)
break;