diff options
| author | Benjamin Marzinski <bmarzins@redhat.com> | 2026-04-08 07:35:48 +0300 |
|---|---|---|
| committer | Yu Kuai <yukuai@fnnas.com> | 2026-04-28 15:44:37 +0300 |
| commit | 418b3e64e4459feb3f75979de9ec89e085745343 (patch) | |
| tree | 967be77c114c8a3e13df3c0697853cfa5181fa0d | |
| parent | 0898a817621a2f0cddca8122d9b974003fe5036d (diff) | |
| download | linux-418b3e64e4459feb3f75979de9ec89e085745343.tar.xz | |
md/raid5: Fix UAF on IO across the reshape position
If make_stripe_request() returns STRIPE_WAIT_RESHAPE,
raid5_make_request() will free the cloned bio. But raid5_make_request()
can call make_stripe_request() multiple times, writing to the various
stripes. If that bio got added to the toread or towrite lists of a
stripe disk in an earlier call to make_stripe_request(), then it's not
safe to just free the bio if a later part of it is found to cross the
reshape position. Doing so can lead to a UAF error, when bio_endio()
is called on the bio for the earlier stripes.
Instead, raid5_make_request() needs to wait until all parts of the bio
have called bio_endio(). To do this, bios that cross the reshape
position while the reshape can't make progress are flagged as needing to
wait for all parts to complete. When raid5_make_request() has a bio that
failed make_stripe_request() with STRIPE_WAIT_RESHAPE, it sets
bi->bi_private to a completion struct and waits for completion after
ending the bio. When the bio_endio() is called for the last time on a
clone bio with bi->bi_private set, it wakes up the waiter. This
guarantees that raid5_make_request() doesn't return until the cloned bio
needing a retry for io across the reshape boundary is safely cleaned up.
There is a simple reproducer available at [1]. Compile the kernel with
KASAN for more useful reporting when the error is triggered (this is not
necessary to see the bug).
[1] https://gist.github.com/bmarzins/e48598824305cf2171289e47d7241fa5
Signed-off-by: Benjamin Marzinski <bmarzins@redhat.com>
Reviewed-by: Xiao Ni <xni@redhat.com>
Link: https://lore.kernel.org/r/20260408043548.1695157-1-bmarzins@redhat.com
Signed-off-by: Yu Kuai <yukuai@fnnas.com>
| -rw-r--r-- | drivers/md/md.c | 31 | ||||
| -rw-r--r-- | drivers/md/md.h | 1 | ||||
| -rw-r--r-- | drivers/md/raid5.c | 7 |
3 files changed, 14 insertions, 25 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index 5fb5ae8368ba..8656f2eff40d 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -9341,9 +9341,11 @@ static void md_bitmap_end(struct mddev *mddev, struct md_io_clone *md_io_clone) static void md_end_clone_io(struct bio *bio) { - struct md_io_clone *md_io_clone = bio->bi_private; + struct md_io_clone *md_io_clone = container_of(bio, struct md_io_clone, + bio_clone); struct bio *orig_bio = md_io_clone->orig_bio; struct mddev *mddev = md_io_clone->mddev; + struct completion *reshape_completion = bio->bi_private; if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false)) md_bitmap_end(mddev, md_io_clone); @@ -9355,7 +9357,10 @@ static void md_end_clone_io(struct bio *bio) bio_end_io_acct(orig_bio, md_io_clone->start_time); bio_put(bio); - bio_endio(orig_bio); + if (unlikely(reshape_completion)) + complete(reshape_completion); + else + bio_endio(orig_bio); percpu_ref_put(&mddev->active_io); } @@ -9380,7 +9385,7 @@ static void md_clone_bio(struct mddev *mddev, struct bio **bio) } clone->bi_end_io = md_end_clone_io; - clone->bi_private = md_io_clone; + clone->bi_private = NULL; *bio = clone; } @@ -9391,26 +9396,6 @@ void md_account_bio(struct mddev *mddev, struct bio **bio) } EXPORT_SYMBOL_GPL(md_account_bio); -void md_free_cloned_bio(struct bio *bio) -{ - struct md_io_clone *md_io_clone = bio->bi_private; - struct bio *orig_bio = md_io_clone->orig_bio; - struct mddev *mddev = md_io_clone->mddev; - - if (bio_data_dir(orig_bio) == WRITE && md_bitmap_enabled(mddev, false)) - md_bitmap_end(mddev, md_io_clone); - - if (bio->bi_status && !orig_bio->bi_status) - orig_bio->bi_status = bio->bi_status; - - if (md_io_clone->start_time) - bio_end_io_acct(orig_bio, md_io_clone->start_time); - - bio_put(bio); - percpu_ref_put(&mddev->active_io); -} -EXPORT_SYMBOL_GPL(md_free_cloned_bio); - /* md_allow_write(mddev) * Calling this ensures that the array is marked 'active' so that writes * may proceed without blocking. It is important to call this before diff --git a/drivers/md/md.h b/drivers/md/md.h index d6f5482e2479..12d86aa7e6f9 100644 --- a/drivers/md/md.h +++ b/drivers/md/md.h @@ -920,7 +920,6 @@ extern void md_finish_reshape(struct mddev *mddev); void md_submit_discard_bio(struct mddev *mddev, struct md_rdev *rdev, struct bio *bio, sector_t start, sector_t size); void md_account_bio(struct mddev *mddev, struct bio **bio); -void md_free_cloned_bio(struct bio *bio); extern bool __must_check md_flush_request(struct mddev *mddev, struct bio *bio); void md_write_metadata(struct mddev *mddev, struct md_rdev *rdev, diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 6e79829c5acb..0d76e82f4506 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -6217,7 +6217,12 @@ static bool raid5_make_request(struct mddev *mddev, struct bio * bi) mempool_free(ctx, conf->ctx_pool); if (res == STRIPE_WAIT_RESHAPE) { - md_free_cloned_bio(bi); + DECLARE_COMPLETION_ONSTACK(done); + WRITE_ONCE(bi->bi_private, &done); + + bio_endio(bi); + + wait_for_completion(&done); return false; } |
