From c4cf5261f8bffd9de132b50660a69148e7575bd6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 17 Apr 2015 16:15:18 -0600 Subject: bio: skip atomic inc/dec of ->bi_remaining for non-chains Struct bio has an atomic ref count for chained bio's, and we use this to know when to end IO on the bio. However, most bio's are not chained, so we don't need to always introduce this atomic operation as part of ending IO. Add a helper to elevate the bi_remaining count, and flag the bio as now actually needing the decrement at end_io time. Rename the field to __bi_remaining to catch any current users of this doing the incrementing manually. For high IOPS workloads, this reduces the overhead of bio_endio() substantially. Tested-by: Robert Elliott Acked-by: Kent Overstreet Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- drivers/md/dm-cache-target.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 7755af351867..705eb7b99d69 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -91,7 +91,7 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) * Must bump bi_remaining to allow bio to complete with * restored bi_end_io. */ - atomic_inc(&bio->bi_remaining); + bio_inc_remaining(bio); } /*----------------------------------------------------------------*/ -- cgit v1.2.3 From 326e1dbb57368087a36607aaebe9795b8d5453e5 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 22 May 2015 09:14:03 -0400 Subject: block: remove management of bi_remaining when restoring original bi_end_io Commit c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains") regressed all existing callers that followed this pattern: 1) saving a bio's original bi_end_io 2) wiring up an intermediate bi_end_io 3) restoring the original bi_end_io from intermediate bi_end_io 4) calling bio_endio() to execute the restored original bi_end_io The regression was due to BIO_CHAIN only ever getting set if bio_inc_remaining() is called. For the above pattern it isn't set until step 3 above (step 2 would've needed to establish BIO_CHAIN). As such the first bio_endio(), in step 2 above, never decremented __bi_remaining before calling the intermediate bi_end_io -- leaving __bi_remaining with the value 1 instead of 0. When bio_inc_remaining() occurred during step 3 it brought it to a value of 2. When the second bio_endio() was called, in step 4 above, it should've called the original bi_end_io but it didn't because there was an extra reference that wasn't dropped (due to atomic operations being optimized away since BIO_CHAIN wasn't set upfront). Fix this issue by removing the __bi_remaining management complexity for all callers that use the above pattern -- bio_chain() is the only interface that _needs_ to be concerned with __bi_remaining. For the above pattern callers just expect the bi_end_io they set to get called! Remove bio_endio_nodec() and also remove all bio_inc_remaining() calls that aren't associated with the bio_chain() interface. Also, the bio_inc_remaining() interface has been moved local to bio.c. Fixes: c4cf5261 ("bio: skip atomic inc/dec of ->bi_remaining for non-chains") Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/bio-integrity.c | 4 ++-- block/bio.c | 35 ++++++++++++++--------------------- drivers/md/bcache/io.c | 2 +- drivers/md/dm-cache-target.c | 6 ------ drivers/md/dm-raid1.c | 2 -- drivers/md/dm-snap.c | 1 - drivers/md/dm-thin.c | 9 +++------ drivers/md/dm-verity.c | 2 +- fs/btrfs/disk-io.c | 2 +- fs/btrfs/volumes.c | 16 +++++----------- fs/btrfs/volumes.h | 2 -- include/linux/bio.h | 12 ------------ 12 files changed, 27 insertions(+), 66 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 5cbd5d9ea61d..0436c21db7f2 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -361,7 +361,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; - bio_endio_nodec(bio, error); + bio_endio(bio, error); } /** @@ -388,7 +388,7 @@ void bio_integrity_endio(struct bio *bio, int error) */ if (error) { bio->bi_end_io = bip->bip_end_io; - bio_endio_nodec(bio, error); + bio_endio(bio, error); return; } diff --git a/block/bio.c b/block/bio.c index c2ff8a88aef1..259197d97de1 100644 --- a/block/bio.c +++ b/block/bio.c @@ -303,6 +303,17 @@ static void bio_chain_endio(struct bio *bio, int error) bio_put(bio); } +/* + * Increment chain count for the bio. Make sure the CHAIN flag update + * is visible before the raised count. + */ +static inline void bio_inc_remaining(struct bio *bio) +{ + bio->bi_flags |= (1 << BIO_CHAIN); + smp_mb__before_atomic(); + atomic_inc(&bio->__bi_remaining); +} + /** * bio_chain - chain bio completions * @bio: the target bio @@ -1756,8 +1767,10 @@ static inline bool bio_remaining_done(struct bio *bio) BUG_ON(atomic_read(&bio->__bi_remaining) <= 0); - if (atomic_dec_and_test(&bio->__bi_remaining)) + if (atomic_dec_and_test(&bio->__bi_remaining)) { + clear_bit(BIO_CHAIN, &bio->bi_flags); return true; + } return false; } @@ -1808,26 +1821,6 @@ void bio_endio(struct bio *bio, int error) } EXPORT_SYMBOL(bio_endio); -/** - * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining - * @bio: bio - * @error: error, if any - * - * For code that has saved and restored bi_end_io; thing hard before using this - * function, probably you should've cloned the entire bio. - **/ -void bio_endio_nodec(struct bio *bio, int error) -{ - /* - * If it's not flagged as a chain, we are not going to dec the count - */ - if (bio_flagged(bio, BIO_CHAIN)) - bio_inc_remaining(bio); - - bio_endio(bio, error); -} -EXPORT_SYMBOL(bio_endio_nodec); - /** * bio_split - split a bio * @bio: bio to split diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index fa028fa82df4..cb64e64a4789 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -55,7 +55,7 @@ static void bch_bio_submit_split_done(struct closure *cl) s->bio->bi_end_io = s->bi_end_io; s->bio->bi_private = s->bi_private; - bio_endio_nodec(s->bio, 0); + bio_endio(s->bio, 0); closure_debug_destroy(&s->cl); mempool_free(s, s->p->bio_split_hook); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 705eb7b99d69..41b2594a80c6 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -86,12 +86,6 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) { bio->bi_end_io = h->bi_end_io; bio->bi_private = h->bi_private; - - /* - * Must bump bi_remaining to allow bio to complete with - * restored bi_end_io. - */ - bio_inc_remaining(bio); } /*----------------------------------------------------------------*/ diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c index d6a1c096b777..743fa9bbae9e 100644 --- a/drivers/md/dm-raid1.c +++ b/drivers/md/dm-raid1.c @@ -1254,8 +1254,6 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio, int error) dm_bio_restore(bd, bio); bio_record->details.bi_bdev = NULL; - bio_inc_remaining(bio); - queue_bio(ms, bio, rw); return DM_ENDIO_INCOMPLETE; } diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 8bfeae218531..7c82d3ccce87 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1478,7 +1478,6 @@ out: if (full_bio) { full_bio->bi_end_io = pe->full_bio_end_io; full_bio->bi_private = pe->full_bio_private; - bio_inc_remaining(full_bio); } increment_pending_exceptions_done_count(); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index 342dbdad6131..e852602c0091 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -793,10 +793,9 @@ static void inc_remap_and_issue_cell(struct thin_c *tc, static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { - if (m->bio) { + if (m->bio) m->bio->bi_end_io = m->saved_bi_end_io; - bio_inc_remaining(m->bio); - } + cell_error(m->tc->pool, m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); @@ -810,10 +809,8 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) int r; bio = m->bio; - if (bio) { + if (bio) bio->bi_end_io = m->saved_bi_end_io; - bio_inc_remaining(bio); - } if (m->err) { cell_error(pool, m->cell); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index 66616db33e6f..bb9c6a00e4b0 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -459,7 +459,7 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; - bio_endio_nodec(bio, error); + bio_endio(bio, error); } static void verity_work(struct work_struct *w) diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index e08a926fe12c..0bccf18dc1dc 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -1745,7 +1745,7 @@ static void end_workqueue_fn(struct btrfs_work *work) bio->bi_private = end_io_wq->private; bio->bi_end_io = end_io_wq->end_io; kmem_cache_free(btrfs_end_io_wq_cache, end_io_wq); - bio_endio_nodec(bio, error); + bio_endio(bio, error); } static int cleaner_kthread(void *arg) diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index 8e8d1d1e28a5..dac77d42a9ab 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -5585,10 +5585,10 @@ int btrfs_rmap_block(struct btrfs_mapping_tree *map_tree, static inline void btrfs_end_bbio(struct btrfs_bio *bbio, struct bio *bio, int err) { - if (likely(bbio->flags & BTRFS_BIO_ORIG_BIO_SUBMITTED)) - bio_endio_nodec(bio, err); - else - bio_endio(bio, err); + bio->bi_private = bbio->private; + bio->bi_end_io = bbio->end_io; + bio_endio(bio, err); + btrfs_put_bbio(bbio); } @@ -5632,8 +5632,6 @@ static void btrfs_end_bio(struct bio *bio, int err) bio = bbio->orig_bio; } - bio->bi_private = bbio->private; - bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; /* only send an error to the higher layers if it is * beyond the tolerance of the btrfs bio @@ -5815,8 +5813,6 @@ static void bbio_error(struct btrfs_bio *bbio, struct bio *bio, u64 logical) /* Shoud be the original bio. */ WARN_ON(bio != bbio->orig_bio); - bio->bi_private = bbio->private; - bio->bi_end_io = bbio->end_io; btrfs_io_bio(bio)->mirror_num = bbio->mirror_num; bio->bi_iter.bi_sector = logical >> 9; @@ -5897,10 +5893,8 @@ int btrfs_map_bio(struct btrfs_root *root, int rw, struct bio *bio, if (dev_nr < total_devs - 1) { bio = btrfs_bio_clone(first_bio, GFP_NOFS); BUG_ON(!bio); /* -ENOMEM */ - } else { + } else bio = first_bio; - bbio->flags |= BTRFS_BIO_ORIG_BIO_SUBMITTED; - } submit_stripe_bio(root, bbio, bio, bbio->stripes[dev_nr].physical, dev_nr, rw, diff --git a/fs/btrfs/volumes.h b/fs/btrfs/volumes.h index ebc31331a837..cedae0356558 100644 --- a/fs/btrfs/volumes.h +++ b/fs/btrfs/volumes.h @@ -292,8 +292,6 @@ struct btrfs_bio_stripe { struct btrfs_bio; typedef void (btrfs_bio_end_io_t) (struct btrfs_bio *bio, int err); -#define BTRFS_BIO_ORIG_BIO_SUBMITTED (1 << 0) - struct btrfs_bio { atomic_t refs; atomic_t stripes_pending; diff --git a/include/linux/bio.h b/include/linux/bio.h index 7486ea103f6e..f0291cf64cc5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -427,7 +427,6 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } extern void bio_endio(struct bio *, int); -extern void bio_endio_nodec(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); @@ -658,17 +657,6 @@ static inline struct bio *bio_list_get(struct bio_list *bl) return bio; } -/* - * Increment chain count for the bio. Make sure the CHAIN flag update - * is visible before the raised count. - */ -static inline void bio_inc_remaining(struct bio *bio) -{ - bio->bi_flags |= (1 << BIO_CHAIN); - smp_mb__before_atomic(); - atomic_inc(&bio->__bi_remaining); -} - /* * bio_set is used to allow other portions of the IO system to * allocate their own private memory pools for bio and iovec structures. -- cgit v1.2.3