From 4f024f3797c43cb4b73cd2c50cec728842d0e49e Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 11 Oct 2013 15:44:27 -0700 Subject: block: Abstract out bvec iterator MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Immutable biovecs are going to require an explicit iterator. To implement immutable bvecs, a later patch is going to add a bi_bvec_done member to this struct; for now, this patch effectively just renames things. Signed-off-by: Kent Overstreet Cc: Jens Axboe Cc: Geert Uytterhoeven Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: "Ed L. Cashin" Cc: Nick Piggin Cc: Lars Ellenberg Cc: Jiri Kosina Cc: Matthew Wilcox Cc: Geoff Levand Cc: Yehuda Sadeh Cc: Sage Weil Cc: Alex Elder Cc: ceph-devel@vger.kernel.org Cc: Joshua Morris Cc: Philip Kelleher Cc: Rusty Russell Cc: "Michael S. Tsirkin" Cc: Konrad Rzeszutek Wilk Cc: Jeremy Fitzhardinge Cc: Neil Brown Cc: Alasdair Kergon Cc: Mike Snitzer Cc: dm-devel@redhat.com Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: linux390@de.ibm.com Cc: Boaz Harrosh Cc: Benny Halevy Cc: "James E.J. Bottomley" Cc: Greg Kroah-Hartman Cc: "Nicholas A. Bellinger" Cc: Alexander Viro Cc: Chris Mason Cc: "Theodore Ts'o" Cc: Andreas Dilger Cc: Jaegeuk Kim Cc: Steven Whitehouse Cc: Dave Kleikamp Cc: Joern Engel Cc: Prasad Joshi Cc: Trond Myklebust Cc: KONISHI Ryusuke Cc: Mark Fasheh Cc: Joel Becker Cc: Ben Myers Cc: xfs@oss.sgi.com Cc: Steven Rostedt Cc: Frederic Weisbecker Cc: Ingo Molnar Cc: Len Brown Cc: Pavel Machek Cc: "Rafael J. Wysocki" Cc: Herton Ronaldo Krzesinski Cc: Ben Hutchings Cc: Andrew Morton Cc: Guo Chao Cc: Tejun Heo Cc: Asai Thambi S P Cc: Selvan Mani Cc: Sam Bradshaw Cc: Wei Yongjun Cc: "Roger Pau Monné" Cc: Jan Beulich Cc: Stefano Stabellini Cc: Ian Campbell Cc: Sebastian Ott Cc: Christian Borntraeger Cc: Minchan Kim Cc: Jiang Liu Cc: Nitin Gupta Cc: Jerome Marchand Cc: Joe Perches Cc: Peng Tao Cc: Andy Adamson Cc: fanchaoting Cc: Jie Liu Cc: Sunil Mushran Cc: "Martin K. Petersen" Cc: Namjae Jeon Cc: Pankaj Kumar Cc: Dan Magenheimer Cc: Mel Gorman 6 --- drivers/md/dm-cache-target.c | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 9efcf1059b99..86f9c83eb30c 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -664,15 +664,17 @@ static void remap_to_origin(struct cache *cache, struct bio *bio) static void remap_to_cache(struct cache *cache, struct bio *bio, dm_cblock_t cblock) { - sector_t bi_sector = bio->bi_sector; + sector_t bi_sector = bio->bi_iter.bi_sector; bio->bi_bdev = cache->cache_dev->bdev; if (!block_size_is_power_of_two(cache)) - bio->bi_sector = (from_cblock(cblock) * cache->sectors_per_block) + - sector_div(bi_sector, cache->sectors_per_block); + bio->bi_iter.bi_sector = + (from_cblock(cblock) * cache->sectors_per_block) + + sector_div(bi_sector, cache->sectors_per_block); else - bio->bi_sector = (from_cblock(cblock) << cache->sectors_per_block_shift) | - (bi_sector & (cache->sectors_per_block - 1)); + bio->bi_iter.bi_sector = + (from_cblock(cblock) << cache->sectors_per_block_shift) | + (bi_sector & (cache->sectors_per_block - 1)); } static void check_if_tick_bio_needed(struct cache *cache, struct bio *bio) @@ -712,7 +714,7 @@ static void remap_to_cache_dirty(struct cache *cache, struct bio *bio, static dm_oblock_t get_bio_block(struct cache *cache, struct bio *bio) { - sector_t block_nr = bio->bi_sector; + sector_t block_nr = bio->bi_iter.bi_sector; if (!block_size_is_power_of_two(cache)) (void) sector_div(block_nr, cache->sectors_per_block); @@ -1027,7 +1029,7 @@ static void issue_overwrite(struct dm_cache_migration *mg, struct bio *bio) static bool bio_writes_complete_block(struct cache *cache, struct bio *bio) { return (bio_data_dir(bio) == WRITE) && - (bio->bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); + (bio->bi_iter.bi_size == (cache->sectors_per_block << SECTOR_SHIFT)); } static void avoid_copy(struct dm_cache_migration *mg) @@ -1252,7 +1254,7 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) size_t pb_data_size = get_per_bio_data_size(cache); struct per_bio_data *pb = get_per_bio_data(bio, pb_data_size); - BUG_ON(bio->bi_size); + BUG_ON(bio->bi_iter.bi_size); if (!pb->req_nr) remap_to_origin(cache, bio); else @@ -1275,9 +1277,9 @@ static void process_flush_bio(struct cache *cache, struct bio *bio) */ static void process_discard_bio(struct cache *cache, struct bio *bio) { - dm_block_t start_block = dm_sector_div_up(bio->bi_sector, + dm_block_t start_block = dm_sector_div_up(bio->bi_iter.bi_sector, cache->discard_block_size); - dm_block_t end_block = bio->bi_sector + bio_sectors(bio); + dm_block_t end_block = bio_end_sector(bio); dm_block_t b; end_block = block_div(end_block, cache->discard_block_size); -- cgit v1.2.3 From 196d38bccfcfa32faed8c561868336fdfa0fe8e4 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 23 Nov 2013 18:34:15 -0800 Subject: block: Generic bio chaining This adds a generic mechanism for chaining bio completions. This is going to be used for a bio_split() replacement, and it turns out to be very useful in a fair amount of driver code - a fair number of drivers were implementing this in their own roundabout ways, often painfully. Note that this means it's no longer to call bio_endio() more than once on the same bio! This can cause problems for drivers that save/restore bi_end_io. Arguably they shouldn't be saving/restoring bi_end_io at all - in all but the simplest cases they'd be better off just cloning the bio, and immutable biovecs is making bio cloning cheaper. But for now, we add a bio_endio_nodec() for these cases. Signed-off-by: Kent Overstreet Cc: Jens Axboe --- drivers/md/bcache/io.c | 2 +- drivers/md/dm-cache-target.c | 6 ++++ drivers/md/dm-snap.c | 1 + drivers/md/dm-thin.c | 8 +++-- drivers/md/dm-verity.c | 2 +- fs/bio-integrity.c | 2 +- fs/bio.c | 76 ++++++++++++++++++++++++++++++++++++++++---- include/linux/bio.h | 2 ++ include/linux/blk_types.h | 2 ++ 9 files changed, 90 insertions(+), 11 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/bcache/io.c b/drivers/md/bcache/io.c index 0f0ab659914d..522f95778443 100644 --- a/drivers/md/bcache/io.c +++ b/drivers/md/bcache/io.c @@ -133,7 +133,7 @@ static void bch_bio_submit_split_done(struct closure *cl) s->bio->bi_end_io = s->bi_end_io; s->bio->bi_private = s->bi_private; - bio_endio(s->bio, 0); + bio_endio_nodec(s->bio, 0); closure_debug_destroy(&s->cl); mempool_free(s, s->p->bio_split_hook); diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index 86f9c83eb30c..bf3a206abd78 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -765,6 +765,12 @@ static void writethrough_endio(struct bio *bio, int err) dm_unhook_bio(&pb->hook_info, bio); + /* + * Must bump bi_remaining to allow bio to complete with + * restored bi_end_io. + */ + atomic_inc(&bio->bi_remaining); + if (err) { bio_endio(bio, err); return; diff --git a/drivers/md/dm-snap.c b/drivers/md/dm-snap.c index 3ded8c729dfb..80b5cabbea29 100644 --- a/drivers/md/dm-snap.c +++ b/drivers/md/dm-snap.c @@ -1415,6 +1415,7 @@ out: if (full_bio) { full_bio->bi_end_io = pe->full_bio_end_io; full_bio->bi_private = pe->full_bio_private; + atomic_inc(&full_bio->bi_remaining); } free_pending_exception(pe); diff --git a/drivers/md/dm-thin.c b/drivers/md/dm-thin.c index a65402480c8c..1abb4a24c338 100644 --- a/drivers/md/dm-thin.c +++ b/drivers/md/dm-thin.c @@ -611,8 +611,10 @@ static void cell_defer_no_holder(struct thin_c *tc, struct dm_bio_prison_cell *c static void process_prepared_mapping_fail(struct dm_thin_new_mapping *m) { - if (m->bio) + if (m->bio) { m->bio->bi_end_io = m->saved_bi_end_io; + atomic_inc(&m->bio->bi_remaining); + } cell_error(m->tc->pool, m->cell); list_del(&m->list); mempool_free(m, m->tc->pool->mapping_pool); @@ -626,8 +628,10 @@ static void process_prepared_mapping(struct dm_thin_new_mapping *m) int r; bio = m->bio; - if (bio) + if (bio) { bio->bi_end_io = m->saved_bi_end_io; + atomic_inc(&bio->bi_remaining); + } if (m->err) { cell_error(pool, m->cell); diff --git a/drivers/md/dm-verity.c b/drivers/md/dm-verity.c index ac35e959d49b..796007a5e0e1 100644 --- a/drivers/md/dm-verity.c +++ b/drivers/md/dm-verity.c @@ -385,7 +385,7 @@ static void verity_finish_io(struct dm_verity_io *io, int error) bio->bi_end_io = io->orig_bi_end_io; bio->bi_private = io->orig_bi_private; - bio_endio(bio, error); + bio_endio_nodec(bio, error); } static void verity_work(struct work_struct *w) diff --git a/fs/bio-integrity.c b/fs/bio-integrity.c index fed744b8c9e5..9d547d2e357c 100644 --- a/fs/bio-integrity.c +++ b/fs/bio-integrity.c @@ -502,7 +502,7 @@ static void bio_integrity_verify_fn(struct work_struct *work) /* Restore original bio completion handler */ bio->bi_end_io = bip->bip_end_io; - bio_endio(bio, error); + bio_endio_nodec(bio, error); } /** diff --git a/fs/bio.c b/fs/bio.c index e6dfa06773ac..b0a16dbc71ef 100644 --- a/fs/bio.c +++ b/fs/bio.c @@ -273,6 +273,7 @@ void bio_init(struct bio *bio) { memset(bio, 0, sizeof(*bio)); bio->bi_flags = 1 << BIO_UPTODATE; + atomic_set(&bio->bi_remaining, 1); atomic_set(&bio->bi_cnt, 1); } EXPORT_SYMBOL(bio_init); @@ -295,9 +296,35 @@ void bio_reset(struct bio *bio) memset(bio, 0, BIO_RESET_BYTES); bio->bi_flags = flags|(1 << BIO_UPTODATE); + atomic_set(&bio->bi_remaining, 1); } EXPORT_SYMBOL(bio_reset); +static void bio_chain_endio(struct bio *bio, int error) +{ + bio_endio(bio->bi_private, error); + bio_put(bio); +} + +/** + * bio_chain - chain bio completions + * + * The caller won't have a bi_end_io called when @bio completes - instead, + * @parent's bi_end_io won't be called until both @parent and @bio have + * completed; the chained bio will also be freed when it completes. + * + * The caller must not set bi_private or bi_end_io in @bio. + */ +void bio_chain(struct bio *bio, struct bio *parent) +{ + BUG_ON(bio->bi_private || bio->bi_end_io); + + bio->bi_private = parent; + bio->bi_end_io = bio_chain_endio; + atomic_inc(&parent->bi_remaining); +} +EXPORT_SYMBOL(bio_chain); + static void bio_alloc_rescue(struct work_struct *work) { struct bio_set *bs = container_of(work, struct bio_set, rescue_work); @@ -1719,16 +1746,53 @@ EXPORT_SYMBOL(bio_flush_dcache_pages); **/ void bio_endio(struct bio *bio, int error) { - if (error) - clear_bit(BIO_UPTODATE, &bio->bi_flags); - else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) - error = -EIO; + while (bio) { + BUG_ON(atomic_read(&bio->bi_remaining) <= 0); + + if (error) + clear_bit(BIO_UPTODATE, &bio->bi_flags); + else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) + error = -EIO; + + if (!atomic_dec_and_test(&bio->bi_remaining)) + return; - if (bio->bi_end_io) - bio->bi_end_io(bio, error); + /* + * Need to have a real endio function for chained bios, + * otherwise various corner cases will break (like stacking + * block devices that save/restore bi_end_io) - however, we want + * to avoid unbounded recursion and blowing the stack. Tail call + * optimization would handle this, but compiling with frame + * pointers also disables gcc's sibling call optimization. + */ + if (bio->bi_end_io == bio_chain_endio) { + struct bio *parent = bio->bi_private; + bio_put(bio); + bio = parent; + } else { + if (bio->bi_end_io) + bio->bi_end_io(bio, error); + bio = NULL; + } + } } EXPORT_SYMBOL(bio_endio); +/** + * bio_endio_nodec - end I/O on a bio, without decrementing bi_remaining + * @bio: bio + * @error: error, if any + * + * For code that has saved and restored bi_end_io; thing hard before using this + * function, probably you should've cloned the entire bio. + **/ +void bio_endio_nodec(struct bio *bio, int error) +{ + atomic_inc(&bio->bi_remaining); + bio_endio(bio, error); +} +EXPORT_SYMBOL(bio_endio_nodec); + void bio_pair_release(struct bio_pair *bp) { if (atomic_dec_and_test(&bp->cnt)) { diff --git a/include/linux/bio.h b/include/linux/bio.h index 0c32a45a419c..64f5169c224b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -356,6 +356,7 @@ static inline struct bio *bio_clone_kmalloc(struct bio *bio, gfp_t gfp_mask) } extern void bio_endio(struct bio *, int); +extern void bio_endio_nodec(struct bio *, int); struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); @@ -364,6 +365,7 @@ extern void bio_advance(struct bio *, unsigned); extern void bio_init(struct bio *); extern void bio_reset(struct bio *); +void bio_chain(struct bio *, struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d369f8f6af79..bbc3a6c88fce 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -65,6 +65,8 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; + atomic_t bi_remaining; + bio_end_io_t *bi_end_io; void *bi_private; -- cgit v1.2.3 From 8d30726912cb39c3a3ebde06214d54861f8fdde2 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 3 Dec 2013 19:16:04 -0700 Subject: dm cache: increment bi_remaining when bi_end_io is restored Move the bio->bi_remaining increment into dm_unhook_bio() so the overwrite_endio() handler works as expected. Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- drivers/md/dm-cache-target.c | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'drivers/md/dm-cache-target.c') diff --git a/drivers/md/dm-cache-target.c b/drivers/md/dm-cache-target.c index bf3a206abd78..7c8dd1f69ce0 100644 --- a/drivers/md/dm-cache-target.c +++ b/drivers/md/dm-cache-target.c @@ -85,6 +85,12 @@ static void dm_unhook_bio(struct dm_hook_info *h, struct bio *bio) { bio->bi_end_io = h->bi_end_io; bio->bi_private = h->bi_private; + + /* + * Must bump bi_remaining to allow bio to complete with + * restored bi_end_io. + */ + atomic_inc(&bio->bi_remaining); } /*----------------------------------------------------------------*/ @@ -765,12 +771,6 @@ static void writethrough_endio(struct bio *bio, int err) dm_unhook_bio(&pb->hook_info, bio); - /* - * Must bump bi_remaining to allow bio to complete with - * restored bi_end_io. - */ - atomic_inc(&bio->bi_remaining); - if (err) { bio_endio(bio, err); return; -- cgit v1.2.3