From 4b7381326424809577eb43bf635ea1d43a095d89 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 May 2026 18:31:15 +0100 Subject: buffer: Add bh_submit() bh_submit() takes a bio_end_io allowing users to avoid the indirect function call through bh->b_end_io, and eventually allowing us to remove bh->b_end_io. Signed-off-by: Matthew Wilcox (Oracle) Link: https://patch.msgid.link/20260528173150.1093780-3-willy@infradead.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner (Amutable) --- include/linux/buffer_head.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e4939e33b4b5..d59980e4adda 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -240,6 +240,7 @@ int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void submit_bh(blk_opf_t, struct buffer_head *); +void bh_submit(struct buffer_head *, blk_opf_t, bio_end_io_t); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -- cgit v1.2.3 From 86ecf5704ed3c731284b9c60e46dcc5d115ebe7c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 May 2026 18:31:17 +0100 Subject: buffer: Add bh_end_read(), bh_end_write() and bh_end_async_write() These are the bio_end_io_t versions of end_buffer_read_sync(), end_buffer_write_sync() and end_buffer_async_write(). They do not contain a put_bh() call as it is no longer necessary. Also add the helper function bio_endio_bh(). 
Signed-off-by: Matthew Wilcox (Oracle) Link: https://patch.msgid.link/20260528173150.1093780-5-willy@infradead.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner (Amutable) --- fs/buffer.c | 89 +++++++++++++++++++++++++++++++++++++++++---- include/linux/buffer_head.h | 6 +++ 2 files changed, 88 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 4dcce64ef006..e22a94afa385 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -129,6 +129,34 @@ static void buffer_io_error(struct buffer_head *bh, char *msg) bh->b_bdev, (unsigned long long)bh->b_blocknr, msg); } +/** + * bio_endio_bh - Discard the bio used to submit a buffer. + * @bio: The bio. + * @bhp: Where to return the buffer_head. + * + * Call this in your bio_end_io handler to retrieve the buffer_head + * submitted in bh_submit(). If you did not call bh_submit(), do not + * call this function; it will return garbage. + * + * This function consumes the bio refcount which will probably free the + * bio. + * + * Return: True if the I/O succeeded. + */ +bool bio_endio_bh(struct bio *bio, struct buffer_head **bhp) +{ + bool success = bio->bi_status == BLK_STS_OK; + struct buffer_head *bh = bio->bi_private; + + if (unlikely(bio_flagged(bio, BIO_QUIET))) + set_bit(BH_Quiet, &bh->b_state); + bio_put(bio); + + *bhp = bh; + return success; +} +EXPORT_SYMBOL(bio_endio_bh); + /* * End-of-IO handler helper function which does not touch the bh after * unlocking it. @@ -159,7 +187,22 @@ void end_buffer_read_sync(struct buffer_head *bh, int uptodate) } EXPORT_SYMBOL(end_buffer_read_sync); -void end_buffer_write_sync(struct buffer_head *bh, int uptodate) +/** + * bh_end_read - I/O end handler for reads + * @bio: The bio being completed. + * + * Pass this function to bh_submit() if you're reading into the buffer, + * unless you need your own special I/O end handler. 
+ */ +void bh_end_read(struct bio *bio) +{ + struct buffer_head *bh; + bool uptodate = bio_endio_bh(bio, &bh); + __end_buffer_read_notouch(bh, uptodate); +} +EXPORT_SYMBOL(bh_end_read); + +static void __end_buffer_write_sync(struct buffer_head *bh, int uptodate) { if (uptodate) { set_buffer_uptodate(bh); @@ -169,10 +212,30 @@ void end_buffer_write_sync(struct buffer_head *bh, int uptodate) clear_buffer_uptodate(bh); } unlock_buffer(bh); +} + +void end_buffer_write_sync(struct buffer_head *bh, int uptodate) +{ + __end_buffer_write_sync(bh, uptodate); put_bh(bh); } EXPORT_SYMBOL(end_buffer_write_sync); +/** + * bh_end_write - I/O end handler for writes + * @bio: The bio being completed. + * + * Pass this function to bh_submit() if you're writing from the buffer, + * unless you need your own special I/O end handler. + */ +void bh_end_write(struct bio *bio) +{ + struct buffer_head *bh; + bool success = bio_endio_bh(bio, &bh); + __end_buffer_write_sync(bh, success); +} +EXPORT_SYMBOL(bh_end_write); + static struct buffer_head * __find_get_block_slow(struct block_device *bdev, sector_t block, bool atomic) { @@ -416,6 +479,21 @@ still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); } +/** + * bh_end_async_write - I/O end handler for async folio writes + * @bio: The bio being completed. + * + * Pass this function to bh_submit() if you're doing the equivalent of + * block_write_full_folio(). 
+ */ +void bh_end_async_write(struct bio *bio) +{ + struct buffer_head *bh; + bool success = bio_endio_bh(bio, &bh); + end_buffer_async_write(bh, success); +} +EXPORT_SYMBOL(bh_end_async_write); + /* * If a page's buffers are under async readin (end_buffer_async_read * completion) then there is a possibility that another thread of @@ -1150,13 +1228,10 @@ EXPORT_SYMBOL(__bforget); static void end_bio_bh_io_sync(struct bio *bio) { - struct buffer_head *bh = bio->bi_private; - - if (unlikely(bio_flagged(bio, BIO_QUIET))) - set_bit(BH_Quiet, &bh->b_state); + struct buffer_head *bh; + bool uptodate = bio_endio_bh(bio, &bh); - bh->b_end_io(bh, !bio->bi_status); - bio_put(bio); + bh->b_end_io(bh, uptodate); } static void buffer_set_crypto_ctx(struct bio *bio, const struct buffer_head *bh, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d59980e4adda..b0a31a90fa79 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -204,6 +204,12 @@ struct buffer_head *create_empty_buffers(struct folio *folio, unsigned long blocksize, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); void end_buffer_write_sync(struct buffer_head *bh, int uptodate); +bool bio_endio_bh(struct bio *bio, struct buffer_head **bhp); + +/* Completion routines suitable for passing to bh_submit() */ +void bh_end_read(struct bio *bio); +void bh_end_write(struct bio *bio); +void bh_end_async_write(struct bio *bio); /* Things to do with metadata buffers list */ void mmb_mark_buffer_dirty(struct buffer_head *bh, struct mapping_metadata_bhs *mmb); -- cgit v1.2.3 From e6eff926482d0f6ebe038a15348158682f5711a0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 May 2026 18:31:39 +0100 Subject: buffer: Remove mark_buffer_async_write() There are no more callers of this function, so delete it. end_buffer_async_write() then has only one caller left, so inline it into bh_end_async_write(). 
Signed-off-by: Matthew Wilcox (Oracle) Link: https://patch.msgid.link/20260528173150.1093780-27-willy@infradead.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner (Amutable) --- fs/buffer.c | 38 ++++++++++++-------------------------- include/linux/buffer_head.h | 1 - 2 files changed, 12 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index 50bcef50a4a4..d35f584a06f9 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -438,12 +438,19 @@ static void bh_end_async_read(struct bio *bio) end_buffer_async_read(bh, uptodate); } -/* - * Completion handler for block_write_full_folio() - folios which are unlocked - * during I/O, and which have the writeback flag cleared upon I/O completion. +/** + * bh_end_async_write - I/O end handler for async folio writes + * @bio: The bio being completed. + * + * Pass this function to bh_submit() if you're doing the equivalent of + * block_write_full_folio(). That is, the folio is unlocked, and will + * have its writeback flag cleared once all async write buffers have + * completed. */ -static void end_buffer_async_write(struct buffer_head *bh, int uptodate) +void bh_end_async_write(struct bio *bio) { + struct buffer_head *bh; + bool success = bio_endio_bh(bio, &bh); unsigned long flags; struct buffer_head *first; struct buffer_head *tmp; @@ -452,7 +459,7 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) BUG_ON(!buffer_async_write(bh)); folio = bh->b_folio; - if (uptodate) { + if (success) { set_buffer_uptodate(bh); } else { buffer_io_error(bh, ", lost async page write"); @@ -480,29 +487,8 @@ static void end_buffer_async_write(struct buffer_head *bh, int uptodate) still_busy: spin_unlock_irqrestore(&first->b_uptodate_lock, flags); } - -/** - * bh_end_async_write - I/O end handler for async folio writes - * @bio: The bio being completed. - * - * Pass this function to bh_submit() if you're doing the equivalent of - * block_write_full_folio(). 
- */ -void bh_end_async_write(struct bio *bio) -{ - struct buffer_head *bh; - bool success = bio_endio_bh(bio, &bh); - end_buffer_async_write(bh, success); -} EXPORT_SYMBOL(bh_end_async_write); -void mark_buffer_async_write(struct buffer_head *bh) -{ - bh->b_end_io = end_buffer_async_write; - set_buffer_async_write(bh); -} -EXPORT_SYMBOL(mark_buffer_async_write); - /* * fs/buffer.c contains helper functions for buffer-backed address space's diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index b0a31a90fa79..f0a9f3f05ac1 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -224,7 +224,6 @@ static inline void clean_bdev_bh_alias(struct buffer_head *bh) clean_bdev_aliases(bh->b_bdev, bh->b_blocknr, 1); } -void mark_buffer_async_write(struct buffer_head *bh); void __wait_on_buffer(struct buffer_head *); wait_queue_head_t *bh_waitq_head(struct buffer_head *bh); struct buffer_head *__find_get_block(struct block_device *bdev, sector_t block, -- cgit v1.2.3 From 2911b935825523a71a71eecfe908902d181c041e Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 May 2026 18:31:44 +0100 Subject: buffer: Remove submit_bh() No users are left; remove this API. Also remove/fix comments mentioning it, and end_bio_bh_io_sync() as it's now unused. 
Signed-off-by: Matthew Wilcox (Oracle) Link: https://patch.msgid.link/20260528173150.1093780-32-willy@infradead.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner (Amutable) --- Documentation/trace/ftrace.rst | 4 ++-- fs/buffer.c | 27 ++++++--------------------- include/linux/buffer_head.h | 3 +-- mm/vmscan.c | 2 +- 4 files changed, 10 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/Documentation/trace/ftrace.rst b/Documentation/trace/ftrace.rst index b9efb148a5c2..2ed1b96e440b 100644 --- a/Documentation/trace/ftrace.rst +++ b/Documentation/trace/ftrace.rst @@ -1624,7 +1624,7 @@ function-trace, we get a much larger output:: => blk_queue_bio => submit_bio_noacct => submit_bio - => submit_bh + => bh_submit => __ext3_get_inode_loc => ext3_iget => ext3_lookup @@ -1909,7 +1909,7 @@ tracers. => blk_queue_bio => submit_bio_noacct => submit_bio - => submit_bh + => bh_submit => ext3_bread => ext3_dir_bread => htree_dirblock_to_tree diff --git a/fs/buffer.c b/fs/buffer.c index d35f584a06f9..3df0ea1a6342 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1187,14 +1187,6 @@ void __bforget(struct buffer_head *bh) } EXPORT_SYMBOL(__bforget); -static void end_bio_bh_io_sync(struct bio *bio) -{ - struct buffer_head *bh; - bool uptodate = bio_endio_bh(bio, &bh); - - bh->b_end_io(bh, uptodate); -} - static void buffer_set_crypto_ctx(struct bio *bio, const struct buffer_head *bh, gfp_t gfp_mask) { @@ -1829,15 +1821,15 @@ static struct buffer_head *folio_create_buffers(struct folio *folio, /* * While block_write_full_folio is writing back the dirty buffers under - * the page lock, whoever dirtied the buffers may decide to clean them + * the folio lock, whoever dirtied the buffers may decide to clean them * again at any time. We handle that by only looking at the buffer * state inside lock_buffer(). * * If block_write_full_folio() is called for regular writeback - * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a page which has a - * locked buffer. 
This only can happen if someone has written the buffer - * directly, with submit_bh(). At the address_space level PageWriteback - * prevents this contention from occurring. + * (wbc->sync_mode == WB_SYNC_NONE) then it will redirty a folio which + * has a locked buffer. This only can happen if someone has written + * the buffer directly, with bh_submit(). At the address_space level + * the folio writeback flag prevents this contention from occurring. * * If block_write_full_folio() is called with wbc->sync_mode == * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this @@ -1954,7 +1946,7 @@ done: /* * The folio was marked dirty, but the buffers were * clean. Someone wrote them back by hand with - * write_dirty_buffer/submit_bh. A rare case. + * write_dirty_buffer/bh_submit. A rare case. */ folio_end_writeback(folio); @@ -2800,13 +2792,6 @@ sector_t generic_block_bmap(struct address_space *mapping, sector_t block, } EXPORT_SYMBOL(generic_block_bmap); -void submit_bh(blk_opf_t opf, struct buffer_head *bh) -{ - BUG_ON(!bh->b_end_io); - __bh_submit(bh, opf, WRITE_LIFE_NOT_SET, NULL, end_bio_bh_io_sync); -} -EXPORT_SYMBOL(submit_bh); - void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) { lock_buffer(bh); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index f0a9f3f05ac1..7629130d42c4 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -55,7 +55,7 @@ typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); * is the bio, and buffer_heads are used for extracting block * mappings (via a get_block_t call), for tracking state within * a folio (via a folio_mapping) and for wrapping bio submission - * for backward compatibility reasons (e.g. submit_bh). + * for backward compatibility reasons (e.g. bh_submit). 
*/ struct buffer_head { unsigned long b_state; /* buffer state bitmap (see above) */ @@ -244,7 +244,6 @@ void __lock_buffer(struct buffer_head *bh); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); -void submit_bh(blk_opf_t, struct buffer_head *); void bh_submit(struct buffer_head *, blk_opf_t, bio_end_io_t); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); diff --git a/mm/vmscan.c b/mm/vmscan.c index bd1b1aa12581..67231d3189ef 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1449,7 +1449,7 @@ retry: * is possible for a folio to have the dirty flag set, * but it is actually clean (all its buffers are clean). * This happens if the buffers were written out directly, - * with submit_bh(). ext3 will do this, as well as + * with bh_submit(). ext3 will do this, as well as * the blockdev mapping. filemap_release_folio() will * discover that cleanness and will drop the buffers * and mark the folio clean - it can be freed. -- cgit v1.2.3 From ac75b922bb67cc8edb52006c9346dc0ca91d04c8 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 May 2026 18:31:45 +0100 Subject: buffer: Remove b_end_io This shrinks buffer_head by 8 bytes, letting us pack more buffer heads per slab. With a Debian config, it shrinks from 104 bytes to 96 bytes which is 42 objects per 4KiB page rather than 39, a 7% reduction in the amount of memory used. 
Signed-off-by: Matthew Wilcox (Oracle) Link: https://patch.msgid.link/20260528173150.1093780-33-willy@infradead.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner (Amutable) --- Documentation/filesystems/locking.rst | 14 -------------- drivers/md/raid5.h | 6 +++--- fs/buffer.c | 1 - include/linux/buffer_head.h | 4 +--- 4 files changed, 4 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 8421ea21bd35..a27aca42fd85 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -416,20 +416,6 @@ lm_open_conflict yes no no lm_breaker_timedout yes no no ====================== ============= ================= ========= -buffer_head -=========== - -prototypes:: - - void (*b_end_io)(struct buffer_head *bh, int uptodate); - -locking rules: - -called from interrupts. In other words, extreme care is needed here. -bh is locked, but that's all warranties we have here. Currently only RAID1, -highmem, fs/buffer.c, and fs/ntfs/aops.c are providing these. Block devices -call this method upon the IO completion. - block_device_operations ======================= prototypes:: diff --git a/drivers/md/raid5.h b/drivers/md/raid5.h index 1c7b710fc9c1..1dfa60a41d91 100644 --- a/drivers/md/raid5.h +++ b/drivers/md/raid5.h @@ -38,7 +38,7 @@ * Clean -> Dirty - on compute_parity to satisfy write/sync (RECONSTRUCT or RMW) * * The Want->Empty, Want->Clean, Dirty->Clean, transitions - * all happen in b_end_io at interrupt time. + * all happen in end_io at interrupt time. * Each sets the Uptodate bit before releasing the Lock bit. * This leaves one multi-stage transition: * Want->Dirty->Clean @@ -64,7 +64,7 @@ * together, but we are not guaranteed of that so we allow for more. 
* * If a buffer is on the read list when the associated cache buffer is - * Uptodate, the data is copied into the read buffer and it's b_end_io + * Uptodate, the data is copied into the read buffer and its end_io * routine is called. This may happen in the end_request routine only * if the buffer has just successfully been read. end_request should * remove the buffers from the list and then set the Uptodate bit on @@ -76,7 +76,7 @@ * into the cache buffer, which is then marked dirty, and moved onto a * third list, the written list (bh_written). Once both the parity * block and the cached buffer are successfully written, any buffer on - * a written list can be returned with b_end_io. + * a written list can be returned with end_io. * * The write list and read list both act as fifos. The read list, * write list and written list are protected by the device_lock. diff --git a/fs/buffer.c b/fs/buffer.c index 3df0ea1a6342..ccda92920175 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -946,7 +946,6 @@ static sector_t folio_init_buffers(struct folio *folio, do { if (!buffer_mapped(bh)) { - bh->b_end_io = NULL; bh->b_private = NULL; bh->b_bdev = bdev; bh->b_blocknr = block; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 7629130d42c4..1ee56c9f2327 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -46,7 +46,6 @@ enum bh_state_bits { struct page; struct buffer_head; struct address_space; -typedef void (bh_end_io_t)(struct buffer_head *bh, int uptodate); /* * Historically, a buffer_head was used to map a single block @@ -70,8 +69,7 @@ struct buffer_head { char *b_data; /* pointer to data within the page */ struct block_device *b_bdev; - bh_end_io_t *b_end_io; /* I/O completion */ - void *b_private; /* reserved for b_end_io */ + void *b_private; /* reserved for bio_end_io */ struct list_head b_assoc_buffers; /* associated with another mapping */ struct mapping_metadata_bhs *b_mmb; /* head of the list of metadata bhs * this 
buffer is associated with */ -- cgit v1.2.3 From b20f15420f786a029be09b1d9b81695581d1a122 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 28 May 2026 18:31:47 +0100 Subject: buffer: Remove end_buffer_write_sync() It has no callers left, so delete it. Inline __end_buffer_write_sync() into bh_end_write(). Signed-off-by: Matthew Wilcox (Oracle) Link: https://patch.msgid.link/20260528173150.1093780-35-willy@infradead.org Reviewed-by: Jan Kara Signed-off-by: Christian Brauner (Amutable) --- fs/buffer.c | 29 +++++++++-------------------- include/linux/buffer_head.h | 1 - 2 files changed, 9 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/fs/buffer.c b/fs/buffer.c index db604c0e54d4..7ed8dd77d221 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -192,25 +192,6 @@ void bh_end_read(struct bio *bio) } EXPORT_SYMBOL(bh_end_read); -static void __end_buffer_write_sync(struct buffer_head *bh, int uptodate) -{ - if (uptodate) { - set_buffer_uptodate(bh); - } else { - buffer_io_error(bh, ", lost sync page write"); - mark_buffer_write_io_error(bh); - clear_buffer_uptodate(bh); - } - unlock_buffer(bh); -} - -void end_buffer_write_sync(struct buffer_head *bh, int uptodate) -{ - __end_buffer_write_sync(bh, uptodate); - put_bh(bh); -} -EXPORT_SYMBOL(end_buffer_write_sync); - /** * bh_end_write - I/O end handler for writes * @bio: The bio being completed. 
@@ -222,7 +203,15 @@ void bh_end_write(struct bio *bio) { struct buffer_head *bh; bool success = bio_endio_bh(bio, &bh); - __end_buffer_write_sync(bh, success); + + if (success) { + set_buffer_uptodate(bh); + } else { + buffer_io_error(bh, ", lost sync page write"); + mark_buffer_write_io_error(bh); + clear_buffer_uptodate(bh); + } + unlock_buffer(bh); } EXPORT_SYMBOL(bh_end_write); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 1ee56c9f2327..8b23bc9a244c 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -201,7 +201,6 @@ struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size); struct buffer_head *create_empty_buffers(struct folio *folio, unsigned long blocksize, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); -void end_buffer_write_sync(struct buffer_head *bh, int uptodate); bool bio_endio_bh(struct bio *bio, struct buffer_head **bhp); /* Completion routines suitable for passing to bh_submit() */ -- cgit v1.2.3