From 3ae7286943ae6f6bfecfe0a3da9d1a4c64f5531f Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:12 -0700 Subject: fs/buffer: Use the new blk_opf_t type Improve static type checking by using the new blk_opf_t type for block layer request flags. Change WRITE into REQ_OP_WRITE. This patch does not change any functionality since REQ_OP_WRITE == WRITE == 1. Reviewed-by: Jan Kara Cc: Al Viro Cc: Christoph Hellwig Cc: Matthew Wilcox Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-47-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c9d1463bb20f..9795df9400bd 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -9,6 +9,7 @@ #define _LINUX_BUFFER_HEAD_H #include <linux/types.h> +#include <linux/blk_types.h> #include <linux/fs.h> #include <linux/linkage.h> #include <linux/pagemap.h> @@ -201,11 +202,11 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(int, int, int, struct buffer_head * bh[]); +void ll_rw_block(enum req_op, blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); -int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); -void write_dirty_buffer(struct buffer_head *bh, int op_flags); -int submit_bh(int, int, struct buffer_head *); +int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); +void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); +int submit_bh(enum req_op, blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -- cgit v1.2.3 From 1420c4a549bf28ffddbed827d61fb3d4d2132ddb Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 14 Jul 2022 11:07:13 -0700 Subject: fs/buffer: Combine two submit_bh() and ll_rw_block() arguments Both submit_bh() and ll_rw_block() accept a request operation type and request flags as their first two arguments. Micro-optimize these two functions by combining these first two arguments into a single argument. This patch does not change the behavior of any of the modified code.
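For illustration, the resulting calling convention looks like this (a sketch distilled from the hunks below; REQ_OP_MASK and the REQ_* flag bits are defined in include/linux/blk_types.h):

	/* Caller, before: the operation and the flags are separate arguments. */
	submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh);

	/* Caller, after: a single blk_opf_t argument carries both. */
	submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh);

	/* Callee, after: the bare operation is masked back out when needed,
	 * as submit_bh_wbc() and ll_rw_block() do below. */
	const enum req_op op = opf & REQ_OP_MASK;

This is safe because the REQ_OP_* values occupy the low bits of blk_opf_t while the REQ_* flags occupy the high bits, so the two never overlap.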
Cc: Alexander Viro Cc: Jan Kara Acked-by: Song Liu (for the md changes) Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20220714180729.1065367-48-bvanassche@acm.org Signed-off-by: Jens Axboe --- drivers/md/md-bitmap.c | 4 ++-- fs/buffer.c | 53 +++++++++++++++++++++++---------------------- fs/ext4/fast_commit.c | 2 +- fs/ext4/mmp.c | 2 +- fs/ext4/super.c | 6 ++--- fs/gfs2/bmap.c | 5 ++--- fs/gfs2/dir.c | 5 ++--- fs/gfs2/meta_io.c | 9 ++++---- fs/gfs2/quota.c | 2 +- fs/isofs/compress.c | 2 +- fs/jbd2/commit.c | 8 +++---- fs/jbd2/journal.c | 4 ++-- fs/jbd2/recovery.c | 4 ++-- fs/nilfs2/btnode.c | 2 +- fs/nilfs2/gcinode.c | 2 +- fs/nilfs2/mdt.c | 2 +- fs/ntfs/aops.c | 6 ++--- fs/ntfs/compress.c | 2 +- fs/ntfs/file.c | 2 +- fs/ntfs/logfile.c | 2 +- fs/ntfs/mft.c | 4 ++-- fs/ntfs3/file.c | 2 +- fs/ntfs3/inode.c | 2 +- fs/ocfs2/aops.c | 2 +- fs/ocfs2/buffer_head_io.c | 8 +++---- fs/ocfs2/super.c | 2 +- fs/reiserfs/inode.c | 4 ++-- fs/reiserfs/journal.c | 12 +++++----- fs/reiserfs/stree.c | 4 ++-- fs/reiserfs/super.c | 2 +- fs/udf/dir.c | 2 +- fs/udf/directory.c | 2 +- fs/udf/inode.c | 2 +- fs/ufs/balloc.c | 2 +- include/linux/buffer_head.h | 4 ++-- 35 files changed, 88 insertions(+), 90 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/drivers/md/md-bitmap.c b/drivers/md/md-bitmap.c index 0a21b8317103..bf6dffadbe6f 100644 --- a/drivers/md/md-bitmap.c +++ b/drivers/md/md-bitmap.c @@ -302,7 +302,7 @@ static void write_page(struct bitmap *bitmap, struct page *page, int wait) atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); - submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); bh = bh->b_this_page; } @@ -394,7 +394,7 @@ static int read_page(struct file *file, unsigned long index, atomic_inc(&bitmap->pending_writes); set_buffer_locked(bh); set_buffer_mapped(bh); - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); } blk_cur++; bh = bh->b_this_page; diff --git a/fs/buffer.c b/fs/buffer.c index 4a00b61f35ec..af53569930bb 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -52,8 +52,8 @@ #include "internal.h" static int fsync_buffers_list(spinlock_t *lock, struct list_head *list); -static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, - struct buffer_head *bh, struct writeback_control *wbc); +static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, + struct writeback_control *wbc); #define BH_ENTRY(list) list_entry((list), struct buffer_head, b_assoc_buffers) @@ -562,7 +562,7 @@ void write_boundary_block(struct block_device *bdev, struct buffer_head *bh = __find_get_block(bdev, bblock + 1, blocksize); if (bh) { if (buffer_dirty(bh)) - ll_rw_block(REQ_OP_WRITE, 0, 1, &bh); + ll_rw_block(REQ_OP_WRITE, 1, &bh); put_bh(bh); } } @@ -1174,7 +1174,7 @@ static struct buffer_head *__bread_slow(struct buffer_head *bh) } else { get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; @@ -1342,7 +1342,7 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) { struct buffer_head *bh = __getblk(bdev, block, size); if (likely(bh)) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh); brelse(bh); } } @@ -1353,7 +1353,7 @@ void __breadahead_gfp(struct block_device *bdev, sector_t block, unsigned size, { struct buffer_head *bh = __getblk_gfp(bdev, block, size, gfp); if (likely(bh)) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, &bh); + 
ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, &bh); brelse(bh); } } @@ -1804,7 +1804,7 @@ int __block_write_full_page(struct inode *inode, struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); + submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); nr_underway++; } bh = next; @@ -1858,7 +1858,7 @@ recover: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh_wbc(REQ_OP_WRITE, write_flags, bh, wbc); + submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc); nr_underway++; } bh = next; @@ -2033,7 +2033,7 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len, if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh) && (block_start < from || block_end > to)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); *wait_bh++=bh; } } @@ -2334,7 +2334,7 @@ int block_read_full_folio(struct folio *folio, get_block_t *get_block) if (buffer_uptodate(bh)) end_buffer_async_read(bh, 1); else - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); } return 0; } @@ -2665,7 +2665,7 @@ int nobh_write_begin(struct address_space *mapping, loff_t pos, unsigned len, if (block_start < from || block_end > to) { lock_buffer(bh); bh->b_end_io = end_buffer_read_nobh; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); nr_reads++; } } @@ -2915,7 +2915,7 @@ int block_truncate_page(struct address_space *mapping, if (!buffer_uptodate(bh) && !buffer_delay(bh) && !buffer_unwritten(bh)) { err = -EIO; - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); /* Uhhuh. Read error. Complain and punt. */ if (!buffer_uptodate(bh)) @@ -2994,9 +2994,10 @@ static void end_bio_bh_io_sync(struct bio *bio) bio_put(bio); } -static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, - struct buffer_head *bh, struct writeback_control *wbc) +static int submit_bh_wbc(blk_opf_t opf, struct buffer_head *bh, + struct writeback_control *wbc) { + const enum req_op op = opf & REQ_OP_MASK; struct bio *bio; BUG_ON(!buffer_locked(bh)); @@ -3012,11 +3013,11 @@ static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, clear_buffer_write_io_error(bh); if (buffer_meta(bh)) - op_flags |= REQ_META; + opf |= REQ_META; if (buffer_prio(bh)) - op_flags |= REQ_PRIO; + opf |= REQ_PRIO; - bio = bio_alloc(bh->b_bdev, 1, op | op_flags, GFP_NOIO); + bio = bio_alloc(bh->b_bdev, 1, opf, GFP_NOIO); fscrypt_set_bio_crypt_ctx_bh(bio, bh, GFP_NOIO); @@ -3040,9 +3041,9 @@ static int submit_bh_wbc(enum req_op op, blk_opf_t op_flags, return 0; } -int submit_bh(enum req_op op, blk_opf_t op_flags, struct buffer_head *bh) +int submit_bh(blk_opf_t opf, struct buffer_head *bh) { - return submit_bh_wbc(op, op_flags, bh, NULL); + return submit_bh_wbc(opf, bh, NULL); } EXPORT_SYMBOL(submit_bh); @@ -3072,9 +3073,9 @@ EXPORT_SYMBOL(submit_bh); * All of the buffers must be for the same device, and must also be a * multiple of the current approved size for the device. 
*/ -void ll_rw_block(enum req_op op, blk_opf_t op_flags, int nr, - struct buffer_head *bhs[]) +void ll_rw_block(const blk_opf_t opf, int nr, struct buffer_head *bhs[]) { + const enum req_op op = opf & REQ_OP_MASK; int i; for (i = 0; i < nr; i++) { @@ -3086,14 +3087,14 @@ void ll_rw_block(enum req_op op, blk_opf_t op_flags, int nr, if (test_clear_buffer_dirty(bh)) { bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(op, op_flags, bh); + submit_bh(opf, bh); continue; } } else { if (!buffer_uptodate(bh)) { bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(op, op_flags, bh); + submit_bh(opf, bh); continue; } } @@ -3111,7 +3112,7 @@ void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) } bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(REQ_OP_WRITE, op_flags, bh); + submit_bh(REQ_OP_WRITE | op_flags, bh); } EXPORT_SYMBOL(write_dirty_buffer); @@ -3138,7 +3139,7 @@ int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags) get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(REQ_OP_WRITE, op_flags, bh); + ret = submit_bh(REQ_OP_WRITE | op_flags, bh); wait_on_buffer(bh); if (!ret && !buffer_uptodate(bh)) ret = -EIO; @@ -3366,7 +3367,7 @@ int bh_submit_read(struct buffer_head *bh) get_bh(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return 0; diff --git a/fs/ext4/fast_commit.c b/fs/ext4/fast_commit.c index 795a60ad1897..0df5482c6c1c 100644 --- a/fs/ext4/fast_commit.c +++ b/fs/ext4/fast_commit.c @@ -668,7 +668,7 @@ static void ext4_fc_submit_bh(struct super_block *sb, bool is_tail) set_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = ext4_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE, write_flags, bh); + submit_bh(REQ_OP_WRITE | write_flags, bh); EXT4_SB(sb)->s_fc_bh = NULL; } diff --git a/fs/ext4/mmp.c b/fs/ext4/mmp.c index b221f313ded6..9af68a7ecdcf 100644 --- a/fs/ext4/mmp.c +++ b/fs/ext4/mmp.c @@ -52,7 +52,7 @@ static int write_mmp_block(struct super_block *sb, struct buffer_head *bh) lock_buffer(bh); bh->b_end_io = end_buffer_write_sync; get_bh(bh); - submit_bh(REQ_OP_WRITE, REQ_SYNC | REQ_META | REQ_PRIO, bh); + submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_META | REQ_PRIO, bh); wait_on_buffer(bh); sb_end_write(sb); if (unlikely(!buffer_uptodate(bh))) diff --git a/fs/ext4/super.c b/fs/ext4/super.c index 845f2f8aee5f..24922184b622 100644 --- a/fs/ext4/super.c +++ b/fs/ext4/super.c @@ -171,7 +171,7 @@ static inline void __ext4_read_bh(struct buffer_head *bh, int op_flags, bh->b_end_io = end_io ? end_io : end_buffer_read_sync; get_bh(bh); - submit_bh(REQ_OP_READ, op_flags, bh); + submit_bh(REQ_OP_READ | op_flags, bh); } void ext4_read_bh_nowait(struct buffer_head *bh, int op_flags, @@ -5939,8 +5939,8 @@ static int ext4_commit_super(struct super_block *sb) /* Clear potential dirty bit if it was journalled update */ clear_buffer_dirty(sbh); sbh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, - REQ_SYNC | (test_opt(sb, BARRIER) ? REQ_FUA : 0), sbh); + submit_bh(REQ_OP_WRITE | REQ_SYNC | + (test_opt(sb, BARRIER) ? 
REQ_FUA : 0), sbh); wait_on_buffer(sbh); if (buffer_write_io_error(sbh)) { ext4_msg(sb, KERN_ERR, "I/O error while writing " diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index b6697333bb2b..3bdb2c668a71 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -310,9 +310,8 @@ static void gfs2_metapath_ra(struct gfs2_glock *gl, __be64 *start, __be64 *end) if (trylock_buffer(rabh)) { if (!buffer_uptodate(rabh)) { rabh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - rabh); + submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | + REQ_PRIO, rabh); continue; } unlock_buffer(rabh); diff --git a/fs/gfs2/dir.c b/fs/gfs2/dir.c index 42b7dfffb5e7..a0562dd1bada 100644 --- a/fs/gfs2/dir.c +++ b/fs/gfs2/dir.c @@ -1508,9 +1508,8 @@ static void gfs2_dir_readahead(struct inode *inode, unsigned hsize, u32 index, continue; } bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - bh); + submit_bh(REQ_OP_READ | REQ_RAHEAD | REQ_META | + REQ_PRIO, bh); continue; } brelse(bh); diff --git a/fs/gfs2/meta_io.c b/fs/gfs2/meta_io.c index 868dcc71b581..3570739f005d 100644 --- a/fs/gfs2/meta_io.c +++ b/fs/gfs2/meta_io.c @@ -75,7 +75,7 @@ static int gfs2_aspace_writepage(struct page *page, struct writeback_control *wb do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(REQ_OP_WRITE, write_flags, bh); + submit_bh(REQ_OP_WRITE | write_flags, bh); nr_underway++; } bh = next; @@ -527,7 +527,7 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) if (buffer_uptodate(first_bh)) goto out; if (!buffer_locked(first_bh)) - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &first_bh); + ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &first_bh); dblock++; extlen--; @@ -536,9 +536,8 @@ struct buffer_head *gfs2_meta_ra(struct gfs2_glock *gl, u64 dblock, u32 extlen) bh = gfs2_getbuf(gl, dblock, CREATE); if (!buffer_uptodate(bh) && !buffer_locked(bh)) - ll_rw_block(REQ_OP_READ, - REQ_RAHEAD | REQ_META | REQ_PRIO, - 1, &bh); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD | REQ_META | + REQ_PRIO, 1, &bh); brelse(bh); dblock++; extlen--; diff --git a/fs/gfs2/quota.c b/fs/gfs2/quota.c index 59d727a4ae2c..c98a7faa67d3 100644 --- a/fs/gfs2/quota.c +++ b/fs/gfs2/quota.c @@ -746,7 +746,7 @@ static int gfs2_write_buf_to_page(struct gfs2_inode *ip, unsigned long index, if (PageUptodate(page)) set_buffer_uptodate(bh); if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, REQ_META | REQ_PRIO, 1, &bh); + ll_rw_block(REQ_OP_READ | REQ_META | REQ_PRIO, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) goto unlock_out; diff --git a/fs/isofs/compress.c b/fs/isofs/compress.c index 95a19f25d61c..b466172eec25 100644 --- a/fs/isofs/compress.c +++ b/fs/isofs/compress.c @@ -82,7 +82,7 @@ static loff_t zisofs_uncompress_block(struct inode *inode, loff_t block_start, return 0; } haveblocks = isofs_get_blocks(inode, blocknum, bhs, needblocks); - ll_rw_block(REQ_OP_READ, 0, haveblocks, bhs); + ll_rw_block(REQ_OP_READ, haveblocks, bhs); curbh = 0; curpage = 0; diff --git a/fs/jbd2/commit.c b/fs/jbd2/commit.c index eb315e81f1a6..890b5543a1c5 100644 --- a/fs/jbd2/commit.c +++ b/fs/jbd2/commit.c @@ -155,10 +155,10 @@ static int journal_submit_commit_record(journal_t *journal, if (journal->j_flags & JBD2_BARRIER && !jbd2_has_feature_async_commit(journal)) - ret = submit_bh(REQ_OP_WRITE, - REQ_SYNC | REQ_PREFLUSH | REQ_FUA, bh); + ret = submit_bh(REQ_OP_WRITE | REQ_SYNC | REQ_PREFLUSH | + REQ_FUA, bh); else - ret = 
submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + ret = submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); *cbh = bh; return ret; @@ -763,7 +763,7 @@ start_journal_io: clear_buffer_dirty(bh); set_buffer_uptodate(bh); bh->b_end_io = journal_end_buffer_io_sync; - submit_bh(REQ_OP_WRITE, REQ_SYNC, bh); + submit_bh(REQ_OP_WRITE | REQ_SYNC, bh); } cond_resched(); diff --git a/fs/jbd2/journal.c b/fs/jbd2/journal.c index 9015f5fa2862..07e6aaf7e213 100644 --- a/fs/jbd2/journal.c +++ b/fs/jbd2/journal.c @@ -1638,7 +1638,7 @@ static int jbd2_write_superblock(journal_t *journal, int write_flags) sb->s_checksum = jbd2_superblock_csum(journal, sb); get_bh(bh); bh->b_end_io = end_buffer_write_sync; - ret = submit_bh(REQ_OP_WRITE, write_flags, bh); + ret = submit_bh(REQ_OP_WRITE | write_flags, bh); wait_on_buffer(bh); if (buffer_write_io_error(bh)) { clear_buffer_write_io_error(bh); @@ -1900,7 +1900,7 @@ static int journal_get_superblock(journal_t *journal) J_ASSERT(bh != NULL); if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { printk(KERN_ERR diff --git a/fs/jbd2/recovery.c b/fs/jbd2/recovery.c index 8ca3527189f8..e699d6ab2c0e 100644 --- a/fs/jbd2/recovery.c +++ b/fs/jbd2/recovery.c @@ -100,7 +100,7 @@ static int do_readahead(journal_t *journal, unsigned int start) if (!buffer_uptodate(bh) && !buffer_locked(bh)) { bufs[nbufs++] = bh; if (nbufs == MAXBUF) { - ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); + ll_rw_block(REQ_OP_READ, nbufs, bufs); journal_brelse_array(bufs, nbufs); nbufs = 0; } @@ -109,7 +109,7 @@ static int do_readahead(journal_t *journal, unsigned int start) } if (nbufs) - ll_rw_block(REQ_OP_READ, 0, nbufs, bufs); + ll_rw_block(REQ_OP_READ, nbufs, bufs); err = 0; failed: diff --git a/fs/nilfs2/btnode.c b/fs/nilfs2/btnode.c index ca611ac09f7c..5c39efbf733f 100644 --- a/fs/nilfs2/btnode.c +++ b/fs/nilfs2/btnode.c @@ -122,7 +122,7 @@ int nilfs_btnode_submit_block(struct address_space *btnc, __u64 blocknr, bh->b_blocknr = pblocknr; /* set block address for read */ bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode, mode_flags, bh); + submit_bh(mode | mode_flags, bh); bh->b_blocknr = blocknr; /* set back to the given block address */ *submit_ptr = pblocknr; err = 0; diff --git a/fs/nilfs2/gcinode.c b/fs/nilfs2/gcinode.c index 04fdd420eae7..847def8af315 100644 --- a/fs/nilfs2/gcinode.c +++ b/fs/nilfs2/gcinode.c @@ -92,7 +92,7 @@ int nilfs_gccache_submit_read_data(struct inode *inode, sector_t blkoff, bh->b_blocknr = pbn; bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); if (vbn) bh->b_blocknr = vbn; out: diff --git a/fs/nilfs2/mdt.c b/fs/nilfs2/mdt.c index d29a0f2b9c16..66e8811c2528 100644 --- a/fs/nilfs2/mdt.c +++ b/fs/nilfs2/mdt.c @@ -148,7 +148,7 @@ nilfs_mdt_submit_block(struct inode *inode, unsigned long blkoff, bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(mode, mode_flags, bh); + submit_bh(mode | mode_flags, bh); ret = 0; trace_nilfs2_mdt_submit_block(inode, inode->i_ino, blkoff, mode); diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 9e3964ea2ea0..b5765fdb3a47 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -342,7 +342,7 @@ handle_zblock: for (i = 0; i < nr; i++) { tbh = arr[i]; if (likely(!buffer_uptodate(tbh))) - submit_bh(REQ_OP_READ, 0, tbh); + submit_bh(REQ_OP_READ, tbh); else ntfs_end_buffer_async_read(tbh, 1); } @@ -859,7 +859,7 @@ lock_retry_remap: do { struct buffer_head *next = bh->b_this_page; if 
(buffer_async_write(bh)) { - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); need_end_writeback = false; } bh = next; @@ -1187,7 +1187,7 @@ lock_retry_remap: BUG_ON(!buffer_mapped(tbh)); get_bh(tbh); tbh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, 0, tbh); + submit_bh(REQ_OP_WRITE, tbh); } /* Synchronize the mft mirror now if not @sync. */ if (is_mft && !sync) diff --git a/fs/ntfs/compress.c b/fs/ntfs/compress.c index a60f543e7557..587e9b187873 100644 --- a/fs/ntfs/compress.c +++ b/fs/ntfs/compress.c @@ -658,7 +658,7 @@ lock_retry_remap: } get_bh(tbh); tbh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, tbh); + submit_bh(REQ_OP_READ, tbh); } /* Wait for io completion on all buffer heads. */ diff --git a/fs/ntfs/file.c b/fs/ntfs/file.c index a8abe2296514..46ed69b86c33 100644 --- a/fs/ntfs/file.c +++ b/fs/ntfs/file.c @@ -537,7 +537,7 @@ static inline int ntfs_submit_bh_for_read(struct buffer_head *bh) lock_buffer(bh); get_bh(bh); bh->b_end_io = end_buffer_read_sync; - return submit_bh(REQ_OP_READ, 0, bh); + return submit_bh(REQ_OP_READ, bh); } /** diff --git a/fs/ntfs/logfile.c b/fs/ntfs/logfile.c index bc1bf217b38e..6ce60ffc6ac0 100644 --- a/fs/ntfs/logfile.c +++ b/fs/ntfs/logfile.c @@ -807,7 +807,7 @@ map_vcn: * completed ignore errors afterwards as we can assume * that if one buffer worked all of them will work. */ - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); if (should_wait) { should_wait = false; wait_on_buffer(bh); diff --git a/fs/ntfs/mft.c b/fs/ntfs/mft.c index 0d62cd5bb7f8..f7bf5ce960cc 100644 --- a/fs/ntfs/mft.c +++ b/fs/ntfs/mft.c @@ -583,7 +583,7 @@ int ntfs_sync_mft_mirror(ntfs_volume *vol, const unsigned long mft_no, clear_buffer_dirty(tbh); get_bh(tbh); tbh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, 0, tbh); + submit_bh(REQ_OP_WRITE, tbh); } /* Wait on i/o completion of buffers. */ for (i_bhs = 0; i_bhs < nr_bhs; i_bhs++) { @@ -780,7 +780,7 @@ int write_mft_record_nolock(ntfs_inode *ni, MFT_RECORD *m, int sync) clear_buffer_dirty(tbh); get_bh(tbh); tbh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, 0, tbh); + submit_bh(REQ_OP_WRITE, tbh); } /* Synchronize the mft mirror now if not @sync. 
*/ if (!sync && ni->mft_no < vol->mftmirr_size) diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c index 8e9d2b35175f..4a21745711fe 100644 --- a/fs/ntfs3/file.c +++ b/fs/ntfs3/file.c @@ -242,7 +242,7 @@ static int ntfs_zero_range(struct inode *inode, u64 vbo, u64 vbo_to) lock_buffer(bh); bh->b_end_io = end_buffer_read_sync; get_bh(bh); - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c index be4ebdd8048b..d100a063def2 100644 --- a/fs/ntfs3/inode.c +++ b/fs/ntfs3/inode.c @@ -629,7 +629,7 @@ static noinline int ntfs_get_block_vbo(struct inode *inode, u64 vbo, bh->b_size = block_size; off = vbo & (PAGE_SIZE - 1); set_bh_page(bh, page, off); - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { err = -EIO; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 35d40a67204c..304ed2be1b83 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -638,7 +638,7 @@ int ocfs2_map_page_blocks(struct page *page, u64 *p_blkno, !buffer_new(bh) && ocfs2_should_read_blk(inode, page, block_start) && (block_start < from || block_end > to)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); *wait_bh++=bh; } diff --git a/fs/ocfs2/buffer_head_io.c b/fs/ocfs2/buffer_head_io.c index e7758778abef..196638a22b48 100644 --- a/fs/ocfs2/buffer_head_io.c +++ b/fs/ocfs2/buffer_head_io.c @@ -64,7 +64,7 @@ int ocfs2_write_block(struct ocfs2_super *osb, struct buffer_head *bh, get_bh(bh); /* for end_buffer_write_sync() */ bh->b_end_io = end_buffer_write_sync; - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); wait_on_buffer(bh); @@ -147,7 +147,7 @@ int ocfs2_read_blocks_sync(struct ocfs2_super *osb, u64 block, get_bh(bh); /* for end_buffer_read_sync() */ bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); } read_failure: @@ -328,7 +328,7 @@ int ocfs2_read_blocks(struct ocfs2_caching_info *ci, u64 block, int nr, if (validate) set_buffer_needs_validate(bh); bh->b_end_io = end_buffer_read_sync; - submit_bh(REQ_OP_READ, 0, bh); + submit_bh(REQ_OP_READ, bh); continue; } } @@ -449,7 +449,7 @@ int ocfs2_write_super_or_backup(struct ocfs2_super *osb, get_bh(bh); /* for end_buffer_write_sync() */ bh->b_end_io = end_buffer_write_sync; ocfs2_compute_meta_ecc(osb->sb, bh->b_data, &di->i_check); - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); wait_on_buffer(bh); diff --git a/fs/ocfs2/super.c b/fs/ocfs2/super.c index f7298816d8d9..e68807196076 100644 --- a/fs/ocfs2/super.c +++ b/fs/ocfs2/super.c @@ -1785,7 +1785,7 @@ static int ocfs2_get_sector(struct super_block *sb, if (!buffer_dirty(*bh)) clear_buffer_uptodate(*bh); unlock_buffer(*bh); - ll_rw_block(REQ_OP_READ, 0, 1, bh); + ll_rw_block(REQ_OP_READ, 1, bh); wait_on_buffer(*bh); if (!buffer_uptodate(*bh)) { mlog_errno(-EIO); diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c index 0cffe054b78e..23f542d1748b 100644 --- a/fs/reiserfs/inode.c +++ b/fs/reiserfs/inode.c @@ -2664,7 +2664,7 @@ static int reiserfs_write_full_page(struct page *page, do { struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); nr++; } put_bh(bh); @@ -2724,7 +2724,7 @@ fail: struct buffer_head *next = bh->b_this_page; if (buffer_async_write(bh)) { clear_buffer_dirty(bh); - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); nr++; } put_bh(bh); diff 
--git a/fs/reiserfs/journal.c b/fs/reiserfs/journal.c index d8cc9a366124..94addfcefede 100644 --- a/fs/reiserfs/journal.c +++ b/fs/reiserfs/journal.c @@ -650,7 +650,7 @@ static void submit_logged_buffer(struct buffer_head *bh) BUG(); if (!buffer_uptodate(bh)) BUG(); - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); } static void submit_ordered_buffer(struct buffer_head *bh) @@ -660,7 +660,7 @@ static void submit_ordered_buffer(struct buffer_head *bh) clear_buffer_dirty(bh); if (!buffer_uptodate(bh)) BUG(); - submit_bh(REQ_OP_WRITE, 0, bh); + submit_bh(REQ_OP_WRITE, bh); } #define CHUNK_SIZE 32 @@ -868,7 +868,7 @@ loop_next: */ if (buffer_dirty(bh) && unlikely(bh->b_page->mapping == NULL)) { spin_unlock(lock); - ll_rw_block(REQ_OP_WRITE, 0, 1, &bh); + ll_rw_block(REQ_OP_WRITE, 1, &bh); spin_lock(lock); } put_bh(bh); @@ -1054,7 +1054,7 @@ static int flush_commit_list(struct super_block *s, if (tbh) { if (buffer_dirty(tbh)) { depth = reiserfs_write_unlock_nested(s); - ll_rw_block(REQ_OP_WRITE, 0, 1, &tbh); + ll_rw_block(REQ_OP_WRITE, 1, &tbh); reiserfs_write_lock_nested(s, depth); } put_bh(tbh) ; @@ -2240,7 +2240,7 @@ abort_replay: } } /* read in the log blocks, memcpy to the corresponding real block */ - ll_rw_block(REQ_OP_READ, 0, get_desc_trans_len(desc), log_blocks); + ll_rw_block(REQ_OP_READ, get_desc_trans_len(desc), log_blocks); for (i = 0; i < get_desc_trans_len(desc); i++) { wait_on_buffer(log_blocks[i]); @@ -2342,7 +2342,7 @@ static struct buffer_head *reiserfs_breada(struct block_device *dev, } else bhlist[j++] = bh; } - ll_rw_block(REQ_OP_READ, 0, j, bhlist); + ll_rw_block(REQ_OP_READ, j, bhlist); for (i = 1; i < j; i++) brelse(bhlist[i]); bh = bhlist[0]; diff --git a/fs/reiserfs/stree.c b/fs/reiserfs/stree.c index ef42729216d1..9a293609a022 100644 --- a/fs/reiserfs/stree.c +++ b/fs/reiserfs/stree.c @@ -579,7 +579,7 @@ static int search_by_key_reada(struct super_block *s, if (!buffer_uptodate(bh[j])) { if (depth == -1) depth = reiserfs_write_unlock_nested(s); - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, 1, bh + j); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD, 1, bh + j); } brelse(bh[j]); } @@ -685,7 +685,7 @@ int search_by_key(struct super_block *sb, const struct cpu_key *key, if (!buffer_uptodate(bh) && depth == -1) depth = reiserfs_write_unlock_nested(sb); - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (depth != -1) diff --git a/fs/reiserfs/super.c b/fs/reiserfs/super.c index cfb7c44c7366..c88cd2ce0665 100644 --- a/fs/reiserfs/super.c +++ b/fs/reiserfs/super.c @@ -1702,7 +1702,7 @@ static int read_super_block(struct super_block *s, int offset) /* after journal replay, reread all bitmap and super blocks */ static int reread_meta_blocks(struct super_block *s) { - ll_rw_block(REQ_OP_READ, 0, 1, &SB_BUFFER_WITH_SB(s)); + ll_rw_block(REQ_OP_READ, 1, &SB_BUFFER_WITH_SB(s)); wait_on_buffer(SB_BUFFER_WITH_SB(s)); if (!buffer_uptodate(SB_BUFFER_WITH_SB(s))) { reiserfs_warning(s, "reiserfs-2504", "error reading the super"); diff --git a/fs/udf/dir.c b/fs/udf/dir.c index 42e3e551fa4c..cad3772f9dbe 100644 --- a/fs/udf/dir.c +++ b/fs/udf/dir.c @@ -130,7 +130,7 @@ static int udf_readdir(struct file *file, struct dir_context *ctx) brelse(tmp); } if (num) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD, num, bha); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/fs/udf/directory.c b/fs/udf/directory.c index 73720320f0ab..a2adf6293093 100644 --- a/fs/udf/directory.c +++ b/fs/udf/directory.c 
@@ -89,7 +89,7 @@ struct fileIdentDesc *udf_fileident_read(struct inode *dir, loff_t *nf_pos, brelse(tmp); } if (num) { - ll_rw_block(REQ_OP_READ, REQ_RAHEAD, num, bha); + ll_rw_block(REQ_OP_READ | REQ_RAHEAD, num, bha); for (i = 0; i < num; i++) brelse(bha[i]); } diff --git a/fs/udf/inode.c b/fs/udf/inode.c index edc88716751a..8d06daed549f 100644 --- a/fs/udf/inode.c +++ b/fs/udf/inode.c @@ -1214,7 +1214,7 @@ struct buffer_head *udf_bread(struct inode *inode, udf_pblk_t block, if (buffer_uptodate(bh)) return bh; - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) diff --git a/fs/ufs/balloc.c b/fs/ufs/balloc.c index 075d3d9114c8..bd810d8239f2 100644 --- a/fs/ufs/balloc.c +++ b/fs/ufs/balloc.c @@ -296,7 +296,7 @@ static void ufs_change_blocknr(struct inode *inode, sector_t beg, if (!buffer_mapped(bh)) map_bh(bh, inode->i_sb, oldb + pos); if (!buffer_uptodate(bh)) { - ll_rw_block(REQ_OP_READ, 0, 1, &bh); + ll_rw_block(REQ_OP_READ, 1, &bh); wait_on_buffer(bh); if (!buffer_uptodate(bh)) { ufs_error(inode->i_sb, __func__, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 9795df9400bd..bb68eb6407da 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -202,11 +202,11 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(enum req_op, blk_opf_t, int, struct buffer_head * bh[]); +void ll_rw_block(blk_opf_t, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); void write_dirty_buffer(struct buffer_head *bh, blk_opf_t op_flags); -int submit_bh(enum req_op, blk_opf_t, struct buffer_head *); +int submit_bh(blk_opf_t, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -- cgit v1.2.3 From 67235182a41c1bd6b32806a1556a1d299b84212b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 6 Jun 2022 10:20:31 -0400 Subject: mm/migrate: Convert buffer_migrate_page() to buffer_migrate_folio() Use a folio throughout __buffer_migrate_folio(), add kernel-doc for buffer_migrate_folio() and buffer_migrate_folio_norefs(), move their declarations to buffer.h and switch all filesystems that have wired them up. 
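For a filesystem, wiring this up is a one-line change to its address_space_operations (a minimal sketch modeled on the ext2/ext4/ntfs/ocfs2 hunks below; example_aops is a placeholder name, not part of this series):

	static const struct address_space_operations example_aops = {
		/* ... other methods unchanged ... */
		/* was: .migratepage   = buffer_migrate_page, */
		.migrate_folio = buffer_migrate_folio,
	};

Block device mappings, where buffer heads are looked up and referenced directly, instead switch to the more careful buffer_migrate_folio_norefs(), as the block/fops.c hunk below shows.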
Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig --- block/fops.c | 2 +- fs/ext2/inode.c | 4 +-- fs/ext4/inode.c | 4 +-- fs/ntfs/aops.c | 6 ++-- fs/ocfs2/aops.c | 2 +- include/linux/buffer_head.h | 10 ++++++ include/linux/fs.h | 12 ------- mm/migrate.c | 76 +++++++++++++++++++++++++++------------------ 8 files changed, 65 insertions(+), 51 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/block/fops.c b/block/fops.c index d6b3276a6c68..743fc46d0aad 100644 --- a/block/fops.c +++ b/block/fops.c @@ -417,7 +417,7 @@ const struct address_space_operations def_blk_aops = { .write_end = blkdev_write_end, .writepages = blkdev_writepages, .direct_IO = blkdev_direct_IO, - .migratepage = buffer_migrate_page_norefs, + .migrate_folio = buffer_migrate_folio_norefs, .is_dirty_writeback = buffer_check_dirty_writeback, }; diff --git a/fs/ext2/inode.c b/fs/ext2/inode.c index e6b932219803..58a9d061f17d 100644 --- a/fs/ext2/inode.c +++ b/fs/ext2/inode.c @@ -973,7 +973,7 @@ const struct address_space_operations ext2_aops = { .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -989,7 +989,7 @@ const struct address_space_operations ext2_nobh_aops = { .bmap = ext2_bmap, .direct_IO = ext2_direct_IO, .writepages = ext2_writepages, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c index 06cc68878176..87a8b4382bce 100644 --- a/fs/ext4/inode.c +++ b/fs/ext4/inode.c @@ -3633,7 +3633,7 @@ static const struct address_space_operations ext4_aops = { .invalidate_folio = ext4_invalidate_folio, .release_folio = ext4_release_folio, .direct_IO = noop_direct_IO, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .swap_activate = ext4_iomap_swap_activate, @@ -3668,7 +3668,7 @@ static const struct address_space_operations ext4_da_aops = { .invalidate_folio = ext4_invalidate_folio, .release_folio = ext4_release_folio, .direct_IO = noop_direct_IO, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, .swap_activate = ext4_iomap_swap_activate, diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c index 9e3964ea2ea0..5f4fb6ca6f2e 100644 --- a/fs/ntfs/aops.c +++ b/fs/ntfs/aops.c @@ -1659,7 +1659,7 @@ const struct address_space_operations ntfs_normal_aops = { .dirty_folio = block_dirty_folio, #endif /* NTFS_RW */ .bmap = ntfs_bmap, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -1673,7 +1673,7 @@ const struct address_space_operations ntfs_compressed_aops = { .writepage = ntfs_writepage, .dirty_folio = block_dirty_folio, #endif /* NTFS_RW */ - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; @@ -1688,7 +1688,7 @@ const struct address_space_operations ntfs_mst_aops = { .writepage = ntfs_writepage, /* Write dirty page to disk. 
*/ .dirty_folio = filemap_dirty_folio, #endif /* NTFS_RW */ - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 767df51f8657..1d489003f99d 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2462,7 +2462,7 @@ const struct address_space_operations ocfs2_aops = { .direct_IO = ocfs2_direct_IO, .invalidate_folio = block_invalidate_folio, .release_folio = ocfs2_release_folio, - .migratepage = buffer_migrate_page, + .migrate_folio = buffer_migrate_folio, .is_partially_uptodate = block_is_partially_uptodate, .error_remove_page = generic_error_remove_page, }; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c9d1463bb20f..b0366c89d6a4 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -267,6 +267,16 @@ int nobh_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_writepage(struct page *page, get_block_t *get_block, struct writeback_control *wbc); +#ifdef CONFIG_MIGRATION +extern int buffer_migrate_folio(struct address_space *, + struct folio *dst, struct folio *src, enum migrate_mode); +extern int buffer_migrate_folio_norefs(struct address_space *, + struct folio *dst, struct folio *src, enum migrate_mode); +#else +#define buffer_migrate_folio NULL +#define buffer_migrate_folio_norefs NULL +#endif + void buffer_init(void); /* diff --git a/include/linux/fs.h b/include/linux/fs.h index 47431cf8fbb3..9e6b17da4e11 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3215,18 +3215,6 @@ extern int generic_check_addressable(unsigned, u64); extern void generic_set_encrypted_ci_d_ops(struct dentry *dentry); -#ifdef CONFIG_MIGRATION -extern int buffer_migrate_page(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -extern int buffer_migrate_page_norefs(struct address_space *, - struct page *, struct page *, - enum migrate_mode); -#else -#define buffer_migrate_page NULL -#define buffer_migrate_page_norefs NULL -#endif - int may_setattr(struct user_namespace *mnt_userns, struct inode *inode, unsigned int ia_valid); int setattr_prepare(struct user_namespace *, struct dentry *, struct iattr *); diff --git a/mm/migrate.c b/mm/migrate.c index 75b171425c45..ea5398d0f7f1 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -656,23 +656,23 @@ static bool buffer_migrate_lock_buffers(struct buffer_head *head, return true; } -static int __buffer_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode, +static int __buffer_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode, bool check_refs) { struct buffer_head *bh, *head; int rc; int expected_count; - if (!page_has_buffers(page)) - return migrate_page(mapping, newpage, page, mode); + head = folio_buffers(src); + if (!head) + return migrate_page(mapping, &dst->page, &src->page, mode); /* Check whether page does not have extra refs before we do more work */ - expected_count = expected_page_refs(mapping, page); - if (page_count(page) != expected_count) + expected_count = expected_page_refs(mapping, &src->page); + if (folio_ref_count(src) != expected_count) return -EAGAIN; - head = page_buffers(page); if (!buffer_migrate_lock_buffers(head, mode)) return -EAGAIN; @@ -703,23 +703,22 @@ recheck_buffers: } } - rc = migrate_page_move_mapping(mapping, newpage, page, 0); + rc = 
folio_migrate_mapping(mapping, dst, src, 0); if (rc != MIGRATEPAGE_SUCCESS) goto unlock_buffers; - attach_page_private(newpage, detach_page_private(page)); + folio_attach_private(dst, folio_detach_private(src)); bh = head; do { - set_bh_page(bh, newpage, bh_offset(bh)); + set_bh_page(bh, &dst->page, bh_offset(bh)); bh = bh->b_this_page; - } while (bh != head); if (mode != MIGRATE_SYNC_NO_COPY) - migrate_page_copy(newpage, page); + folio_migrate_copy(dst, src); else - migrate_page_states(newpage, page); + folio_migrate_flags(dst, src); rc = MIGRATEPAGE_SUCCESS; unlock_buffers: @@ -729,34 +728,51 @@ unlock_buffers: do { unlock_buffer(bh); bh = bh->b_this_page; - } while (bh != head); return rc; } -/* - * Migration function for pages with buffers. This function can only be used - * if the underlying filesystem guarantees that no other references to "page" - * exist. For example attached buffer heads are accessed only under page lock. +/** + * buffer_migrate_folio() - Migration function for folios with buffers. + * @mapping: The address space containing @src. + * @dst: The folio to migrate to. + * @src: The folio to migrate from. + * @mode: How to migrate the folio. + * + * This function can only be used if the underlying filesystem guarantees + * that no other references to @src exist. For example attached buffer + * heads are accessed only under the folio lock. If your filesystem cannot + * provide this guarantee, buffer_migrate_folio_norefs() may be more + * appropriate. + * + * Return: 0 on success or a negative errno on failure. */ -int buffer_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) +int buffer_migrate_folio(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { - return __buffer_migrate_page(mapping, newpage, page, mode, false); + return __buffer_migrate_folio(mapping, dst, src, mode, false); } -EXPORT_SYMBOL(buffer_migrate_page); +EXPORT_SYMBOL(buffer_migrate_folio); -/* - * Same as above except that this variant is more careful and checks that there - * are also no buffer head references. This function is the right one for - * mappings where buffer heads are directly looked up and referenced (such as - * block device mappings). +/** + * buffer_migrate_folio_norefs() - Migration function for folios with buffers. + * @mapping: The address space containing @src. + * @dst: The folio to migrate to. + * @src: The folio to migrate from. + * @mode: How to migrate the folio. + * + * Like buffer_migrate_folio() except that this variant is more careful + * and checks that there are also no buffer head references. This function + * is the right one for mappings where buffer heads are directly looked + * up and referenced (such as block device mappings). + * + * Return: 0 on success or a negative errno on failure. */ -int buffer_migrate_page_norefs(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) +int buffer_migrate_folio_norefs(struct address_space *mapping, + struct folio *dst, struct folio *src, enum migrate_mode mode) { - return __buffer_migrate_page(mapping, newpage, page, mode, true); + return __buffer_migrate_folio(mapping, dst, src, mode, true); } #endif -- cgit v1.2.3 From cc9cf350d100f4c336edb228cbd078003430cfe7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 13 Jun 2022 07:37:13 +0200 Subject: fs: remove the nobh helpers All callers are gone, so remove the now dead code. 
Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Matthew Wilcox (Oracle) --- fs/buffer.c | 324 -------------------------------------------- fs/mpage.c | 25 +--- include/linux/buffer_head.h | 8 -- include/linux/mpage.h | 2 - 4 files changed, 1 insertion(+), 358 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/fs/buffer.c b/fs/buffer.c index ce9844d7c10f..5717d1881d2f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2537,330 +2537,6 @@ out_unlock: } EXPORT_SYMBOL(block_page_mkwrite); -/* - * nobh_write_begin()'s prereads are special: the buffer_heads are freed - * immediately, while under the page lock. So it needs a special end_io - * handler which does not touch the bh after unlocking it. - */ -static void end_buffer_read_nobh(struct buffer_head *bh, int uptodate) -{ - __end_buffer_read_notouch(bh, uptodate); -} - -/* - * Attach the singly-linked list of buffers created by nobh_write_begin, to - * the page (converting it to circular linked list and taking care of page - * dirty races). - */ -static void attach_nobh_buffers(struct page *page, struct buffer_head *head) -{ - struct buffer_head *bh; - - BUG_ON(!PageLocked(page)); - - spin_lock(&page->mapping->private_lock); - bh = head; - do { - if (PageDirty(page)) - set_buffer_dirty(bh); - if (!bh->b_this_page) - bh->b_this_page = head; - bh = bh->b_this_page; - } while (bh != head); - attach_page_private(page, head); - spin_unlock(&page->mapping->private_lock); -} - -/* - * On entry, the page is fully not uptodate. - * On exit the page is fully uptodate in the areas outside (from,to) - * The filesystem needs to handle block truncation upon failure. - */ -int nobh_write_begin(struct address_space *mapping, loff_t pos, unsigned len, - struct page **pagep, void **fsdata, - get_block_t *get_block) -{ - struct inode *inode = mapping->host; - const unsigned blkbits = inode->i_blkbits; - const unsigned blocksize = 1 << blkbits; - struct buffer_head *head, *bh; - struct page *page; - pgoff_t index; - unsigned from, to; - unsigned block_in_page; - unsigned block_start, block_end; - sector_t block_in_file; - int nr_reads = 0; - int ret = 0; - int is_mapped_to_disk = 1; - - index = pos >> PAGE_SHIFT; - from = pos & (PAGE_SIZE - 1); - to = from + len; - - page = grab_cache_page_write_begin(mapping, index); - if (!page) - return -ENOMEM; - *pagep = page; - *fsdata = NULL; - - if (page_has_buffers(page)) { - ret = __block_write_begin(page, pos, len, get_block); - if (unlikely(ret)) - goto out_release; - return ret; - } - - if (PageMappedToDisk(page)) - return 0; - - /* - * Allocate buffers so that we can keep track of state, and potentially - * attach them to the page if an error occurs. In the common case of - * no error, they will just be freed again without ever being attached - * to the page (which is all OK, because we're under the page lock). - * - * Be careful: the buffer linked list is a NULL terminated one, rather - * than the circular one we're used to. - */ - head = alloc_page_buffers(page, blocksize, false); - if (!head) { - ret = -ENOMEM; - goto out_release; - } - - block_in_file = (sector_t)page->index << (PAGE_SHIFT - blkbits); - - /* - * We loop across all blocks in the page, whether or not they are - * part of the affected region. This is so we can discover if the - * page is fully mapped-to-disk. 
- */ - for (block_start = 0, block_in_page = 0, bh = head; - block_start < PAGE_SIZE; - block_in_page++, block_start += blocksize, bh = bh->b_this_page) { - int create; - - block_end = block_start + blocksize; - bh->b_state = 0; - create = 1; - if (block_start >= to) - create = 0; - ret = get_block(inode, block_in_file + block_in_page, - bh, create); - if (ret) - goto failed; - if (!buffer_mapped(bh)) - is_mapped_to_disk = 0; - if (buffer_new(bh)) - clean_bdev_bh_alias(bh); - if (PageUptodate(page)) { - set_buffer_uptodate(bh); - continue; - } - if (buffer_new(bh) || !buffer_mapped(bh)) { - zero_user_segments(page, block_start, from, - to, block_end); - continue; - } - if (buffer_uptodate(bh)) - continue; /* reiserfs does this */ - if (block_start < from || block_end > to) { - lock_buffer(bh); - bh->b_end_io = end_buffer_read_nobh; - submit_bh(REQ_OP_READ, 0, bh); - nr_reads++; - } - } - - if (nr_reads) { - /* - * The page is locked, so these buffers are protected from - * any VM or truncate activity. Hence we don't need to care - * for the buffer_head refcounts. - */ - for (bh = head; bh; bh = bh->b_this_page) { - wait_on_buffer(bh); - if (!buffer_uptodate(bh)) - ret = -EIO; - } - if (ret) - goto failed; - } - - if (is_mapped_to_disk) - SetPageMappedToDisk(page); - - *fsdata = head; /* to be released by nobh_write_end */ - - return 0; - -failed: - BUG_ON(!ret); - /* - * Error recovery is a bit difficult. We need to zero out blocks that - * were newly allocated, and dirty them to ensure they get written out. - * Buffers need to be attached to the page at this point, otherwise - * the handling of potential IO errors during writeout would be hard - * (could try doing synchronous writeout, but what if that fails too?) - */ - attach_nobh_buffers(page, head); - page_zero_new_buffers(page, from, to); - -out_release: - unlock_page(page); - put_page(page); - *pagep = NULL; - - return ret; -} -EXPORT_SYMBOL(nobh_write_begin); - -int nobh_write_end(struct file *file, struct address_space *mapping, - loff_t pos, unsigned len, unsigned copied, - struct page *page, void *fsdata) -{ - struct inode *inode = page->mapping->host; - struct buffer_head *head = fsdata; - struct buffer_head *bh; - BUG_ON(fsdata != NULL && page_has_buffers(page)); - - if (unlikely(copied < len) && head) - attach_nobh_buffers(page, head); - if (page_has_buffers(page)) - return generic_write_end(file, mapping, pos, len, - copied, page, fsdata); - - SetPageUptodate(page); - set_page_dirty(page); - if (pos+copied > inode->i_size) { - i_size_write(inode, pos+copied); - mark_inode_dirty(inode); - } - - unlock_page(page); - put_page(page); - - while (head) { - bh = head; - head = head->b_this_page; - free_buffer_head(bh); - } - - return copied; -} -EXPORT_SYMBOL(nobh_write_end); - -/* - * nobh_writepage() - based on block_full_write_page() except - * that it tries to operate without attaching bufferheads to - * the page. - */ -int nobh_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc) -{ - struct inode * const inode = page->mapping->host; - loff_t i_size = i_size_read(inode); - const pgoff_t end_index = i_size >> PAGE_SHIFT; - unsigned offset; - int ret; - - /* Is the page fully inside i_size? */ - if (page->index < end_index) - goto out; - - /* Is the page fully outside i_size? (truncate in progress) */ - offset = i_size & (PAGE_SIZE-1); - if (page->index >= end_index+1 || !offset) { - unlock_page(page); - return 0; /* don't care */ - } - - /* - * The page straddles i_size. 
It must be zeroed out on each and every - * writepage invocation because it may be mmapped. "A file is mapped - * in multiples of the page size. For a file that is not a multiple of - * the page size, the remaining memory is zeroed when mapped, and - * writes to that region are not written out to the file." - */ - zero_user_segment(page, offset, PAGE_SIZE); -out: - ret = mpage_writepage(page, get_block, wbc); - if (ret == -EAGAIN) - ret = __block_write_full_page(inode, page, get_block, wbc, - end_buffer_async_write); - return ret; -} -EXPORT_SYMBOL(nobh_writepage); - -int nobh_truncate_page(struct address_space *mapping, - loff_t from, get_block_t *get_block) -{ - pgoff_t index = from >> PAGE_SHIFT; - struct inode *inode = mapping->host; - unsigned blocksize = i_blocksize(inode); - struct folio *folio; - struct buffer_head map_bh; - size_t offset; - sector_t iblock; - int err; - - /* Block boundary? Nothing to do */ - if (!(from & (blocksize - 1))) - return 0; - - folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_CREAT, - mapping_gfp_mask(mapping)); - err = -ENOMEM; - if (!folio) - goto out; - - if (folio_buffers(folio)) - goto has_buffers; - - iblock = from >> inode->i_blkbits; - map_bh.b_size = blocksize; - map_bh.b_state = 0; - err = get_block(inode, iblock, &map_bh, 0); - if (err) - goto unlock; - /* unmapped? It's a hole - nothing to do */ - if (!buffer_mapped(&map_bh)) - goto unlock; - - /* Ok, it's mapped. Make sure it's up-to-date */ - if (!folio_test_uptodate(folio)) { - err = mapping->a_ops->read_folio(NULL, folio); - if (err) { - folio_put(folio); - goto out; - } - folio_lock(folio); - if (!folio_test_uptodate(folio)) { - err = -EIO; - goto unlock; - } - if (folio_buffers(folio)) - goto has_buffers; - } - offset = offset_in_folio(folio, from); - folio_zero_segment(folio, offset, round_up(offset, blocksize)); - folio_mark_dirty(folio); - err = 0; - -unlock: - folio_unlock(folio); - folio_put(folio); -out: - return err; - -has_buffers: - folio_unlock(folio); - folio_put(folio); - return block_truncate_page(mapping, from, get_block); -} -EXPORT_SYMBOL(nobh_truncate_page); - int block_truncate_page(struct address_space *mapping, loff_t from, get_block_t *get_block) { diff --git a/fs/mpage.c b/fs/mpage.c index 681a4b9a36e3..b7e0b7fbb41f 100644 --- a/fs/mpage.c +++ b/fs/mpage.c @@ -404,7 +404,6 @@ struct mpage_data { struct bio *bio; sector_t last_block_in_bio; get_block_t *get_block; - unsigned use_writepage; }; /* @@ -624,15 +623,10 @@ confused: if (bio) bio = mpage_bio_submit(bio); - if (mpd->use_writepage) { - ret = mapping->a_ops->writepage(page, wbc); - } else { - ret = -EAGAIN; - goto out; - } /* * The caller has a ref on the inode, so *mapping is stable */ + ret = mapping->a_ops->writepage(page, wbc); mapping_set_error(mapping, ret); out: mpd->bio = bio; @@ -674,7 +668,6 @@ mpage_writepages(struct address_space *mapping, .bio = NULL, .last_block_in_bio = 0, .get_block = get_block, - .use_writepage = 1, }; ret = write_cache_pages(mapping, wbc, __mpage_writepage, &mpd); @@ -685,19 +678,3 @@ mpage_writepages(struct address_space *mapping, return ret; } EXPORT_SYMBOL(mpage_writepages); - -int mpage_writepage(struct page *page, get_block_t get_block, - struct writeback_control *wbc) -{ - struct mpage_data mpd = { - .bio = NULL, - .last_block_in_bio = 0, - .get_block = get_block, - .use_writepage = 0, - }; - int ret = __mpage_writepage(page, wbc, &mpd); - if (mpd.bio) - mpage_bio_submit(mpd.bio); - return ret; -} -EXPORT_SYMBOL(mpage_writepage); diff --git 
a/include/linux/buffer_head.h b/include/linux/buffer_head.h index b0366c89d6a4..61afb81cfdae 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -258,14 +258,6 @@ static inline vm_fault_t block_page_mkwrite_return(int err) } sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); -int nobh_write_begin(struct address_space *, loff_t, unsigned len, - struct page **, void **, get_block_t*); -int nobh_write_end(struct file *, struct address_space *, - loff_t, unsigned, unsigned, - struct page *, void *); -int nobh_truncate_page(struct address_space *, loff_t, get_block_t *); -int nobh_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); #ifdef CONFIG_MIGRATION extern int buffer_migrate_folio(struct address_space *, diff --git a/include/linux/mpage.h b/include/linux/mpage.h index 43986f7ec4dd..1bdc39daac0a 100644 --- a/include/linux/mpage.h +++ b/include/linux/mpage.h @@ -19,7 +19,5 @@ void mpage_readahead(struct readahead_control *, get_block_t get_block); int mpage_read_folio(struct folio *folio, get_block_t get_block); int mpage_writepages(struct address_space *mapping, struct writeback_control *wbc, get_block_t get_block); -int mpage_writepage(struct page *page, get_block_t *get_block, - struct writeback_control *wbc); #endif -- cgit v1.2.3 From d4252071b97d2027d246f6a82cbee4d52f618b47 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 9 Aug 2022 14:32:13 -0400 Subject: add barriers to buffer_uptodate and set_buffer_uptodate Let's have a look at this piece of code in __bread_slow: get_bh(bh); bh->b_end_io = end_buffer_read_sync; submit_bh(REQ_OP_READ, 0, bh); wait_on_buffer(bh); if (buffer_uptodate(bh)) return bh; Neither wait_on_buffer nor buffer_uptodate contain any memory barrier. Consequently, if someone calls sb_bread and then reads the buffer data, the read of buffer data may be executed before wait_on_buffer(bh) on architectures with weak memory ordering and it may return invalid data. Fix this bug by adding a memory barrier to set_buffer_uptodate and an acquire barrier to buffer_uptodate (in a similar way as folio_test_uptodate and folio_mark_uptodate). Signed-off-by: Mikulas Patocka Reviewed-by: Matthew Wilcox (Oracle) Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/buffer_head.h | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) (limited to 'include/linux/buffer_head.h') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 307445d1c69e..def8b8d30ccc 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -118,7 +118,6 @@ static __always_inline int test_clear_buffer_##name(struct buffer_head *bh) \ * of the form "mark_buffer_foo()". These are higher-level functions which * do something in addition to setting a b_state bit. 
*/ -BUFFER_FNS(Uptodate, uptodate) BUFFER_FNS(Dirty, dirty) TAS_BUFFER_FNS(Dirty, dirty) BUFFER_FNS(Lock, locked) @@ -136,6 +135,30 @@ BUFFER_FNS(Meta, meta) BUFFER_FNS(Prio, prio) BUFFER_FNS(Defer_Completion, defer_completion) +static __always_inline void set_buffer_uptodate(struct buffer_head *bh) +{ + /* + * make it consistent with folio_mark_uptodate + * pairs with smp_load_acquire in buffer_uptodate + */ + smp_mb__before_atomic(); + set_bit(BH_Uptodate, &bh->b_state); +} + +static __always_inline void clear_buffer_uptodate(struct buffer_head *bh) +{ + clear_bit(BH_Uptodate, &bh->b_state); +} + +static __always_inline int buffer_uptodate(const struct buffer_head *bh) +{ + /* + * make it consistent with folio_test_uptodate + * pairs with smp_mb__before_atomic in set_buffer_uptodate + */ + return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0; +} + #define bh_offset(bh) ((unsigned long)(bh)->b_data & ~PAGE_MASK) /* If we *know* page->private refers to buffer_heads */ -- cgit v1.2.3 From 8238b4579866b7c1bb99883cfe102a43db5506ff Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Fri, 26 Aug 2022 09:17:08 -0400 Subject: wait_on_bit: add an acquire memory barrier There are several places in the kernel where wait_on_bit is not followed by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read). On architectures with weak memory ordering, it may happen that memory accesses that follow wait_on_bit are reordered before wait_on_bit and they may return invalid data. Fix this class of bugs by introducing a new function "test_bit_acquire" that works like test_bit, but has acquire memory ordering semantics. Signed-off-by: Mikulas Patocka Acked-by: Will Deacon Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- Documentation/atomic_bitops.txt | 10 ++++------ arch/x86/include/asm/bitops.h | 21 +++++++++++++++++++++ include/asm-generic/bitops/generic-non-atomic.h | 14 ++++++++++++++ .../asm-generic/bitops/instrumented-non-atomic.h | 12 ++++++++++++ include/asm-generic/bitops/non-atomic.h | 1 + .../bitops/non-instrumented-non-atomic.h | 1 + include/linux/bitops.h | 1 + include/linux/buffer_head.h | 2 +- include/linux/wait_bit.h | 8 ++++---- kernel/sched/wait_bit.c | 2 +- 10 files changed, 60 insertions(+), 12 deletions(-) (limited to 'include/linux/buffer_head.h') diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt index d8b101c97031..edea4656c5c0 100644 --- a/Documentation/atomic_bitops.txt +++ b/Documentation/atomic_bitops.txt @@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is: - RMW operations that have a return value are fully ordered. - - RMW operations that are conditional are unordered on FAILURE, - otherwise the above rules apply. In the case of test_and_set_bit_lock(), - if the bit in memory is unchanged by the operation then it is deemed to have - failed. + - RMW operations that are conditional are fully ordered. -Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and -clear_bit_unlock() which has RELEASE semantics. +Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics, +clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has +ACQUIRE semantics. Since a platform only has a single means of achieving atomic operations the same barriers as for atomic_t are used, see atomic_t.txt. 
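For illustration, the class of reordering this series guards against, and the repaired pattern, look like this (a hypothetical sketch; flags, B_READY, data, compute() and consume() are placeholder names, not kernel APIs):

	/* Writer: publish the data before setting the flag bit. */
	data = compute();
	smp_mb__before_atomic();	/* order the store to data before set_bit() */
	set_bit(B_READY, &flags);

	/* Reader: with plain test_bit() the load of data could be reordered
	 * before the bit test on weakly ordered CPUs and observe stale data;
	 * test_bit_acquire() orders all subsequent loads after the test. */
	if (test_bit_acquire(B_READY, &flags))
		consume(data);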
From 8238b4579866b7c1bb99883cfe102a43db5506ff Mon Sep 17 00:00:00 2001
From: Mikulas Patocka
Date: Fri, 26 Aug 2022 09:17:08 -0400
Subject: wait_on_bit: add an acquire memory barrier

There are several places in the kernel where wait_on_bit is not followed
by a memory barrier (for example, in drivers/md/dm-bufio.c:new_read).

On architectures with weak memory ordering, it may happen that memory
accesses that follow wait_on_bit are reordered before wait_on_bit and
they may return invalid data.

Fix this class of bugs by introducing a new function "test_bit_acquire"
that works like test_bit, but has acquire memory ordering semantics.

Signed-off-by: Mikulas Patocka
Acked-by: Will Deacon
Cc: stable@vger.kernel.org
Signed-off-by: Linus Torvalds
---
 Documentation/atomic_bitops.txt                    | 10 ++++------
 arch/x86/include/asm/bitops.h                      | 21 +++++++++++++++++++++
 include/asm-generic/bitops/generic-non-atomic.h    | 14 ++++++++++++++
 .../asm-generic/bitops/instrumented-non-atomic.h   | 12 ++++++++++++
 include/asm-generic/bitops/non-atomic.h            |  1 +
 .../bitops/non-instrumented-non-atomic.h           |  1 +
 include/linux/bitops.h                             |  1 +
 include/linux/buffer_head.h                        |  2 +-
 include/linux/wait_bit.h                           |  8 ++++----
 kernel/sched/wait_bit.c                            |  2 +-
 10 files changed, 60 insertions(+), 12 deletions(-)

(limited to 'include/linux/buffer_head.h')

diff --git a/Documentation/atomic_bitops.txt b/Documentation/atomic_bitops.txt
index d8b101c97031..edea4656c5c0 100644
--- a/Documentation/atomic_bitops.txt
+++ b/Documentation/atomic_bitops.txt
@@ -58,13 +58,11 @@ Like with atomic_t, the rule of thumb is:
 
 - RMW operations that have a return value are fully ordered.
 
- - RMW operations that are conditional are unordered on FAILURE,
-   otherwise the above rules apply.  In the case of test_and_set_bit_lock(),
-   if the bit in memory is unchanged by the operation then it is deemed to have
-   failed.
+ - RMW operations that are conditional are fully ordered.
 
-Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics and
-clear_bit_unlock() which has RELEASE semantics.
+Except for a successful test_and_set_bit_lock() which has ACQUIRE semantics,
+clear_bit_unlock() which has RELEASE semantics and test_bit_acquire which has
+ACQUIRE semantics.
 
 Since a platform only has a single means of achieving atomic operations
 the same barriers as for atomic_t are used, see atomic_t.txt.
diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h
index 973c6bd17f98..0fe9de58af31 100644
--- a/arch/x86/include/asm/bitops.h
+++ b/arch/x86/include/asm/bitops.h
@@ -207,6 +207,20 @@ static __always_inline bool constant_test_bit(long nr, const volatile unsigned l
 		(addr[nr >> _BITOPS_LONG_SHIFT])) != 0;
 }
 
+static __always_inline bool constant_test_bit_acquire(long nr, const volatile unsigned long *addr)
+{
+	bool oldbit;
+
+	asm volatile("testb %2,%1"
+		     CC_SET(nz)
+		     : CC_OUT(nz) (oldbit)
+		     : "m" (((unsigned char *)addr)[nr >> 3]),
+		       "i" (1 << (nr & 7))
+		     :"memory");
+
+	return oldbit;
+}
+
 static __always_inline bool variable_test_bit(long nr, volatile const unsigned long *addr)
 {
 	bool oldbit;
@@ -226,6 +240,13 @@ arch_test_bit(unsigned long nr, const volatile unsigned long *addr)
 					  variable_test_bit(nr, addr);
 }
 
+static __always_inline bool
+arch_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
+{
+	return __builtin_constant_p(nr) ? constant_test_bit_acquire(nr, addr) :
+					  variable_test_bit(nr, addr);
+}
+
 /**
  * __ffs - find first set bit in word
  * @word: The word to search
diff --git a/include/asm-generic/bitops/generic-non-atomic.h b/include/asm-generic/bitops/generic-non-atomic.h
index 3d5ebd24652b..564a8c675d85 100644
--- a/include/asm-generic/bitops/generic-non-atomic.h
+++ b/include/asm-generic/bitops/generic-non-atomic.h
@@ -4,6 +4,7 @@
 #define __ASM_GENERIC_BITOPS_GENERIC_NON_ATOMIC_H
 
 #include <linux/bits.h>
+#include <asm/barrier.h>
 
 #ifndef _LINUX_BITOPS_H
 #error only <linux/bitops.h> can be included directly
@@ -127,6 +128,18 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr)
 	return 1UL & (addr[BIT_WORD(nr)] >> (nr & (BITS_PER_LONG-1)));
 }
 
+/**
+ * generic_test_bit_acquire - Determine, with acquire semantics, whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static __always_inline bool
+generic_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
+{
+	unsigned long *p = ((unsigned long *)addr) + BIT_WORD(nr);
+	return 1UL & (smp_load_acquire(p) >> (nr & (BITS_PER_LONG-1)));
+}
+
 /*
  * const_*() definitions provide good compile-time optimizations when
  * the passed arguments can be resolved at compile time.
@@ -137,6 +150,7 @@ generic_test_bit(unsigned long nr, const volatile unsigned long *addr)
 #define const___test_and_set_bit	generic___test_and_set_bit
 #define const___test_and_clear_bit	generic___test_and_clear_bit
 #define const___test_and_change_bit	generic___test_and_change_bit
+#define const_test_bit_acquire		generic_test_bit_acquire
 
 /**
  * const_test_bit - Determine whether a bit is set
diff --git a/include/asm-generic/bitops/instrumented-non-atomic.h b/include/asm-generic/bitops/instrumented-non-atomic.h
index 988a3bbfba34..2b238b161a62 100644
--- a/include/asm-generic/bitops/instrumented-non-atomic.h
+++ b/include/asm-generic/bitops/instrumented-non-atomic.h
@@ -142,4 +142,16 @@ _test_bit(unsigned long nr, const volatile unsigned long *addr)
 	return arch_test_bit(nr, addr);
 }
 
+/**
+ * _test_bit_acquire - Determine, with acquire semantics, whether a bit is set
+ * @nr: bit number to test
+ * @addr: Address to start counting from
+ */
+static __always_inline bool
+_test_bit_acquire(unsigned long nr, const volatile unsigned long *addr)
+{
+	instrument_atomic_read(addr + BIT_WORD(nr), sizeof(long));
+	return arch_test_bit_acquire(nr, addr);
+}
+
 #endif /* _ASM_GENERIC_BITOPS_INSTRUMENTED_NON_ATOMIC_H */
diff --git a/include/asm-generic/bitops/non-atomic.h b/include/asm-generic/bitops/non-atomic.h
index 5c37ced343ae..71f8d54a5195 100644
--- a/include/asm-generic/bitops/non-atomic.h
+++ b/include/asm-generic/bitops/non-atomic.h
@@ -13,6 +13,7 @@
 #define arch___test_and_change_bit generic___test_and_change_bit
 
 #define arch_test_bit generic_test_bit
+#define arch_test_bit_acquire generic_test_bit_acquire
 
 #include <asm-generic/bitops/non-instrumented-non-atomic.h>
diff --git a/include/asm-generic/bitops/non-instrumented-non-atomic.h b/include/asm-generic/bitops/non-instrumented-non-atomic.h
index bdb9b1ffaee9..0ddc78dfc358 100644
--- a/include/asm-generic/bitops/non-instrumented-non-atomic.h
+++ b/include/asm-generic/bitops/non-instrumented-non-atomic.h
@@ -12,5 +12,6 @@
 #define ___test_and_change_bit arch___test_and_change_bit
 
 #define _test_bit arch_test_bit
+#define _test_bit_acquire arch_test_bit_acquire
 
 #endif /* __ASM_GENERIC_BITOPS_NON_INSTRUMENTED_NON_ATOMIC_H */
diff --git a/include/linux/bitops.h b/include/linux/bitops.h
index cf9bf65039f2..3b89c64bcfd8 100644
--- a/include/linux/bitops.h
+++ b/include/linux/bitops.h
@@ -59,6 +59,7 @@ extern unsigned long __sw_hweight64(__u64 w);
 #define __test_and_clear_bit(nr, addr)	bitop(___test_and_clear_bit, nr, addr)
 #define __test_and_change_bit(nr, addr)	bitop(___test_and_change_bit, nr, addr)
 #define test_bit(nr, addr)		bitop(_test_bit, nr, addr)
+#define test_bit_acquire(nr, addr)	bitop(_test_bit_acquire, nr, addr)
 
 /*
  * Include this here because some architectures need generic_ffs/fls in
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index def8b8d30ccc..089c9ade4325 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -156,7 +156,7 @@ static __always_inline int buffer_uptodate(const struct buffer_head *bh)
 	 * make it consistent with folio_test_uptodate
 	 * pairs with smp_mb__before_atomic in set_buffer_uptodate
 	 */
-	return (smp_load_acquire(&bh->b_state) & (1UL << BH_Uptodate)) != 0;
+	return test_bit_acquire(BH_Uptodate, &bh->b_state);
 }
 
 #define bh_offset(bh)		((unsigned long)(bh)->b_data & ~PAGE_MASK)
diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h
index 7dec36aecbd9..7725b7579b78 100644
--- a/include/linux/wait_bit.h
+++ b/include/linux/wait_bit.h
@@ -71,7 +71,7 @@ static inline int
 wait_on_bit(unsigned long *word, int bit, unsigned mode)
 {
 	might_sleep();
-	if (!test_bit(bit, word))
+	if (!test_bit_acquire(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit(word, bit,
 				       bit_wait,
@@ -96,7 +96,7 @@ static inline int
 wait_on_bit_io(unsigned long *word, int bit, unsigned mode)
 {
 	might_sleep();
-	if (!test_bit(bit, word))
+	if (!test_bit_acquire(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit(word, bit,
 				       bit_wait_io,
@@ -123,7 +123,7 @@ wait_on_bit_timeout(unsigned long *word, int bit, unsigned mode,
 		    unsigned long timeout)
 {
 	might_sleep();
-	if (!test_bit(bit, word))
+	if (!test_bit_acquire(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit_timeout(word, bit,
 					       bit_wait_timeout,
@@ -151,7 +151,7 @@ wait_on_bit_action(unsigned long *word, int bit, wait_bit_action_f *action,
 		   unsigned mode)
 {
 	might_sleep();
-	if (!test_bit(bit, word))
+	if (!test_bit_acquire(bit, word))
 		return 0;
 	return out_of_line_wait_on_bit(word, bit, action, mode);
 }
diff --git a/kernel/sched/wait_bit.c b/kernel/sched/wait_bit.c
index d4788f810b55..0b1cd985dc27 100644
--- a/kernel/sched/wait_bit.c
+++ b/kernel/sched/wait_bit.c
@@ -47,7 +47,7 @@ __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_
 		prepare_to_wait(wq_head, &wbq_entry->wq_entry, mode);
 		if (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags))
 			ret = (*action)(&wbq_entry->key, mode);
-	} while (test_bit(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
+	} while (test_bit_acquire(wbq_entry->key.bit_nr, wbq_entry->key.flags) && !ret);
 
 	finish_wait(wq_head, &wbq_entry->wq_entry);
-- cgit v1.2.3
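Again as an aside from the patch itself, here is a hedged userspace model of
what test_bit_acquire adds over test_bit. The names my_test_bit_acquire and
wait_on_bit_fastpath are invented for this sketch; only the load-then-
shift-and-mask shape mirrors generic_test_bit_acquire above:

    /* build: cc -std=c11 -o tba tba.c */
    #include <stdatomic.h>
    #include <stdbool.h>
    #include <stdio.h>

    #define BITS_PER_LONG (8 * sizeof(unsigned long))
    #define BIT_WORD(nr)  ((nr) / BITS_PER_LONG)

    /* models generic_test_bit_acquire(): an acquire load of the word the
     * bit lives in, then the usual shift-and-mask; later memory reads in
     * the caller cannot be reordered before this load */
    static bool my_test_bit_acquire(unsigned long nr,
                                    const _Atomic unsigned long *addr)
    {
        unsigned long w = atomic_load_explicit(addr + BIT_WORD(nr),
                                               memory_order_acquire);
        return 1UL & (w >> (nr % BITS_PER_LONG));
    }

    /* models the wait_on_bit() fast path this patch fixes: returning 0
     * ("bit clear, proceed") must also make the caller's subsequent data
     * reads safe, hence the acquire variant instead of plain test_bit() */
    static int wait_on_bit_fastpath(const _Atomic unsigned long *word, int bit)
    {
        if (!my_test_bit_acquire(bit, word))
            return 0;
        return -1;  /* the sleeping slow path is omitted in this sketch */
    }

    int main(void)
    {
        _Atomic unsigned long word = 0;

        atomic_fetch_or_explicit(&word, 1UL << 3, memory_order_release);
        printf("bit 3 set: %d\n", my_test_bit_acquire(3, &word));
        printf("fastpath on clear bit 5: %d\n", wait_on_bit_fastpath(&word, 5));
        return 0;
    }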
From 2f79cdfe58c13949bbbb65ba5926abfe9561d0ec Mon Sep 17 00:00:00 2001
From: Linus Torvalds
Date: Wed, 31 Aug 2022 09:46:12 -0700
Subject: fs: only do a memory barrier for the first set_buffer_uptodate()

Commit d4252071b97d ("add barriers to buffer_uptodate and
set_buffer_uptodate") added proper memory barriers to the buffer head
BH_Uptodate bit, so that anybody who tests a buffer for being up-to-date
will be guaranteed to actually see initialized state.

However, that commit didn't _just_ add the memory barrier, it also ended
up dropping the "was it already set" logic that the BUFFER_FNS() macro
had.

That's conceptually the right thing for a generic "this is a memory
barrier" operation, but in the case of the buffer contents, we really
only care about the memory barrier for the _first_ time we set the bit,
in that the only memory ordering protection we need is to avoid anybody
seeing uninitialized memory contents.

Any other access ordering wouldn't be about the BH_Uptodate bit anyway,
and would require some other proper lock (typically BH_Lock or the folio
lock).  A reader that races with somebody invalidating the buffer head
isn't an issue wrt the memory ordering, it's a serialization issue.

Now, you'd think that the buffer head operations don't matter in this
day and age (and I certainly thought so), but apparently some loads
still end up being heavy users of buffer heads.  In particular, the
kernel test robot reported that not having this bit access optimization
in place caused a noticeable direct IO performance regression on ext4:

   fxmark.ssd_ext4_no_jnl_DWTL_54_directio.works/sec -26.5% regression

although you presumably need a fast disk and a lot of cores to actually
notice.

Link: https://lore.kernel.org/all/Yw8L7HTZ%2FdE2%2Fo9C@xsang-OptiPlex-9020/
Reported-by: kernel test robot
Tested-by: Fengwei Yin
Cc: Mikulas Patocka
Cc: Matthew Wilcox (Oracle)
Cc: stable@kernel.org
Signed-off-by: Linus Torvalds
---
 include/linux/buffer_head.h | 11 +++++++++++
 1 file changed, 11 insertions(+)

(limited to 'include/linux/buffer_head.h')

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 089c9ade4325..df518c429667 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -137,6 +137,17 @@ BUFFER_FNS(Defer_Completion, defer_completion)
 
 static __always_inline void set_buffer_uptodate(struct buffer_head *bh)
 {
+	/*
+	 * If somebody else already set this uptodate, they will
+	 * have done the memory barrier, and a reader will thus
+	 * see *some* valid buffer state.
+	 *
+	 * Any other serialization (with IO errors or whatever that
+	 * might clear the bit) has to come from other state (eg BH_Lock).
+	 */
+	if (test_bit(BH_Uptodate, &bh->b_state))
+		return;
+
 	/*
 	 * make it consistent with folio_mark_uptodate
 	 * pairs with smp_load_acquire in buffer_uptodate
-- cgit v1.2.3
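As a final illustrative aside (not kernel code), the test-before-set
pattern this commit restores can be modeled in a few lines of C11;
set_uptodate_optimized() and the b_state word are stand-in names:

    /* build: cc -std=c11 -o firstset firstset.c */
    #include <stdatomic.h>

    static _Atomic unsigned long b_state;
    #define BH_UPTODATE_BIT 0

    static void set_uptodate_optimized(void)
    {
        /* Already set?  Then whoever set it first did the release,
         * and readers already see valid buffer contents; skip the
         * RMW and the barrier entirely.  A plain relaxed load is
         * enough for this check. */
        if (atomic_load_explicit(&b_state, memory_order_relaxed)
            & (1UL << BH_UPTODATE_BIT))
            return;

        /* First setter publishes the buffer contents: this release
         * RMW pairs with the acquire load on the buffer_uptodate()
         * side. */
        atomic_fetch_or_explicit(&b_state, 1UL << BH_UPTODATE_BIT,
                                 memory_order_release);
    }

    int main(void)
    {
        set_uptodate_optimized();  /* pays the barrier once */
        set_uptodate_optimized();  /* fast path: load and early return */
        return 0;
    }

The relaxed early-out is safe because a reader that observes the bit set
synchronizes with whichever thread performed the first release RMW, which
is the same reasoning the commit message gives for only needing the
barrier the first time the bit is set.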