From 31b2ebc0929e964f4edfbfa7129d43f7e3c17165 Mon Sep 17 00:00:00 2001
From: "Ritesh Harjani (IBM)"
Date: Fri, 21 Apr 2023 15:16:12 +0530
Subject: fs/buffer.c: Add generic_buffers_fsync*() implementation

Some of the higher layers, like iomap, take inode_lock() when calling
generic_write_sync(). Writeback also already happens from other paths
without the inode lock, so it is hard to argue that
sync_mapping_buffers() really needs any inode locking here.

Having said that, let's add generic_buffers_fsync/_noflush()
implementations in buffer.c with no inode_lock/unlock() for now, so that
filesystems like ext2 and ext4's nojournal mode can use them. Ext4, when
it was converted to iomap for direct I/O, already copied its own variant
of __generic_file_fsync() without the lock.

This patch adds the generic_buffers_fsync() and
generic_buffers_fsync_noflush() implementations for use in filesystems
like ext2 and ext4 respectively. Later we can review other filesystems
as well, to see whether generic_buffers_fsync/_noflush(), which take no
inode_lock(), can become the default path.

Tested-by: Disha Goel
Reviewed-by: Christoph Hellwig
Signed-off-by: Ritesh Harjani (IBM)
Signed-off-by: Jan Kara
Message-Id:
---
 include/linux/buffer_head.h | 4 ++++
 1 file changed, 4 insertions(+)

(limited to 'include/linux/buffer_head.h')

diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 1520793c72da..1bd73cefd311 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -217,6 +217,10 @@ int inode_has_buffers(struct inode *);
 void invalidate_inode_buffers(struct inode *);
 int remove_inode_buffers(struct inode *inode);
 int sync_mapping_buffers(struct address_space *mapping);
+int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
+                                  bool datasync);
+int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
+                          bool datasync);
 void clean_bdev_aliases(struct block_device *bdev, sector_t block,
                         sector_t len);
 static inline void clean_bdev_bh_alias(struct buffer_head *bh)
--
cgit v1.2.3


From 53418a18fcbbb086dbfacbdd9b853c1071d3ec16 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Mon, 12 Jun 2023 22:01:31 +0100
Subject: buffer: convert __block_write_full_page() to __block_write_full_folio()

Remove nine hidden calls to compound_head() by using a folio instead of
a page.

Link: https://lkml.kernel.org/r/20230612210141.730128-5-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Tested-by: Bob Peterson
Reviewed-by: Bob Peterson
Cc: Andreas Gruenbacher
Cc: Hannes Reinecke
Cc: Luis Chamberlain
Signed-off-by: Andrew Morton
---
 fs/buffer.c                 | 53 +++++++++++++++++++++++----------------------
 fs/gfs2/aops.c              |  5 ++---
 fs/ntfs/aops.c              |  2 +-
 fs/reiserfs/inode.c         |  2 +-
 include/linux/buffer_head.h |  2 +-
 5 files changed, 32 insertions(+), 32 deletions(-)

(limited to 'include/linux/buffer_head.h')

diff --git a/fs/buffer.c b/fs/buffer.c
index a7fc561758b1..4d518df50fab 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1764,7 +1764,7 @@ static struct buffer_head *folio_create_buffers(struct folio *folio,
  * WB_SYNC_ALL, the writes are posted using REQ_SYNC; this
  * causes the writes to be flagged as synchronous writes.
 */
-int __block_write_full_page(struct inode *inode, struct page *page,
+int __block_write_full_folio(struct inode *inode, struct folio *folio,
                        get_block_t *get_block, struct writeback_control *wbc,
                        bh_end_io_t *handler)
 {
@@ -1776,14 +1776,14 @@ int __block_write_full_page(struct inode *inode, struct page *page,
        int nr_underway = 0;
        blk_opf_t write_flags = wbc_to_write_flags(wbc);

-       head = folio_create_buffers(page_folio(page), inode,
+       head = folio_create_buffers(folio, inode,
                                    (1 << BH_Dirty) | (1 << BH_Uptodate));

        /*
         * Be very careful.  We have no exclusion from block_dirty_folio
         * here, and the (potentially unmapped) buffers may become dirty at
         * any time.  If a buffer becomes dirty here after we've inspected it
-        * then we just miss that fact, and the page stays dirty.
+        * then we just miss that fact, and the folio stays dirty.
         *
         * Buffers outside i_size may be dirtied by block_dirty_folio;
         * handle that here by just cleaning them.
         */
@@ -1793,7 +1793,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
        blocksize = bh->b_size;
        bbits = block_size_bits(blocksize);

-       block = (sector_t)page->index << (PAGE_SHIFT - bbits);
+       block = (sector_t)folio->index << (PAGE_SHIFT - bbits);
        last_block = (i_size_read(inode) - 1) >> bbits;

        /*
@@ -1804,7 +1804,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
                if (block > last_block) {
                        /*
                         * mapped buffers outside i_size will occur, because
-                        * this page can be outside i_size when there is a
+                        * this folio can be outside i_size when there is a
                         * truncate in progress.
                         */
                        /*
@@ -1834,7 +1834,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
                        continue;
                /*
                 * If it's a fully non-blocking write attempt and we cannot
-                * lock the buffer then redirty the page.  Note that this can
+                * lock the buffer then redirty the folio.  Note that this can
                 * potentially cause a busy-wait loop from writeback threads
                 * and kswapd activity, but those code paths have their own
                 * higher-level throttling.
                 */
@@ -1842,7 +1842,7 @@ int __block_write_full_page(struct inode *inode, struct page *page,
                if (wbc->sync_mode != WB_SYNC_NONE) {
                        lock_buffer(bh);
                } else if (!trylock_buffer(bh)) {
-                       redirty_page_for_writepage(wbc, page);
+                       folio_redirty_for_writepage(wbc, folio);
                        continue;
                }
                if (test_clear_buffer_dirty(bh)) {
@@ -1853,11 +1853,11 @@ int __block_write_full_page(struct inode *inode, struct page *page,
        } while ((bh = bh->b_this_page) != head);

        /*
-        * The page and its buffers are protected by PageWriteback(), so we can
-        * drop the bh refcounts early.
+        * The folio and its buffers are protected by the writeback flag,
+        * so we can drop the bh refcounts early.
         */
-       BUG_ON(PageWriteback(page));
-       set_page_writeback(page);
+       BUG_ON(folio_test_writeback(folio));
+       folio_start_writeback(folio);

        do {
                struct buffer_head *next = bh->b_this_page;
@@ -1867,20 +1867,20 @@ int __block_write_full_page(struct inode *inode, struct page *page,
                }
                bh = next;
        } while (bh != head);
-       unlock_page(page);
+       folio_unlock(folio);

        err = 0;
 done:
        if (nr_underway == 0) {
                /*
-                * The page was marked dirty, but the buffers were
+                * The folio was marked dirty, but the buffers were
                 * clean.  Someone wrote them back by hand with
                 * write_dirty_buffer/submit_bh.  A rare case.
                 */
-               end_page_writeback(page);
+               folio_end_writeback(folio);

                /*
-                * The page and buffer_heads can be released at any time from
+                * The folio and buffer_heads can be released at any time from
                 * here on.
                 */
        }
@@ -1891,7 +1891,7 @@ recover:
         * ENOSPC, or some other error.  We may already have added some
         * blocks to the file, so we need to write these out to avoid
         * exposing stale data.
-        * The page is currently locked and not marked for writeback
+        * The folio is currently locked and not marked for writeback
         */
        bh = head;
        /* Recovery: lock and submit the mapped buffers */
        do {
                if (buffer_mapped(bh) && buffer_dirty(bh) && !buffer_delay(bh)) {
                        lock_buffer(bh);
                        mark_buffer_async_write_endio(bh, handler);
@@ -1903,15 +1903,15 @@ recover:
                } else {
                        /*
                         * The buffer may have been set dirty during
-                        * attachment to a dirty page.
+                        * attachment to a dirty folio.
                         */
                        clear_buffer_dirty(bh);
                }
        } while ((bh = bh->b_this_page) != head);
-       SetPageError(page);
-       BUG_ON(PageWriteback(page));
-       mapping_set_error(page->mapping, err);
-       set_page_writeback(page);
+       folio_set_error(folio);
+       BUG_ON(folio_test_writeback(folio));
+       mapping_set_error(folio->mapping, err);
+       folio_start_writeback(folio);
        do {
                struct buffer_head *next = bh->b_this_page;
                if (buffer_async_write(bh)) {
                        clear_buffer_dirty(bh);
                        submit_bh_wbc(REQ_OP_WRITE | write_flags, bh, wbc);
                        nr_underway++;
@@ -1921,10 +1921,10 @@ recover:
                }
                bh = next;
        } while (bh != head);
-       unlock_page(page);
+       folio_unlock(folio);
        goto done;
 }
-EXPORT_SYMBOL(__block_write_full_page);
+EXPORT_SYMBOL(__block_write_full_folio);

 /*
  * If a page has any new buffers, zero them out here, and mark them uptodate
@@ -2677,6 +2677,7 @@ EXPORT_SYMBOL(block_truncate_page);
 int block_write_full_page(struct page *page, get_block_t *get_block,
                        struct writeback_control *wbc)
 {
+       struct folio *folio = page_folio(page);
        struct inode * const inode = page->mapping->host;
        loff_t i_size = i_size_read(inode);
        const pgoff_t end_index = i_size >> PAGE_SHIFT;
@@ -2684,13 +2685,13 @@ int block_write_full_page(struct page *page, get_block_t *get_block,

        /* Is the page fully inside i_size? */
        if (page->index < end_index)
-               return __block_write_full_page(inode, page, get_block, wbc,
+               return __block_write_full_folio(inode, folio, get_block, wbc,
                                               end_buffer_async_write);

        /* Is the page fully outside i_size? (truncate in progress) */
        offset = i_size & (PAGE_SIZE-1);
        if (page->index >= end_index+1 || !offset) {
-               unlock_page(page);
+               folio_unlock(folio);
                return 0; /* don't care */
        }

@@ -2702,7 +2703,7 @@ int block_write_full_page(struct page *page, get_block_t *get_block,
        /*
         * The page straddles i_size.  It must be zeroed out on each and every
         * writepage invocation because it may be mmapped.  "A file is mapped
         * in multiples of the page size.  For a file that is not a multiple of
         * the page size, the remaining memory is zeroed when mapped, and
         * writes to that region are not written out to the file."
         */
        zero_user_segment(page, offset, PAGE_SIZE);
-       return __block_write_full_page(inode, page, get_block, wbc,
+       return __block_write_full_folio(inode, folio, get_block, wbc,
                                       end_buffer_async_write);
 }
 EXPORT_SYMBOL(block_write_full_page);
diff --git a/fs/gfs2/aops.c b/fs/gfs2/aops.c
index ec5b5c1ea634..3a2be1901e1e 100644
--- a/fs/gfs2/aops.c
+++ b/fs/gfs2/aops.c
@@ -107,9 +107,8 @@ static int gfs2_write_jdata_folio(struct folio *folio,
                folio_zero_segment(folio, offset_in_folio(folio, i_size),
                                folio_size(folio));

-       return __block_write_full_page(inode, &folio->page,
-                                      gfs2_get_block_noalloc, wbc,
-                                      end_buffer_async_write);
+       return __block_write_full_folio(inode, folio, gfs2_get_block_noalloc,
+                                       wbc, end_buffer_async_write);
 }

 /**
diff --git a/fs/ntfs/aops.c b/fs/ntfs/aops.c
index e8aeba124a95..4e158bce4192 100644
--- a/fs/ntfs/aops.c
+++ b/fs/ntfs/aops.c
@@ -526,7 +526,7 @@ err_out:
 *
 * Return 0 on success and -errno on error.
 *
- * Based on ntfs_read_block() and __block_write_full_page().
+ * Based on ntfs_read_block() and __block_write_full_folio().
 */
 static int ntfs_write_block(struct page *page, struct writeback_control *wbc)
 {
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index d8debbb6105f..ff34ee49106f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2506,7 +2506,7 @@ out:

 /*
  * mason@suse.com: updated in 2.5.54 to follow the same general io
- * start/recovery path as __block_write_full_page, along with special
+ * start/recovery path as __block_write_full_folio, along with special
  * code to handle reiserfs tails.
  */
 static int reiserfs_write_full_page(struct page *page,
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index 1520793c72da..a366e01f8bd4 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -263,7 +263,7 @@ extern int buffer_heads_over_limit;
 void block_invalidate_folio(struct folio *folio, size_t offset, size_t length);
 int block_write_full_page(struct page *page, get_block_t *get_block,
                                struct writeback_control *wbc);
-int __block_write_full_page(struct inode *inode, struct page *page,
+int __block_write_full_folio(struct inode *inode, struct folio *folio,
                        get_block_t *get_block, struct writeback_control *wbc,
                        bh_end_io_t *handler);
 int block_read_full_folio(struct folio *, get_block_t *);
--
cgit v1.2.3


From 4a9622f2fdaee84c373f3f285d898a3ea60ee9f2 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)"
Date: Mon, 12 Jun 2023 22:01:36 +0100
Subject: buffer: convert page_zero_new_buffers() to folio_zero_new_buffers()

Most of the callers already have a folio; convert reiserfs_write_end()
to have a folio.  Removes a couple of hidden calls to compound_head().

Link: https://lkml.kernel.org/r/20230612210141.730128-10-willy@infradead.org
Signed-off-by: Matthew Wilcox (Oracle)
Cc: Andreas Gruenbacher
Cc: Bob Peterson
Cc: Hannes Reinecke
Cc: Luis Chamberlain
Signed-off-by: Andrew Morton
---
 fs/buffer.c                 | 27 ++++++++++++++------------
 fs/ext4/inode.c             |  4 ++--
 fs/reiserfs/inode.c         |  7 ++++---
 include/linux/buffer_head.h |  2 +-
 4 files changed, 21 insertions(+), 19 deletions(-)

(limited to 'include/linux/buffer_head.h')

diff --git a/fs/buffer.c b/fs/buffer.c
index 97c64b05151f..e4bd465ecee8 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -1927,33 +1927,34 @@ recover:
 EXPORT_SYMBOL(__block_write_full_folio);

 /*
- * If a page has any new buffers, zero them out here, and mark them uptodate
+ * If a folio has any new buffers, zero them out here, and mark them uptodate
  * and dirty so they'll be written out (in order to prevent uninitialised
  * block data from leaking). And clear the new bit.
 */
-void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
+void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to)
 {
-       unsigned int block_start, block_end;
+       size_t block_start, block_end;
        struct buffer_head *head, *bh;

-       BUG_ON(!PageLocked(page));
-       if (!page_has_buffers(page))
+       BUG_ON(!folio_test_locked(folio));
+       head = folio_buffers(folio);
+       if (!head)
                return;

-       bh = head = page_buffers(page);
+       bh = head;
        block_start = 0;
        do {
                block_end = block_start + bh->b_size;
                if (buffer_new(bh)) {
                        if (block_end > from && block_start < to) {
-                               if (!PageUptodate(page)) {
-                                       unsigned start, size;
+                               if (!folio_test_uptodate(folio)) {
+                                       size_t start, xend;

                                        start = max(from, block_start);
-                                       size = min(to, block_end) - start;
+                                       xend = min(to, block_end);

-                                       zero_user(page, start, size);
+                                       folio_zero_segment(folio, start, xend);
                                        set_buffer_uptodate(bh);
                                }
@@ -1966,7 +1967,7 @@ void page_zero_new_buffers(struct page *page, unsigned from, unsigned to)
                bh = bh->b_this_page;
        } while (bh != head);
 }
-EXPORT_SYMBOL(page_zero_new_buffers);
+EXPORT_SYMBOL(folio_zero_new_buffers);

 static void iomap_to_bh(struct inode *inode, sector_t block,
                struct buffer_head *bh,
@@ -2104,7 +2105,7 @@ int __block_write_begin_int(struct folio *folio, loff_t pos, unsigned len,
                        err = -EIO;
        }
        if (unlikely(err))
-               page_zero_new_buffers(&folio->page, from, to);
+               folio_zero_new_buffers(folio, from, to);
        return err;
 }
@@ -2208,7 +2209,7 @@ int block_write_end(struct file *file, struct address_space *mapping,
                if (!folio_test_uptodate(folio))
                        copied = 0;

-               page_zero_new_buffers(&folio->page, start+copied, start+len);
+               folio_zero_new_buffers(folio, start+copied, start+len);
        }
        flush_dcache_folio(folio);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index ce5f21b6c2b3..31b839a0ce8b 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -1093,7 +1093,7 @@ static int ext4_block_write_begin(struct folio *folio, loff_t pos, unsigned len,
                        err = -EIO;
        }
        if (unlikely(err)) {
-               page_zero_new_buffers(&folio->page, from, to);
+               folio_zero_new_buffers(folio, from, to);
        } else if (fscrypt_inode_uses_fs_layer_crypto(inode)) {
                for (i = 0; i < nr_wait; i++) {
                        int err2;
@@ -1339,7 +1339,7 @@ static int ext4_write_end(struct file *file,
 }

 /*
- * This is a private version of page_zero_new_buffers() which doesn't
+ * This is a private version of folio_zero_new_buffers() which doesn't
  * set the buffer to be dirty, since in data=journalled mode we need
  * to call ext4_dirty_journalled_data() instead.
 */
diff --git a/fs/reiserfs/inode.c b/fs/reiserfs/inode.c
index ff34ee49106f..77bd3b27059f 100644
--- a/fs/reiserfs/inode.c
+++ b/fs/reiserfs/inode.c
@@ -2872,6 +2872,7 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,
                              loff_t pos, unsigned len, unsigned copied,
                              struct page *page, void *fsdata)
 {
+       struct folio *folio = page_folio(page);
        struct inode *inode = page->mapping->host;
        int ret = 0;
        int update_sd = 0;
@@ -2887,12 +2888,12 @@ static int reiserfs_write_end(struct file *file, struct address_space *mapping,

        start = pos & (PAGE_SIZE - 1);
        if (unlikely(copied < len)) {
-               if (!PageUptodate(page))
+               if (!folio_test_uptodate(folio))
                        copied = 0;

-               page_zero_new_buffers(page, start + copied, start + len);
+               folio_zero_new_buffers(folio, start + copied, start + len);
        }
-       flush_dcache_page(page);
+       flush_dcache_folio(folio);

        reiserfs_commit_page(inode, page, start, start + copied);
diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h
index a366e01f8bd4..c794ea7096ba 100644
--- a/include/linux/buffer_head.h
+++ b/include/linux/buffer_head.h
@@ -278,7 +278,7 @@ int block_write_end(struct file *, struct address_space *,
 int generic_write_end(struct file *, struct address_space *,
                                loff_t, unsigned, unsigned,
                                struct page *, void *);
-void page_zero_new_buffers(struct page *page, unsigned from, unsigned to);
+void folio_zero_new_buffers(struct folio *folio, size_t from, size_t to);
 void clean_page_buffers(struct page *page);
 int cont_write_begin(struct file *, struct address_space *, loff_t,
                        unsigned, struct page **, void **,
--
cgit v1.2.3
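
A note on the first patch above: because this view is limited to
include/linux/buffer_head.h, only the declarations of generic_buffers_fsync()
and generic_buffers_fsync_noflush() are visible; their fs/buffer.c bodies are
not shown. The sketch below illustrates the shape the commit message describes
(__generic_file_fsync() logic minus inode_lock()/inode_unlock()). It is an
approximation, not text quoted from the patch; in particular the internal
helper name __generic_buffers_fsync and the exact body are assumptions.

/*
 * Sketch only: approximates the fs/buffer.c side described in the commit
 * message (like __generic_file_fsync(), but taking no inode_lock()).
 */
#include <linux/fs.h>
#include <linux/blkdev.h>
#include <linux/buffer_head.h>

static int __generic_buffers_fsync(struct file *file, loff_t start,
                                   loff_t end, bool datasync)
{
        struct inode *inode = file->f_mapping->host;
        int err;
        int ret;

        /* Write back and wait on the data range; note: no inode_lock(). */
        err = file_write_and_wait_range(file, start, end);
        if (err)
                return err;

        ret = sync_mapping_buffers(inode->i_mapping);
        if (!(inode->i_state & I_DIRTY_ALL))
                goto out;
        if (datasync && !(inode->i_state & I_DIRTY_DATASYNC))
                goto out;

        err = sync_inode_metadata(inode, 1);
        if (ret == 0)
                ret = err;

out:
        /* Pick up writeback errors that surfaced after syncing the data. */
        err = file_check_and_advance_wb_err(file);
        if (ret == 0)
                ret = err;
        return ret;
}

/* Variant for filesystems that issue any needed cache flush themselves. */
int generic_buffers_fsync_noflush(struct file *file, loff_t start, loff_t end,
                                  bool datasync)
{
        return __generic_buffers_fsync(file, start, end, datasync);
}

/* Variant that also flushes the device's volatile write cache. */
int generic_buffers_fsync(struct file *file, loff_t start, loff_t end,
                          bool datasync)
{
        struct inode *inode = file->f_mapping->host;
        int ret;

        ret = __generic_buffers_fsync(file, start, end, datasync);
        if (!ret)
                ret = blkdev_issue_flush(inode->i_sb->s_bdev);
        return ret;
}

Per the commit message, generic_buffers_fsync() is intended for filesystems
like ext2, while the _noflush variant is intended for ext4's nojournal mode,
which handles the device cache flush on its own.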