From 224941e8379a0de8652ffec768cc8394f0b1cb95 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Apr 2024 15:53:37 +0100 Subject: use ->bd_mapping instead of ->bd_inode->i_mapping Just the low-hanging fruit... Signed-off-by: Al Viro Link: https://lore.kernel.org/r/20240411145346.2516848-2-viro@zeniv.linux.org.uk Signed-off-by: Christian Brauner --- fs/buffer.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 4f73d23c2c46..d5a0932ae68d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1463,7 +1463,7 @@ __bread_gfp(struct block_device *bdev, sector_t block, { struct buffer_head *bh; - gfp |= mapping_gfp_constraint(bdev->bd_inode->i_mapping, ~__GFP_FS); + gfp |= mapping_gfp_constraint(bdev->bd_mapping, ~__GFP_FS); /* * Prefer looping in the allocator rather than here, at least that -- cgit v1.2.3 From 22f89a4f8c049b884c320bc7477397916ee97b63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 11 Apr 2024 15:53:38 +0100 Subject: grow_dev_folio(): we only want ->bd_inode->i_mapping there Signed-off-by: Al Viro Link: https://lore.kernel.org/r/20240411145346.2516848-3-viro@zeniv.linux.org.uk Reviewed-by: Matthew Wilcox (Oracle) Signed-off-by: Christian Brauner --- fs/buffer.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index d5a0932ae68d..78a4e95ba2f2 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1034,12 +1034,12 @@ static sector_t folio_init_buffers(struct folio *folio, static bool grow_dev_folio(struct block_device *bdev, sector_t block, pgoff_t index, unsigned size, gfp_t gfp) { - struct inode *inode = bdev->bd_inode; + struct address_space *mapping = bdev->bd_mapping; struct folio *folio; struct buffer_head *bh; sector_t end_block = 0; - folio = __filemap_get_folio(inode->i_mapping, index, + folio = __filemap_get_folio(mapping, index, FGP_LOCK | FGP_ACCESSED | FGP_CREAT, gfp); if (IS_ERR(folio)) return false; @@ -1073,10 +1073,10 @@ static bool grow_dev_folio(struct block_device *bdev, sector_t block, * lock to be atomic wrt __find_get_block(), which does not * run under the folio lock. */ - spin_lock(&inode->i_mapping->i_private_lock); + spin_lock(&mapping->i_private_lock); link_dev_buffers(folio, bh); end_block = folio_init_buffers(folio, bdev, size); - spin_unlock(&inode->i_mapping->i_private_lock); + spin_unlock(&mapping->i_private_lock); unlock: folio_unlock(folio); folio_put(folio); -- cgit v1.2.3 From 53cd4cd3b12d8a62719bef950f298a2bc5a6b415 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 28 Apr 2024 19:41:13 -0400 Subject: fs/buffer.c: massage the remaining users of ->bd_inode to ->bd_mapping both for ->i_blkbits and both want the address_space in question anyway. Signed-off-by: Al Viro --- fs/buffer.c | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 78a4e95ba2f2..ac29e0f221bc 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -189,8 +189,8 @@ EXPORT_SYMBOL(end_buffer_write_sync); static struct buffer_head * __find_get_block_slow(struct block_device *bdev, sector_t block) { - struct inode *bd_inode = bdev->bd_inode; - struct address_space *bd_mapping = bd_inode->i_mapping; + struct address_space *bd_mapping = bdev->bd_mapping; + const int blkbits = bd_mapping->host->i_blkbits; struct buffer_head *ret = NULL; pgoff_t index; struct buffer_head *bh; @@ -199,7 +199,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) int all_mapped = 1; static DEFINE_RATELIMIT_STATE(last_warned, HZ, 1); - index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE; + index = ((loff_t)block << blkbits) / PAGE_SIZE; folio = __filemap_get_folio(bd_mapping, index, FGP_ACCESSED, 0); if (IS_ERR(folio)) goto out; @@ -233,7 +233,7 @@ __find_get_block_slow(struct block_device *bdev, sector_t block) (unsigned long long)block, (unsigned long long)bh->b_blocknr, bh->b_state, bh->b_size, bdev, - 1 << bd_inode->i_blkbits); + 1 << blkbits); } out_unlock: spin_unlock(&bd_mapping->i_private_lock); @@ -1696,16 +1696,16 @@ EXPORT_SYMBOL(create_empty_buffers); */ void clean_bdev_aliases(struct block_device *bdev, sector_t block, sector_t len) { - struct inode *bd_inode = bdev->bd_inode; - struct address_space *bd_mapping = bd_inode->i_mapping; + struct address_space *bd_mapping = bdev->bd_mapping; + const int blkbits = bd_mapping->host->i_blkbits; struct folio_batch fbatch; - pgoff_t index = ((loff_t)block << bd_inode->i_blkbits) / PAGE_SIZE; + pgoff_t index = ((loff_t)block << blkbits) / PAGE_SIZE; pgoff_t end; int i, count; struct buffer_head *bh; struct buffer_head *head; - end = ((loff_t)(block + len - 1) << bd_inode->i_blkbits) / PAGE_SIZE; + end = ((loff_t)(block + len - 1) << blkbits) / PAGE_SIZE; folio_batch_init(&fbatch); while (filemap_get_folios(bd_mapping, &index, end, &fbatch)) { count = folio_batch_count(&fbatch); -- cgit v1.2.3 From 3814ec89540d9ce1a92cb4c9a6f9f7a0a343d73d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 04:17:46 +0100 Subject: buffer: add kernel-doc for block_dirty_folio() Turn the excellent documentation for this function into kernel-doc. Replace 'page' with 'folio' and make a few other minor updates. Link: https://lkml.kernel.org/r/20240416031754.4076917-3-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Pankaj Raghav Tested-by: Randy Dunlap Signed-off-by: Andrew Morton --- fs/buffer.c | 55 +++++++++++++++++++++++++++++++------------------------ 1 file changed, 31 insertions(+), 24 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 4f73d23c2c46..b08526bdcb54 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -687,30 +687,37 @@ void mark_buffer_dirty_inode(struct buffer_head *bh, struct inode *inode) } EXPORT_SYMBOL(mark_buffer_dirty_inode); -/* - * Add a page to the dirty page list. - * - * It is a sad fact of life that this function is called from several places - * deeply under spinlocking. It may not sleep. - * - * If the page has buffers, the uptodate buffers are set dirty, to preserve - * dirty-state coherency between the page and the buffers. It the page does - * not have buffers then when they are later attached they will all be set - * dirty. - * - * The buffers are dirtied before the page is dirtied. There's a small race - * window in which a writepage caller may see the page cleanness but not the - * buffer dirtiness. That's fine. If this code were to set the page dirty - * before the buffers, a concurrent writepage caller could clear the page dirty - * bit, see a bunch of clean buffers and we'd end up with dirty buffers/clean - * page on the dirty page list. - * - * We use i_private_lock to lock against try_to_free_buffers while using the - * page's buffer list. Also use this to protect against clean buffers being - * added to the page after it was set dirty. - * - * FIXME: may need to call ->reservepage here as well. That's rather up to the - * address_space though. +/** + * block_dirty_folio - Mark a folio as dirty. + * @mapping: The address space containing this folio. + * @folio: The folio to mark dirty. + * + * Filesystems which use buffer_heads can use this function as their + * ->dirty_folio implementation. Some filesystems need to do a little + * work before calling this function. Filesystems which do not use + * buffer_heads should call filemap_dirty_folio() instead. + * + * If the folio has buffers, the uptodate buffers are set dirty, to + * preserve dirty-state coherency between the folio and the buffers. + * Buffers added to a dirty folio are created dirty. + * + * The buffers are dirtied before the folio is dirtied. There's a small + * race window in which writeback may see the folio cleanness but not the + * buffer dirtiness. That's fine. If this code were to set the folio + * dirty before the buffers, writeback could clear the folio dirty flag, + * see a bunch of clean buffers and we'd end up with dirty buffers/clean + * folio on the dirty folio list. + * + * We use i_private_lock to lock against try_to_free_buffers() while + * using the folio's buffer list. This also prevents clean buffers + * being added to the folio after it was set dirty. + * + * Context: May only be called from process context. Does not sleep. + * Caller must ensure that @folio cannot be truncated during this call, + * typically by holding the folio lock or having a page in the folio + * mapped and holding the page table lock. + * + * Return: True if the folio was dirtied; false if it was already dirtied. */ bool block_dirty_folio(struct address_space *mapping, struct folio *folio) { -- cgit v1.2.3 From b1888d143203589b71ab31b39d1070737287bc79 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 04:17:47 +0100 Subject: buffer: add kernel-doc for try_to_free_buffers() The documentation for this function has become separated from it over time; move it to the right place and turn it into kernel-doc. Mild editing of the content to make it more about what the function does, and less about how it does it. Link: https://lkml.kernel.org/r/20240416031754.4076917-4-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Pankaj Raghav Tested-by: Randy Dunlap Signed-off-by: Andrew Morton --- fs/buffer.c | 44 ++++++++++++++++++++++++-------------------- 1 file changed, 24 insertions(+), 20 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index b08526bdcb54..0466ed7ed95a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -2868,26 +2868,6 @@ int sync_dirty_buffer(struct buffer_head *bh) } EXPORT_SYMBOL(sync_dirty_buffer); -/* - * try_to_free_buffers() checks if all the buffers on this particular folio - * are unused, and releases them if so. - * - * Exclusion against try_to_free_buffers may be obtained by either - * locking the folio or by holding its mapping's i_private_lock. - * - * If the folio is dirty but all the buffers are clean then we need to - * be sure to mark the folio clean as well. This is because the folio - * may be against a block device, and a later reattachment of buffers - * to a dirty folio will set *all* buffers dirty. Which would corrupt - * filesystem data on the same device. - * - * The same applies to regular filesystem folios: if all the buffers are - * clean then we set the folio clean and proceed. To do that, we require - * total exclusion from block_dirty_folio(). That is obtained with - * i_private_lock. - * - * try_to_free_buffers() is non-blocking. - */ static inline int buffer_busy(struct buffer_head *bh) { return atomic_read(&bh->b_count) | @@ -2921,6 +2901,30 @@ failed: return false; } +/** + * try_to_free_buffers - Release buffers attached to this folio. + * @folio: The folio. + * + * If any buffers are in use (dirty, under writeback, elevated refcount), + * no buffers will be freed. + * + * If the folio is dirty but all the buffers are clean then we need to + * be sure to mark the folio clean as well. This is because the folio + * may be against a block device, and a later reattachment of buffers + * to a dirty folio will set *all* buffers dirty. Which would corrupt + * filesystem data on the same device. + * + * The same applies to regular filesystem folios: if all the buffers are + * clean then we set the folio clean and proceed. To do that, we require + * total exclusion from block_dirty_folio(). That is obtained with + * i_private_lock. + * + * Exclusion against try_to_free_buffers may be obtained by either + * locking the folio or by holding its mapping's i_private_lock. + * + * Context: Process context. @folio must be locked. Will not sleep. + * Return: true if all buffers attached to this folio were freed. + */ bool try_to_free_buffers(struct folio *folio) { struct address_space * const mapping = folio->mapping; -- cgit v1.2.3 From 324ecaee46f86c1eaf083fd82eaf997335e70163 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 04:17:48 +0100 Subject: buffer: fix __bread and __bread_gfp kernel-doc The extra indentation confused the kernel-doc parser, so remove it. Fix some other wording while I'm here, and advise the user they need to call brelse() on this buffer. __bread_gfp() isn't used directly by filesystems, but the other wrappers for it don't have documentation, so document it accordingly. Link: https://lkml.kernel.org/r/20240416031754.4076917-5-willy@infradead.org Co-developed-by: Pankaj Raghav Signed-off-by: Pankaj Raghav Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Randy Dunlap Signed-off-by: Andrew Morton --- fs/buffer.c | 35 ++++++++++++++++++++++------------- include/linux/buffer_head.h | 22 +++++++++++++--------- 2 files changed, 35 insertions(+), 22 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 0466ed7ed95a..32ab3eddc44f 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1453,20 +1453,29 @@ void __breadahead(struct block_device *bdev, sector_t block, unsigned size) EXPORT_SYMBOL(__breadahead); /** - * __bread_gfp() - reads a specified block and returns the bh - * @bdev: the block_device to read from - * @block: number of block - * @size: size (in bytes) to read - * @gfp: page allocation flag - * - * Reads a specified block, and returns buffer head that contains it. - * The page cache can be allocated from non-movable area - * not to prevent page migration if you set gfp to zero. - * It returns NULL if the block was unreadable. + * __bread_gfp() - Read a block. + * @bdev: The block device to read from. + * @block: Block number in units of block size. + * @size: The block size of this device in bytes. + * @gfp: Not page allocation flags; see below. + * + * You are not expected to call this function. You should use one of + * sb_bread(), sb_bread_unmovable() or __bread(). + * + * Read a specified block, and return the buffer head that refers to it. + * If @gfp is 0, the memory will be allocated using the block device's + * default GFP flags. If @gfp is __GFP_MOVABLE, the memory may be + * allocated from a movable area. Do not pass in a complete set of + * GFP flags. + * + * The returned buffer head has its refcount increased. The caller should + * call brelse() when it has finished with the buffer. + * + * Context: May sleep waiting for I/O. + * Return: NULL if the block was unreadable. */ -struct buffer_head * -__bread_gfp(struct block_device *bdev, sector_t block, - unsigned size, gfp_t gfp) +struct buffer_head *__bread_gfp(struct block_device *bdev, sector_t block, + unsigned size, gfp_t gfp) { struct buffer_head *bh; diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d78454a4dd1f..56a1e9c1e71e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -437,17 +437,21 @@ static inline void bh_readahead_batch(int nr, struct buffer_head *bhs[], } /** - * __bread() - reads a specified block and returns the bh - * @bdev: the block_device to read from - * @block: number of block - * @size: size (in bytes) to read + * __bread() - Read a block. + * @bdev: The block device to read from. + * @block: Block number in units of block size. + * @size: The block size of this device in bytes. * - * Reads a specified block, and returns buffer head that contains it. - * The page cache is allocated from movable area so that it can be migrated. - * It returns NULL if the block was unreadable. + * Read a specified block, and return the buffer head that refers + * to it. The memory is allocated from the movable area so that it can + * be migrated. The returned buffer head has its refcount increased. + * The caller should call brelse() when it has finished with the buffer. + * + * Context: May sleep waiting for I/O. + * Return: NULL if the block was unreadable. */ -static inline struct buffer_head * -__bread(struct block_device *bdev, sector_t block, unsigned size) +static inline struct buffer_head *__bread(struct block_device *bdev, + sector_t block, unsigned size) { return __bread_gfp(bdev, block, size, __GFP_MOVABLE); } -- cgit v1.2.3 From 66924fdaf835f5fc6fe78d92a79afcca7d4db7ec Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 04:17:49 +0100 Subject: buffer: add kernel-doc for brelse() and __brelse() Move the documentation for __brelse() to brelse(), format it as kernel-doc and update it from talking about pages to folios. Link: https://lkml.kernel.org/r/20240416031754.4076917-6-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Randy Dunlap Cc: Pankaj Raghav Signed-off-by: Andrew Morton --- fs/buffer.c | 17 ++++++++--------- include/linux/buffer_head.h | 16 ++++++++++++++++ 2 files changed, 24 insertions(+), 9 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 32ab3eddc44f..e5beca3868a7 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1226,17 +1226,16 @@ void mark_buffer_write_io_error(struct buffer_head *bh) } EXPORT_SYMBOL(mark_buffer_write_io_error); -/* - * Decrement a buffer_head's reference count. If all buffers against a page - * have zero reference count, are clean and unlocked, and if the page is clean - * and unlocked then try_to_free_buffers() may strip the buffers from the page - * in preparation for freeing it (sometimes, rarely, buffers are removed from - * a page but it ends up not being freed, and buffers may later be reattached). +/** + * __brelse - Release a buffer. + * @bh: The buffer to release. + * + * This variant of brelse() can be called if @bh is guaranteed to not be NULL. */ -void __brelse(struct buffer_head * buf) +void __brelse(struct buffer_head *bh) { - if (atomic_read(&buf->b_count)) { - put_bh(buf); + if (atomic_read(&bh->b_count)) { + put_bh(bh); return; } WARN(1, KERN_ERR "VFS: brelse: Trying to free free buffer\n"); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 56a1e9c1e71e..c145817c6ca0 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -303,6 +303,22 @@ static inline void put_bh(struct buffer_head *bh) atomic_dec(&bh->b_count); } +/** + * brelse - Release a buffer. + * @bh: The buffer to release. + * + * Decrement a buffer_head's reference count. If @bh is NULL, this + * function is a no-op. + * + * If all buffers on a folio have zero reference count, are clean + * and unlocked, and if the folio is unlocked and not under writeback + * then try_to_free_buffers() may strip the buffers from the folio in + * preparation for freeing it (sometimes, rarely, buffers are removed + * from a folio but it ends up not being freed, and buffers may later + * be reattached). + * + * Context: Any context. + */ static inline void brelse(struct buffer_head *bh) { if (bh) -- cgit v1.2.3 From b73a936f99914e23fbe236f75ecf257923cb06e7 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 04:17:50 +0100 Subject: buffer: add kernel-doc for bforget() and __bforget() Distinguish these functions from brelse() and __brelse(). Link: https://lkml.kernel.org/r/20240416031754.4076917-7-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Randy Dunlap Cc: Pankaj Raghav Signed-off-by: Andrew Morton --- fs/buffer.c | 9 ++++++--- include/linux/buffer_head.h | 10 ++++++++++ 2 files changed, 16 insertions(+), 3 deletions(-) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index e5beca3868a7..60829312787a 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1242,9 +1242,12 @@ void __brelse(struct buffer_head *bh) } EXPORT_SYMBOL(__brelse); -/* - * bforget() is like brelse(), except it discards any - * potentially dirty data. +/** + * __bforget - Discard any dirty data in a buffer. + * @bh: The buffer to forget. + * + * This variant of bforget() can be called if @bh is guaranteed to not + * be NULL. */ void __bforget(struct buffer_head *bh) { diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c145817c6ca0..a1c0bdd0cca6 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -325,6 +325,16 @@ static inline void brelse(struct buffer_head *bh) __brelse(bh); } +/** + * bforget - Discard any dirty data in a buffer. + * @bh: The buffer to forget. + * + * Call this function instead of brelse() if the data written to a buffer + * no longer needs to be written back. It will clear the buffer's dirty + * flag so writeback of this buffer will be skipped. + * + * Context: Any context. + */ static inline void bforget(struct buffer_head *bh) { if (bh) -- cgit v1.2.3 From 0b116ff4dc40ec84ce4bfd451436e66ab2bbc86d Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 16 Apr 2024 04:17:51 +0100 Subject: buffer: improve bdev_getblk documentation Add some more information about the state of the buffer_head returned. Link: https://lkml.kernel.org/r/20240416031754.4076917-8-willy@infradead.org Signed-off-by: Matthew Wilcox (Oracle) Cc: Pankaj Raghav Signed-off-by: Andrew Morton --- fs/buffer.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'fs/buffer.c') diff --git a/fs/buffer.c b/fs/buffer.c index 60829312787a..ed698caa8834 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -1424,6 +1424,11 @@ EXPORT_SYMBOL(__find_get_block); * @size: The size of buffer_heads for this @bdev. * @gfp: The memory allocation flags to use. * + * The returned buffer head has its reference count incremented, but is + * not locked. The caller should call brelse() when it has finished + * with the buffer. The buffer may not be uptodate. If needed, the + * caller can bring it uptodate either by reading it or overwriting it. + * * Return: The buffer head, or NULL if memory could not be allocated. */ struct buffer_head *bdev_getblk(struct block_device *bdev, sector_t block, -- cgit v1.2.3