From 2e7e80f7e7e9dbbb3c2a85ee923ca32826052816 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:27 +0000 Subject: fs: Convert is_partially_uptodate to folios Since the uptodate property is maintained on a per-folio basis, the is_partially_uptodate method should also take a folio. Fix the types at the same time so it's clear that it returns true/false and takes the count in bytes, not blocks. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- Documentation/filesystems/locking.rst | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'Documentation/filesystems/locking.rst') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 3f9b1497ebb8..88b33524687f 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -258,7 +258,7 @@ prototypes:: int (*migratepage)(struct address_space *, struct page *, struct page *); void (*putback_page) (struct page *); int (*launder_page)(struct page *); - int (*is_partially_uptodate)(struct page *, unsigned long, unsigned long); + bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count); int (*error_remove_page)(struct address_space *, struct page *); int (*swap_activate)(struct file *); int (*swap_deactivate)(struct file *); -- cgit v1.2.3 From 128d1f8241d62ab014eef6dd4ef9bb977dbeadb2 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:32 +0000 Subject: fs: Add invalidate_folio() aops method This is used in preference to invalidatepage, if defined. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- Documentation/filesystems/locking.rst | 13 +++++++------ Documentation/filesystems/vfs.rst | 11 ++++++----- include/linux/fs.h | 1 + mm/truncate.c | 8 +++++++- 4 files changed, 21 insertions(+), 12 deletions(-) (limited to 'Documentation/filesystems/locking.rst') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 88b33524687f..29a045fd3860 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -250,6 +250,7 @@ prototypes:: loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); sector_t (*bmap)(struct address_space *, sector_t); + void (*invalidate_folio) (struct folio *, size_t start, size_t len); void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); @@ -278,6 +279,7 @@ readpages: no shared write_begin: locks the page exclusive write_end: yes, unlocks exclusive bmap: +invalidate_folio: yes exclusive invalidatepage: yes exclusive releasepage: yes freepage: yes @@ -370,13 +372,12 @@ not locked. filesystems and by the swapper. The latter will eventually go away. Please, keep it that way and don't breed new callers. -->invalidatepage() is called when the filesystem must attempt to drop +->invalidate_folio() is called when the filesystem must attempt to drop some or all of the buffers from the page when it is being truncated. It -returns zero on success. If ->invalidatepage is zero, the kernel uses -block_invalidatepage() instead. The filesystem must exclusively acquire -invalidate_lock before invalidating page cache in truncate / hole punch path -(and thus calling into ->invalidatepage) to block races between page cache -invalidation and page cache filling functions (fault, read, ...). +returns zero on success. The filesystem must exclusively acquire +invalidate_lock before invalidating page cache in truncate / hole punch +path (and thus calling into ->invalidate_folio) to block races between page +cache invalidation and page cache filling functions (fault, read, ...). ->releasepage() is called when the kernel is about to try to drop the buffers from the page in preparation for freeing it. It returns zero to diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index da3e7b470f0a..26c090cd8cf5 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -735,6 +735,7 @@ cache in your filesystem. The following members are defined: loff_t pos, unsigned len, unsigned copied, struct page *page, void *fsdata); sector_t (*bmap)(struct address_space *, sector_t); + void (*invalidate_folio) (struct folio *, size_t start, size_t len); void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); @@ -868,15 +869,15 @@ cache in your filesystem. The following members are defined: to find out where the blocks in the file are and uses those addresses directly. -``invalidatepage`` - If a page has PagePrivate set, then invalidatepage will be - called when part or all of the page is to be removed from the +``invalidate_folio`` + If a folio has private data, then invalidate_folio will be + called when part or all of the folio is to be removed from the address space. This generally corresponds to either a truncation, punch hole or a complete invalidation of the address space (in the latter case 'offset' will always be 0 and 'length' - will be PAGE_SIZE). Any private data associated with the page + will be folio_size()). Any private data associated with the page should be updated to reflect this truncation. If offset is 0 - and length is PAGE_SIZE, then the private data should be + and length is folio_size(), then the private data should be released, because the page must be able to be completely discarded. This may be done by calling the ->releasepage function, but in this case the release MUST succeed. diff --git a/include/linux/fs.h b/include/linux/fs.h index 5939e6694ada..bcdb613cd652 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -387,6 +387,7 @@ struct address_space_operations { /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); + void (*invalidate_folio) (struct folio *, size_t offset, size_t len); void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); diff --git a/mm/truncate.c b/mm/truncate.c index aa0ed373789d..b9ad298e6ce7 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -154,9 +154,15 @@ static int invalidate_exceptional_entry2(struct address_space *mapping, */ void folio_invalidate(struct folio *folio, size_t offset, size_t length) { + const struct address_space_operations *aops = folio->mapping->a_ops; void (*invalidatepage)(struct page *, unsigned int, unsigned int); - invalidatepage = folio->mapping->a_ops->invalidatepage; + if (aops->invalidate_folio) { + aops->invalidate_folio(folio, offset, length); + return; + } + + invalidatepage = aops->invalidatepage; #ifdef CONFIG_BLOCK if (!invalidatepage) invalidatepage = block_invalidatepage; -- cgit v1.2.3 From f50015a596fa106bf642bd85fbf6e6b52cc913d0 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:51 +0000 Subject: fs: Remove aops->invalidatepage With all users migrated to ->invalidate_folio, remove the old operation. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- Documentation/filesystems/locking.rst | 2 -- Documentation/filesystems/vfs.rst | 1 - include/linux/fs.h | 1 - mm/truncate.c | 14 +++----------- 4 files changed, 3 insertions(+), 15 deletions(-) (limited to 'Documentation/filesystems/locking.rst') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 29a045fd3860..8e9cbc0fb70f 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -251,7 +251,6 @@ prototypes:: struct page *page, void *fsdata); sector_t (*bmap)(struct address_space *, sector_t); void (*invalidate_folio) (struct folio *, size_t start, size_t len); - void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); int (*direct_IO)(struct kiocb *, struct iov_iter *iter); @@ -280,7 +279,6 @@ write_begin: locks the page exclusive write_end: yes, unlocks exclusive bmap: invalidate_folio: yes exclusive -invalidatepage: yes exclusive releasepage: yes freepage: yes direct_IO: diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 26c090cd8cf5..28704831652c 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -736,7 +736,6 @@ cache in your filesystem. The following members are defined: struct page *page, void *fsdata); sector_t (*bmap)(struct address_space *, sector_t); void (*invalidate_folio) (struct folio *, size_t start, size_t len); - void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, int); void (*freepage)(struct page *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); diff --git a/include/linux/fs.h b/include/linux/fs.h index a40ea82248da..af9ae091bd82 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -388,7 +388,6 @@ struct address_space_operations { /* Unfortunately this kludge is needed for FIBMAP. Don't use it */ sector_t (*bmap)(struct address_space *, sector_t); void (*invalidate_folio) (struct folio *, size_t offset, size_t len); - void (*invalidatepage) (struct page *, unsigned int, unsigned int); int (*releasepage) (struct page *, gfp_t); void (*freepage)(struct page *); ssize_t (*direct_IO)(struct kiocb *, struct iov_iter *iter); diff --git a/mm/truncate.c b/mm/truncate.c index 28650151091a..8010461a59bd 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -19,8 +19,7 @@ #include #include #include -#include /* grr. try_to_release_page, - do_invalidatepage */ +#include /* grr. try_to_release_page */ #include #include #include "internal.h" @@ -155,16 +154,9 @@ static int invalidate_exceptional_entry2(struct address_space *mapping, void folio_invalidate(struct folio *folio, size_t offset, size_t length) { const struct address_space_operations *aops = folio->mapping->a_ops; - void (*invalidatepage)(struct page *, unsigned int, unsigned int); - if (aops->invalidate_folio) { + if (aops->invalidate_folio) aops->invalidate_folio(folio, offset, length); - return; - } - - invalidatepage = aops->invalidatepage; - if (invalidatepage) - (*invalidatepage)(&folio->page, offset, length); } EXPORT_SYMBOL_GPL(folio_invalidate); @@ -334,7 +326,7 @@ int invalidate_inode_page(struct page *page) * mapping is large, it is probably the case that the final pages are the most * recently touched, and freeing happens in ascending file offset order. * - * Note that since ->invalidatepage() accepts range to invalidate + * Note that since ->invalidate_folio() accepts range to invalidate * truncate_inode_pages_range is able to handle cases where lend + 1 is not * page aligned properly. */ -- cgit v1.2.3 From affa80e8c6a1df473694c2087259901872309cc4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:21:52 +0000 Subject: fs: Add aops->launder_folio Since the only difference between ->launder_page and ->launder_folio is the type of the pointer, these can safely use a union without affecting bisectability. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- Documentation/filesystems/locking.rst | 10 +++++----- Documentation/filesystems/vfs.rst | 8 ++++---- include/linux/fs.h | 5 ++++- mm/truncate.c | 8 ++++---- 4 files changed, 17 insertions(+), 14 deletions(-) (limited to 'Documentation/filesystems/locking.rst') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index 8e9cbc0fb70f..dee512efb458 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -257,7 +257,7 @@ prototypes:: bool (*isolate_page) (struct page *, isolate_mode_t); int (*migratepage)(struct address_space *, struct page *, struct page *); void (*putback_page) (struct page *); - int (*launder_page)(struct page *); + int (*launder_folio)(struct folio *); bool (*is_partially_uptodate)(struct folio *, size_t from, size_t count); int (*error_remove_page)(struct address_space *, struct page *); int (*swap_activate)(struct file *); @@ -285,7 +285,7 @@ direct_IO: isolate_page: yes migratepage: yes (both) putback_page: yes -launder_page: yes +launder_folio: yes is_partially_uptodate: yes error_remove_page: yes swap_activate: no @@ -385,9 +385,9 @@ the kernel assumes that the fs has no private interest in the buffers. ->freepage() is called when the kernel is done dropping the page from the page cache. -->launder_page() may be called prior to releasing a page if -it is still found to be dirty. It returns zero if the page was successfully -cleaned, or an error value if not. Note that in order to prevent the page +->launder_folio() may be called prior to releasing a folio if +it is still found to be dirty. It returns zero if the folio was successfully +cleaned, or an error value if not. Note that in order to prevent the folio getting mapped back in and redirtied, it needs to be kept locked across the entire operation. diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index 28704831652c..c54ca4d88ed6 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -745,7 +745,7 @@ cache in your filesystem. The following members are defined: int (*migratepage) (struct page *, struct page *); /* put migration-failed page back to right list */ void (*putback_page) (struct page *); - int (*launder_page) (struct page *); + int (*launder_folio) (struct folio *); bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); @@ -930,9 +930,9 @@ cache in your filesystem. The following members are defined: ``putback_page`` Called by the VM when isolated page's migration fails. -``launder_page`` - Called before freeing a page - it writes back the dirty page. - To prevent redirtying the page, it is kept locked during the +``launder_folio`` + Called before freeing a folio - it writes back the dirty folio. + To prevent redirtying the folio, it is kept locked during the whole operation. ``is_partially_uptodate`` diff --git a/include/linux/fs.h b/include/linux/fs.h index af9ae091bd82..0af3075cdff2 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -399,7 +399,10 @@ struct address_space_operations { struct page *, struct page *, enum migrate_mode); bool (*isolate_page)(struct page *, isolate_mode_t); void (*putback_page)(struct page *); - int (*launder_page) (struct page *); + union { + int (*launder_page) (struct page *); + int (*launder_folio) (struct folio *); + }; bool (*is_partially_uptodate) (struct folio *, size_t from, size_t count); void (*is_dirty_writeback) (struct page *, bool *, bool *); diff --git a/mm/truncate.c b/mm/truncate.c index 8010461a59bd..6ad44b546dff 100644 --- a/mm/truncate.c +++ b/mm/truncate.c @@ -614,13 +614,13 @@ failed: return 0; } -static int do_launder_folio(struct address_space *mapping, struct folio *folio) +static int folio_launder(struct address_space *mapping, struct folio *folio) { if (!folio_test_dirty(folio)) return 0; - if (folio->mapping != mapping || mapping->a_ops->launder_page == NULL) + if (folio->mapping != mapping || mapping->a_ops->launder_folio == NULL) return 0; - return mapping->a_ops->launder_page(&folio->page); + return mapping->a_ops->launder_folio(folio); } /** @@ -686,7 +686,7 @@ int invalidate_inode_pages2_range(struct address_space *mapping, unmap_mapping_folio(folio); BUG_ON(folio_mapped(folio)); - ret2 = do_launder_folio(mapping, folio); + ret2 = folio_launder(mapping, folio); if (ret2 == 0) { if (!invalidate_complete_folio2(mapping, folio)) ret2 = -EBUSY; -- cgit v1.2.3 From 6f31a5a261dbbe7bf7f585dfe81f8acd4b25ec3b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Wed, 9 Feb 2022 20:22:00 +0000 Subject: fs: Add aops->dirty_folio This replaces ->set_page_dirty(). It returns a bool instead of an int and takes the address_space as a parameter instead of expecting the implementations to retrieve the address_space from the page. This is particularly important for filesystems which use FS_OPS for swap. Signed-off-by: Matthew Wilcox (Oracle) Tested-by: Damien Le Moal Acked-by: Damien Le Moal Tested-by: Mike Marshall # orangefs Tested-by: David Howells # afs --- Documentation/filesystems/locking.rst | 15 ++++++++------- Documentation/filesystems/vfs.rst | 16 ++++++++-------- include/linux/fs.h | 1 + mm/page-writeback.c | 17 ++++++++++------- mm/page_io.c | 5 ++++- 5 files changed, 31 insertions(+), 23 deletions(-) (limited to 'Documentation/filesystems/locking.rst') diff --git a/Documentation/filesystems/locking.rst b/Documentation/filesystems/locking.rst index dee512efb458..72fa12dabd39 100644 --- a/Documentation/filesystems/locking.rst +++ b/Documentation/filesystems/locking.rst @@ -239,7 +239,7 @@ prototypes:: int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); int (*writepages)(struct address_space *, struct writeback_control *); - int (*set_page_dirty)(struct page *page); + bool (*dirty_folio)(struct address_space *, struct folio *folio); void (*readahead)(struct readahead_control *); int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); @@ -264,7 +264,7 @@ prototypes:: int (*swap_deactivate)(struct file *); locking rules: - All except set_page_dirty and freepage may block + All except dirty_folio and freepage may block ====================== ======================== ========= =============== ops PageLocked(page) i_rwsem invalidate_lock @@ -272,7 +272,7 @@ ops PageLocked(page) i_rwsem invalidate_lock writepage: yes, unlocks (see below) readpage: yes, unlocks shared writepages: -set_page_dirty no +dirty_folio maybe readahead: yes, unlocks shared readpages: no shared write_begin: locks the page exclusive @@ -361,10 +361,11 @@ If nr_to_write is NULL, all dirty pages must be written. writepages should _only_ write pages which are present on mapping->io_pages. -->set_page_dirty() is called from various places in the kernel -when the target page is marked as needing writeback. It may be called -under spinlock (it cannot block) and is sometimes called with the page -not locked. +->dirty_folio() is called from various places in the kernel when +the target folio is marked as needing writeback. The folio cannot be +truncated because either the caller holds the folio lock, or the caller +has found the folio while holding the page table lock which will block +truncation. ->bmap() is currently used by legacy ioctl() (FIBMAP) provided by some filesystems and by the swapper. The latter will eventually go away. Please, diff --git a/Documentation/filesystems/vfs.rst b/Documentation/filesystems/vfs.rst index c54ca4d88ed6..d16bee420326 100644 --- a/Documentation/filesystems/vfs.rst +++ b/Documentation/filesystems/vfs.rst @@ -658,7 +658,7 @@ pages, however the address_space has finer control of write sizes. The read process essentially only requires 'readpage'. The write process is more complicated and uses write_begin/write_end or -set_page_dirty to write data into the address_space, and writepage and +dirty_folio to write data into the address_space, and writepage and writepages to writeback data to storage. Adding and removing pages to/from an address_space is protected by the @@ -724,7 +724,7 @@ cache in your filesystem. The following members are defined: int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); int (*writepages)(struct address_space *, struct writeback_control *); - int (*set_page_dirty)(struct page *page); + bool (*dirty_folio)(struct address_space *, struct folio *); void (*readahead)(struct readahead_control *); int (*readpages)(struct file *filp, struct address_space *mapping, struct list_head *pages, unsigned nr_pages); @@ -793,13 +793,13 @@ cache in your filesystem. The following members are defined: This will choose pages from the address space that are tagged as DIRTY and will pass them to ->writepage. -``set_page_dirty`` - called by the VM to set a page dirty. This is particularly - needed if an address space attaches private data to a page, and - that data needs to be updated when a page is dirtied. This is +``dirty_folio`` + called by the VM to mark a folio as dirty. This is particularly + needed if an address space attaches private data to a folio, and + that data needs to be updated when a folio is dirtied. This is called, for example, when a memory mapped page gets modified. - If defined, it should set the PageDirty flag, and the - PAGECACHE_TAG_DIRTY tag in the radix tree. + If defined, it should set the folio dirty flag, and the + PAGECACHE_TAG_DIRTY search mark in i_pages. ``readahead`` Called by the VM to read pages associated with the address_space diff --git a/include/linux/fs.h b/include/linux/fs.h index 055be40084f1..c3d5db8851ae 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -369,6 +369,7 @@ struct address_space_operations { /* Set a page dirty. Return true if this dirtied it */ int (*set_page_dirty)(struct page *page); + bool (*dirty_folio)(struct address_space *, struct folio *); /* * Reads in the requested pages. Unlike ->readpage(), this is diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 91d163f8d36b..27a87ae4502c 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2616,7 +2616,7 @@ EXPORT_SYMBOL(folio_redirty_for_writepage); * folio_mark_dirty - Mark a folio as being modified. * @folio: The folio. * - * For folios with a mapping this should be done under the page lock + * For folios with a mapping this should be done with the folio lock held * for the benefit of asynchronous memory errors who prefer a consistent * dirty state. This rule can be broken in some special cases, * but should be better not to. @@ -2630,16 +2630,19 @@ bool folio_mark_dirty(struct folio *folio) if (likely(mapping)) { /* * readahead/lru_deactivate_page could remain - * PG_readahead/PG_reclaim due to race with end_page_writeback - * About readahead, if the page is written, the flags would be + * PG_readahead/PG_reclaim due to race with folio_end_writeback + * About readahead, if the folio is written, the flags would be * reset. So no problem. - * About lru_deactivate_page, if the page is redirty, the flag - * will be reset. So no problem. but if the page is used by readahead - * it will confuse readahead and make it restart the size rampup - * process. But it's a trivial problem. + * About lru_deactivate_page, if the folio is redirtied, + * the flag will be reset. So no problem. but if the + * folio is used by readahead it will confuse readahead + * and make it restart the size rampup process. But it's + * a trivial problem. */ if (folio_test_reclaim(folio)) folio_clear_reclaim(folio); + if (mapping->a_ops->dirty_folio) + return mapping->a_ops->dirty_folio(mapping, folio); return mapping->a_ops->set_page_dirty(&folio->page); } if (!folio_test_dirty(folio)) { diff --git a/mm/page_io.c b/mm/page_io.c index 0bf8e40f4e57..24c975fb4e21 100644 --- a/mm/page_io.c +++ b/mm/page_io.c @@ -444,9 +444,12 @@ int swap_set_page_dirty(struct page *page) if (data_race(sis->flags & SWP_FS_OPS)) { struct address_space *mapping = sis->swap_file->f_mapping; + const struct address_space_operations *aops = mapping->a_ops; VM_BUG_ON_PAGE(!PageSwapCache(page), page); - return mapping->a_ops->set_page_dirty(page); + if (aops->dirty_folio) + return aops->dirty_folio(mapping, page_folio(page)); + return aops->set_page_dirty(page); } else { return __set_page_dirty_no_writeback(page); } -- cgit v1.2.3