From 85d4d2ebc86f02740c5f5f72ec43cc47d3560720 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 May 2021 23:30:44 -0400 Subject: mm/writeback: Add filemap_dirty_folio() Reimplement __set_page_dirty_nobuffers() as a wrapper around filemap_dirty_folio(). Eventually folio_mark_dirty() will pass the folio's mapping to the address space's ->dirty_folio() operation, so add the parameter to filemap_dirty_folio() now. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d1f65adf6a26..d4f24eba066f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -393,6 +393,7 @@ void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); +bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); void account_page_redirty(struct page *page); void sb_mark_inode_writeback(struct inode *inode); -- cgit v1.2.3 From 25ff8b15537dfa0e1a62d55cfcc48f3c8bd8a76c Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Mon, 3 May 2021 10:06:55 -0400 Subject: mm/writeback: Add folio_account_redirty() Account the number of pages in the folio that we're redirtying. Turn account_page_dirty() into a wrapper around it. Also turn the comment on folio_account_redirty() into kernel-doc and edit it slightly so it makes sense to its potential callers. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- include/linux/writeback.h | 6 +++++- mm/page-writeback.c | 32 +++++++++++++++++++------------- 2 files changed, 24 insertions(+), 14 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d4f24eba066f..852100bea351 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -394,7 +394,11 @@ void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio); -void account_page_redirty(struct page *page); +void folio_account_redirty(struct folio *folio); +static inline void account_page_redirty(struct page *page) +{ + folio_account_redirty(page_folio(page)); +} void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 85c04445de97..7ef904363fb7 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -1084,7 +1084,7 @@ static void wb_update_write_bandwidth(struct bdi_writeback *wb, * write_bandwidth = --------------------------------------------------- * period * - * @written may have decreased due to account_page_redirty(). + * @written may have decreased due to folio_account_redirty(). * Avoid underflowing @bw calculation. */ bw = written - min(written, wb->written_stamp); @@ -2544,30 +2544,36 @@ bool filemap_dirty_folio(struct address_space *mapping, struct folio *folio) } EXPORT_SYMBOL(filemap_dirty_folio); -/* - * Call this whenever redirtying a page, to de-account the dirty counters - * (NR_DIRTIED, WB_DIRTIED, tsk->nr_dirtied), so that they match the written - * counters (NR_WRITTEN, WB_WRITTEN) in long term. The mismatches will lead to - * systematic errors in balanced_dirty_ratelimit and the dirty pages position - * control. +/** + * folio_account_redirty - Manually account for redirtying a page. + * @folio: The folio which is being redirtied. + * + * Most filesystems should call folio_redirty_for_writepage() instead + * of this fuction. If your filesystem is doing writeback outside the + * context of a writeback_control(), it can call this when redirtying + * a folio, to de-account the dirty counters (NR_DIRTIED, WB_DIRTIED, + * tsk->nr_dirtied), so that they match the written counters (NR_WRITTEN, + * WB_WRITTEN) in long term. The mismatches will lead to systematic errors + * in balanced_dirty_ratelimit and the dirty pages position control. */ -void account_page_redirty(struct page *page) +void folio_account_redirty(struct folio *folio) { - struct address_space *mapping = page->mapping; + struct address_space *mapping = folio->mapping; if (mapping && mapping_can_writeback(mapping)) { struct inode *inode = mapping->host; struct bdi_writeback *wb; struct wb_lock_cookie cookie = {}; + long nr = folio_nr_pages(folio); wb = unlocked_inode_to_wb_begin(inode, &cookie); - current->nr_dirtied--; - dec_node_page_state(page, NR_DIRTIED); - dec_wb_stat(wb, WB_DIRTIED); + current->nr_dirtied -= nr; + node_stat_mod_folio(folio, NR_DIRTIED, -nr); + wb_stat_mod(wb, WB_DIRTIED, -nr); unlocked_inode_to_wb_end(inode, &cookie); } } -EXPORT_SYMBOL(account_page_redirty); +EXPORT_SYMBOL(folio_account_redirty); /* * When a writepage implementation decides that it doesn't want to write this -- cgit v1.2.3 From cd78ab11a8810dd297f4751d17cc53e3dce36024 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 2 May 2021 23:22:52 -0400 Subject: mm/writeback: Add folio_redirty_for_writepage() Reimplement redirty_page_for_writepage() as a wrapper around folio_redirty_for_writepage(). Account the number of pages in the folio, add kernel-doc and move the prototype to writeback.h. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: David Howells Acked-by: Vlastimil Babka --- fs/jfs/jfs_metapage.c | 1 + include/linux/mm.h | 4 ---- include/linux/writeback.h | 2 ++ mm/folio-compat.c | 7 +++++++ mm/page-writeback.c | 30 ++++++++++++++++++++---------- 5 files changed, 30 insertions(+), 14 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/fs/jfs/jfs_metapage.c b/fs/jfs/jfs_metapage.c index 176580f54af9..104ae698443e 100644 --- a/fs/jfs/jfs_metapage.c +++ b/fs/jfs/jfs_metapage.c @@ -13,6 +13,7 @@ #include #include #include +#include #include "jfs_incore.h" #include "jfs_superblock.h" #include "jfs_filsys.h" diff --git a/include/linux/mm.h b/include/linux/mm.h index dd5c6970ba67..23ab040aa65a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -36,9 +36,7 @@ struct mempolicy; struct anon_vma; struct anon_vma_chain; -struct file_ra_state; struct user_struct; -struct writeback_control; struct pt_regs; extern int sysctl_page_lock_unfairness; @@ -2003,8 +2001,6 @@ extern int try_to_release_page(struct page * page, gfp_t gfp_mask); extern void do_invalidatepage(struct page *page, unsigned int offset, unsigned int length); -int redirty_page_for_writepage(struct writeback_control *wbc, - struct page *page); bool folio_mark_dirty(struct folio *folio); bool set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 852100bea351..3571f383dfb6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -399,6 +399,8 @@ static inline void account_page_redirty(struct page *page) { folio_account_redirty(page_folio(page)); } +bool folio_redirty_for_writepage(struct writeback_control *, struct folio *); +bool redirty_page_for_writepage(struct writeback_control *, struct page *); void sb_mark_inode_writeback(struct inode *inode); void sb_clear_inode_writeback(struct inode *inode); diff --git a/mm/folio-compat.c b/mm/folio-compat.c index 39f5a8d963b1..c1e01bc36d32 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -95,3 +95,10 @@ bool clear_page_dirty_for_io(struct page *page) return folio_clear_dirty_for_io(page_folio(page)); } EXPORT_SYMBOL(clear_page_dirty_for_io); + +bool redirty_page_for_writepage(struct writeback_control *wbc, + struct page *page) +{ + return folio_redirty_for_writepage(wbc, page_folio(page)); +} +EXPORT_SYMBOL(redirty_page_for_writepage); diff --git a/mm/page-writeback.c b/mm/page-writeback.c index 7ef904363fb7..a0c35a9d8029 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2575,21 +2575,31 @@ void folio_account_redirty(struct folio *folio) } EXPORT_SYMBOL(folio_account_redirty); -/* - * When a writepage implementation decides that it doesn't want to write this - * page for some reason, it should redirty the locked page via - * redirty_page_for_writepage() and it should then unlock the page and return 0 +/** + * folio_redirty_for_writepage - Decline to write a dirty folio. + * @wbc: The writeback control. + * @folio: The folio. + * + * When a writepage implementation decides that it doesn't want to write + * @folio for some reason, it should call this function, unlock @folio and + * return 0. + * + * Return: True if we redirtied the folio. False if someone else dirtied + * it first. */ -int redirty_page_for_writepage(struct writeback_control *wbc, struct page *page) +bool folio_redirty_for_writepage(struct writeback_control *wbc, + struct folio *folio) { - int ret; + bool ret; + long nr = folio_nr_pages(folio); + + wbc->pages_skipped += nr; + ret = filemap_dirty_folio(folio->mapping, folio); + folio_account_redirty(folio); - wbc->pages_skipped++; - ret = __set_page_dirty_nobuffers(page); - account_page_redirty(page); return ret; } -EXPORT_SYMBOL(redirty_page_for_writepage); +EXPORT_SYMBOL(folio_redirty_for_writepage); /** * folio_mark_dirty - Mark a folio as being modified. -- cgit v1.2.3 From 348332e000697b4ca82ef96719e02876434b8346 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 20 Sep 2021 14:33:12 +0200 Subject: mm: don't include in blk-cgroup.h pulls in blkdev.h and thus pretty much all the block headers. Break this dependency chain by turning wbc_blkcg_css into a macro and dropping the blk-cgroup.h include. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210920123328.1399408-2-hch@lst.de Signed-off-by: Jens Axboe --- drivers/gpu/drm/i915/i915_utils.h | 1 + fs/btrfs/inode.c | 1 + fs/quota/quota.c | 1 + include/linux/writeback.h | 14 +++++--------- lib/random32.c | 1 + 5 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux/writeback.h') diff --git a/drivers/gpu/drm/i915/i915_utils.h b/drivers/gpu/drm/i915/i915_utils.h index 5259edacde38..066a9118c374 100644 --- a/drivers/gpu/drm/i915/i915_utils.h +++ b/drivers/gpu/drm/i915/i915_utils.h @@ -30,6 +30,7 @@ #include #include #include +#include struct drm_i915_private; struct timer_list; diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 487533c35ddb..4a9077c52444 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -6,6 +6,7 @@ #include #include #include +#include #include #include #include diff --git a/fs/quota/quota.c b/fs/quota/quota.c index 2bcc9a6f1bfc..052f143e2e0e 100644 --- a/fs/quota/quota.c +++ b/fs/quota/quota.c @@ -10,6 +10,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d1f65adf6a26..8eb165760752 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -11,7 +11,6 @@ #include #include #include -#include struct bio; @@ -109,15 +108,12 @@ static inline int wbc_to_write_flags(struct writeback_control *wbc) return flags; } -static inline struct cgroup_subsys_state * -wbc_blkcg_css(struct writeback_control *wbc) -{ #ifdef CONFIG_CGROUP_WRITEBACK - if (wbc->wb) - return wbc->wb->blkcg_css; -#endif - return blkcg_root_css; -} +#define wbc_blkcg_css(wbc) \ + ((wbc)->wb ? (wbc)->wb->blkcg_css : blkcg_root_css) +#else +#define wbc_blkcg_css(wbc) (blkcg_root_css) +#endif /* CONFIG_CGROUP_WRITEBACK */ /* * A wb_domain represents a domain that wb's (bdi_writeback's) belong to diff --git a/lib/random32.c b/lib/random32.c index 4d0e05e471d7..a57a0e18819d 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -39,6 +39,7 @@ #include #include #include +#include #include #include -- cgit v1.2.3 From 08276bdae68b022a7726edf7416b6748e3df5395 Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 20 Oct 2021 23:50:01 +0100 Subject: vfs, fscache: Implement pinning of cache usage for writeback Cachefiles has a problem in that it needs to keep the backing file for a cookie open whilst there are local modifications pending that need to be written to it. However, we don't want to keep the file open indefinitely, as that causes EMFILE/ENFILE/ENOMEM problems. Reopening the cache file, however, is a problem if this is being done due to writeback triggered by exit(). Some filesystems will oops if we try to open a file in that context because they want to access current->fs or other resources that have already been dismantled. To get around this, I added the following: (1) An inode flag, I_PINNING_FSCACHE_WB, to be set on a network filesystem inode to indicate that we have a usage count on the cookie caching that inode. (2) A flag in struct writeback_control, unpinned_fscache_wb, that is set when __writeback_single_inode() clears the last dirty page from i_pages - at which point it clears I_PINNING_FSCACHE_WB and sets this flag. This has to be done here so that clearing I_PINNING_FSCACHE_WB can be done atomically with the check of PAGECACHE_TAG_DIRTY that clears I_DIRTY_PAGES. (3) A function, fscache_set_page_dirty(), which if it is not set, sets I_PINNING_FSCACHE_WB and calls fscache_use_cookie() to pin the cache resources. (4) A function, fscache_unpin_writeback(), to be called by ->write_inode() to unuse the cookie. (5) A function, fscache_clear_inode_writeback(), to be called when the inode is evicted, before clear_inode() is called. This cleans up any lingering I_PINNING_FSCACHE_WB. The network filesystem can then use these tools to make sure that fscache_write_to_cache() can write locally modified data to the cache as well as to the server. For the future, I'm working on write helpers for netfs lib that should allow this facility to be removed by keeping track of the dirty regions separately - but that's incomplete at the moment and is also going to be affected by folios, one way or another, since it deals with pages Signed-off-by: David Howells Reviewed-by: Jeff Layton cc: linux-cachefs@redhat.com Link: https://lore.kernel.org/r/163819615157.215744.17623791756928043114.stgit@warthog.procyon.org.uk/ # v1 Link: https://lore.kernel.org/r/163906917856.143852.8224898306177154573.stgit@warthog.procyon.org.uk/ # v2 Link: https://lore.kernel.org/r/163967124567.1823006.14188359004568060298.stgit@warthog.procyon.org.uk/ # v3 Link: https://lore.kernel.org/r/164021524705.640689.17824932021727663017.stgit@warthog.procyon.org.uk/ # v4 --- fs/fs-writeback.c | 8 ++++++++ fs/fscache/io.c | 38 ++++++++++++++++++++++++++++++++++++++ include/linux/fs.h | 3 +++ include/linux/fscache.h | 41 +++++++++++++++++++++++++++++++++++++++++ include/linux/writeback.h | 1 + 5 files changed, 91 insertions(+) (limited to 'include/linux/writeback.h') diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index 67f0e88eed01..8294a60ce323 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1666,6 +1666,13 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY)) inode->i_state |= I_DIRTY_PAGES; + else if (unlikely(inode->i_state & I_PINNING_FSCACHE_WB)) { + if (!(inode->i_state & I_DIRTY_PAGES)) { + inode->i_state &= ~I_PINNING_FSCACHE_WB; + wbc->unpinned_fscache_wb = true; + dirty |= I_PINNING_FSCACHE_WB; /* Cause write_inode */ + } + } spin_unlock(&inode->i_lock); @@ -1675,6 +1682,7 @@ __writeback_single_inode(struct inode *inode, struct writeback_control *wbc) if (ret == 0) ret = err; } + wbc->unpinned_fscache_wb = false; trace_writeback_single_inode(inode, wbc, nr_to_write); return ret; } diff --git a/fs/fscache/io.c b/fs/fscache/io.c index 74cde7acf434..e9e5d6758ea8 100644 --- a/fs/fscache/io.c +++ b/fs/fscache/io.c @@ -150,6 +150,44 @@ int __fscache_begin_read_operation(struct netfs_cache_resources *cres, } EXPORT_SYMBOL(__fscache_begin_read_operation); +/** + * fscache_set_page_dirty - Mark page dirty and pin a cache object for writeback + * @page: The page being dirtied + * @cookie: The cookie referring to the cache object + * + * Set the dirty flag on a page and pin an in-use cache object in memory when + * dirtying a page so that writeback can later write to it. This is intended + * to be called from the filesystem's ->set_page_dirty() method. + * + * Returns 1 if PG_dirty was set on the page, 0 otherwise. + */ +int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie) +{ + struct inode *inode = page->mapping->host; + bool need_use = false; + + _enter(""); + + if (!__set_page_dirty_nobuffers(page)) + return 0; + if (!fscache_cookie_valid(cookie)) + return 1; + + if (!(inode->i_state & I_PINNING_FSCACHE_WB)) { + spin_lock(&inode->i_lock); + if (!(inode->i_state & I_PINNING_FSCACHE_WB)) { + inode->i_state |= I_PINNING_FSCACHE_WB; + need_use = true; + } + spin_unlock(&inode->i_lock); + + if (need_use) + fscache_use_cookie(cookie, true); + } + return 1; +} +EXPORT_SYMBOL(fscache_set_page_dirty); + struct fscache_write_request { struct netfs_cache_resources cache_resources; struct address_space *mapping; diff --git a/include/linux/fs.h b/include/linux/fs.h index bbf812ce89a8..2c0b8e77d9ab 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2418,6 +2418,8 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * Used to detect that mark_inode_dirty() should not move * inode between dirty lists. * + * I_PINNING_FSCACHE_WB Inode is pinning an fscache object for writeback. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2440,6 +2442,7 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) #define I_SYNC_QUEUED (1 << 17) +#define I_PINNING_FSCACHE_WB (1 << 18) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) diff --git a/include/linux/fscache.h b/include/linux/fscache.h index 9d469613e16c..18e725671594 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -16,6 +16,7 @@ #include #include +#include #if defined(CONFIG_FSCACHE) || defined(CONFIG_FSCACHE_MODULE) #define __fscache_available (1) @@ -566,4 +567,44 @@ static inline void fscache_write_to_cache(struct fscache_cookie *cookie, } +#if __fscache_available +extern int fscache_set_page_dirty(struct page *page, struct fscache_cookie *cookie); +#else +#define fscache_set_page_dirty(PAGE, COOKIE) (__set_page_dirty_nobuffers((PAGE))) +#endif + +/** + * fscache_unpin_writeback - Unpin writeback resources + * @wbc: The writeback control + * @cookie: The cookie referring to the cache object + * + * Unpin the writeback resources pinned by fscache_set_page_dirty(). This is + * intended to be called by the netfs's ->write_inode() method. + */ +static inline void fscache_unpin_writeback(struct writeback_control *wbc, + struct fscache_cookie *cookie) +{ + if (wbc->unpinned_fscache_wb) + fscache_unuse_cookie(cookie, NULL, NULL); +} + +/** + * fscache_clear_inode_writeback - Clear writeback resources pinned by an inode + * @cookie: The cookie referring to the cache object + * @inode: The inode to clean up + * @aux: Auxiliary data to apply to the inode + * + * Clear any writeback resources held by an inode when the inode is evicted. + * This must be called before clear_inode() is called. + */ +static inline void fscache_clear_inode_writeback(struct fscache_cookie *cookie, + struct inode *inode, + const void *aux) +{ + if (inode->i_state & I_PINNING_FSCACHE_WB) { + loff_t i_size = i_size_read(inode); + fscache_unuse_cookie(cookie, aux, &i_size); + } +} + #endif /* _LINUX_FSCACHE_H */ diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 3bfd487d1dd2..fec248ab1fec 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -68,6 +68,7 @@ struct writeback_control { unsigned for_reclaim:1; /* Invoked from the page allocator */ unsigned range_cyclic:1; /* range_start is cyclic */ unsigned for_sync:1; /* sync(2) WB_SYNC_ALL writeback */ + unsigned unpinned_fscache_wb:1; /* Cleared I_PINNING_FSCACHE_WB */ /* * When writeback IOs are bounced through async layers, only the -- cgit v1.2.3