From 1b0306981e0f1e5a9ad3d08b9c88bfe21b68e3b9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 9 Jul 2023 18:17:33 -0400 Subject: iov_iter: Add copy_folio_from_iter_atomic() Add a folio wrapper around copy_page_from_iter_atomic(). Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Darrick J. Wong --- include/linux/uio.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index ff81e5ccaef2..42bce38a8e87 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -163,7 +163,7 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } -size_t copy_page_from_iter_atomic(struct page *page, unsigned offset, +size_t copy_page_from_iter_atomic(struct page *page, size_t offset, size_t bytes, struct iov_iter *i); void iov_iter_advance(struct iov_iter *i, size_t bytes); void iov_iter_revert(struct iov_iter *i, size_t bytes); @@ -184,6 +184,13 @@ static inline size_t copy_folio_to_iter(struct folio *folio, size_t offset, { return copy_page_to_iter(&folio->page, offset, bytes, i); } + +static inline size_t copy_folio_from_iter_atomic(struct folio *folio, + size_t offset, size_t bytes, struct iov_iter *i) +{ + return copy_page_from_iter_atomic(&folio->page, offset, bytes, i); +} + size_t copy_page_to_iter_nofault(struct page *page, unsigned offset, size_t bytes, struct iov_iter *i); -- cgit v1.2.3 From ffc143db63eeea7c8a27deb3c56d090a220a1ace Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 26 May 2023 16:43:23 -0400 Subject: filemap: Add fgf_t typedef Similarly to gfp_t, define fgf_t as its own type to prevent various misuses and confusion. Leave the flags as FGP_* for now to reduce the size of this patch; they will be converted to FGF_* later. Move the documentation to the definition of the type insted of burying it in the __filemap_get_folio() documentation. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong Reviewed-by: Kent Overstreet --- fs/btrfs/file.c | 6 +++--- fs/f2fs/compress.c | 2 +- fs/f2fs/f2fs.h | 2 +- fs/iomap/buffered-io.c | 2 +- include/linux/pagemap.h | 48 +++++++++++++++++++++++++++++++++++++----------- mm/filemap.c | 19 ++----------------- mm/folio-compat.c | 2 +- 7 files changed, 46 insertions(+), 35 deletions(-) (limited to 'include/linux') diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c index fd03e689a6be..3876fae90fc3 100644 --- a/fs/btrfs/file.c +++ b/fs/btrfs/file.c @@ -876,9 +876,9 @@ static int prepare_uptodate_page(struct inode *inode, return 0; } -static unsigned int get_prepare_fgp_flags(bool nowait) +static fgf_t get_prepare_fgp_flags(bool nowait) { - unsigned int fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT; + fgf_t fgp_flags = FGP_LOCK | FGP_ACCESSED | FGP_CREAT; if (nowait) fgp_flags |= FGP_NOWAIT; @@ -910,7 +910,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages, int i; unsigned long index = pos >> PAGE_SHIFT; gfp_t mask = get_prepare_gfp_flags(inode, nowait); - unsigned int fgp_flags = get_prepare_fgp_flags(nowait); + fgf_t fgp_flags = get_prepare_fgp_flags(nowait); int err = 0; int faili; diff --git a/fs/f2fs/compress.c b/fs/f2fs/compress.c index 236d890f560b..0f7df9c11af3 100644 --- a/fs/f2fs/compress.c +++ b/fs/f2fs/compress.c @@ -1045,7 +1045,7 @@ static int prepare_compress_overwrite(struct compress_ctx *cc, struct address_space *mapping = cc->inode->i_mapping; struct page *page; sector_t last_block_in_bio; - unsigned fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT; + fgf_t fgp_flag = FGP_LOCK | FGP_WRITE | FGP_CREAT; pgoff_t start_idx = start_idx_of_cluster(cc); int i, ret; diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index c7cb2177b252..c275ff2753c2 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -2736,7 +2736,7 @@ static inline struct page *f2fs_grab_cache_page(struct address_space *mapping, static inline struct page *f2fs_pagecache_get_page( struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp_mask) + fgf_t fgp_flags, gfp_t gfp_mask) { if (time_to_inject(F2FS_M_SB(mapping), FAULT_PAGE_GET)) return NULL; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index cc640576f9f7..94a6ae05e03b 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -467,7 +467,7 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate); */ struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos) { - unsigned fgp = FGP_WRITEBEGIN | FGP_NOFS; + fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS; if (iter->flags & IOMAP_NOWAIT) fgp |= FGP_NOWAIT; diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 716953ee1ebd..911201fc41fc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -501,22 +501,48 @@ pgoff_t page_cache_next_miss(struct address_space *mapping, pgoff_t page_cache_prev_miss(struct address_space *mapping, pgoff_t index, unsigned long max_scan); -#define FGP_ACCESSED 0x00000001 -#define FGP_LOCK 0x00000002 -#define FGP_CREAT 0x00000004 -#define FGP_WRITE 0x00000008 -#define FGP_NOFS 0x00000010 -#define FGP_NOWAIT 0x00000020 -#define FGP_FOR_MMAP 0x00000040 -#define FGP_STABLE 0x00000080 +/** + * typedef fgf_t - Flags for getting folios from the page cache. + * + * Most users of the page cache will not need to use these flags; + * there are convenience functions such as filemap_get_folio() and + * filemap_lock_folio(). For users which need more control over exactly + * what is done with the folios, these flags to __filemap_get_folio() + * are available. + * + * * %FGP_ACCESSED - The folio will be marked accessed. + * * %FGP_LOCK - The folio is returned locked. + * * %FGP_CREAT - If no folio is present then a new folio is allocated, + * added to the page cache and the VM's LRU list. The folio is + * returned locked. + * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the + * folio is already in cache. If the folio was allocated, unlock it + * before returning so the caller can do the same dance. + * * %FGP_WRITE - The folio will be written to by the caller. + * * %FGP_NOFS - __GFP_FS will get cleared in gfp. + * * %FGP_NOWAIT - Don't block on the folio lock. + * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) + * * %FGP_WRITEBEGIN - The flags to use in a filesystem write_begin() + * implementation. + */ +typedef unsigned int __bitwise fgf_t; + +#define FGP_ACCESSED ((__force fgf_t)0x00000001) +#define FGP_LOCK ((__force fgf_t)0x00000002) +#define FGP_CREAT ((__force fgf_t)0x00000004) +#define FGP_WRITE ((__force fgf_t)0x00000008) +#define FGP_NOFS ((__force fgf_t)0x00000010) +#define FGP_NOWAIT ((__force fgf_t)0x00000020) +#define FGP_FOR_MMAP ((__force fgf_t)0x00000040) +#define FGP_STABLE ((__force fgf_t)0x00000080) #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) void *filemap_get_entry(struct address_space *mapping, pgoff_t index); struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp); + fgf_t fgp_flags, gfp_t gfp); struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp); + fgf_t fgp_flags, gfp_t gfp); /** * filemap_get_folio - Find and get a folio. @@ -590,7 +616,7 @@ static inline struct page *find_get_page(struct address_space *mapping, } static inline struct page *find_get_page_flags(struct address_space *mapping, - pgoff_t offset, int fgp_flags) + pgoff_t offset, fgf_t fgp_flags) { return pagecache_get_page(mapping, offset, fgp_flags, 0); } diff --git a/mm/filemap.c b/mm/filemap.c index 9e44a49bbd74..8a669fecfd1c 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1855,30 +1855,15 @@ out: * * Looks up the page cache entry at @mapping & @index. * - * @fgp_flags can be zero or more of these flags: - * - * * %FGP_ACCESSED - The folio will be marked accessed. - * * %FGP_LOCK - The folio is returned locked. - * * %FGP_CREAT - If no page is present then a new page is allocated using - * @gfp and added to the page cache and the VM's LRU list. - * The page is returned locked and with an increased refcount. - * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the - * page is already in cache. If the page was allocated, unlock it before - * returning so the caller can do the same dance. - * * %FGP_WRITE - The page will be written to by the caller. - * * %FGP_NOFS - __GFP_FS will get cleared in gfp. - * * %FGP_NOWAIT - Don't get blocked by page lock. - * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) - * * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. * - * If there is a page cache page, it is returned with an increased refcount. + * If this function returns a folio, it is returned with an increased refcount. * * Return: The found folio or an ERR_PTR() otherwise. */ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp) + fgf_t fgp_flags, gfp_t gfp) { struct folio *folio; diff --git a/mm/folio-compat.c b/mm/folio-compat.c index c6f056c20503..10c3247542cb 100644 --- a/mm/folio-compat.c +++ b/mm/folio-compat.c @@ -92,7 +92,7 @@ EXPORT_SYMBOL(add_to_page_cache_lru); noinline struct page *pagecache_get_page(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp) + fgf_t fgp_flags, gfp_t gfp) { struct folio *folio; -- cgit v1.2.3 From 4f66170119107f1452d2438ba4606e105e9e3afe Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 19 May 2023 16:10:37 -0400 Subject: filemap: Allow __filemap_get_folio to allocate large folios Allow callers of __filemap_get_folio() to specify a preferred folio order in the FGP flags. This is only honoured in the FGP_CREATE path; if there is already a folio in the page cache that covers the index, we will return it, no matter what its order is. No create-around is attempted; we will only create folios which start at the specified index. Unmodified callers will continue to allocate order 0 folios. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- include/linux/pagemap.h | 34 ++++++++++++++++++++++++++++++++++ mm/filemap.c | 46 +++++++++++++++++++++++++++++++++------------- mm/readahead.c | 13 ------------- 3 files changed, 67 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 911201fc41fc..d87840acbfb2 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -470,6 +470,19 @@ static inline void *detach_page_private(struct page *page) return folio_detach_private(page_folio(page)); } +/* + * There are some parts of the kernel which assume that PMD entries + * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then, + * limit the maximum allocation order to PMD size. I'm not aware of any + * assumptions about maximum order if THP are disabled, but 8 seems like + * a good order (that's 1MB if you're using 4kB pages) + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER +#else +#define MAX_PAGECACHE_ORDER 8 +#endif + #ifdef CONFIG_NUMA struct folio *filemap_alloc_folio(gfp_t gfp, unsigned int order); #else @@ -535,9 +548,30 @@ typedef unsigned int __bitwise fgf_t; #define FGP_NOWAIT ((__force fgf_t)0x00000020) #define FGP_FOR_MMAP ((__force fgf_t)0x00000040) #define FGP_STABLE ((__force fgf_t)0x00000080) +#define FGF_GET_ORDER(fgf) (((__force unsigned)fgf) >> 26) /* top 6 bits */ #define FGP_WRITEBEGIN (FGP_LOCK | FGP_WRITE | FGP_CREAT | FGP_STABLE) +/** + * fgf_set_order - Encode a length in the fgf_t flags. + * @size: The suggested size of the folio to create. + * + * The caller of __filemap_get_folio() can use this to suggest a preferred + * size for the folio that is created. If there is already a folio at + * the index, it will be returned, no matter what its size. If a folio + * is freshly created, it may be of a different size than requested + * due to alignment constraints, memory pressure, or the presence of + * other folios at nearby indices. + */ +static inline fgf_t fgf_set_order(size_t size) +{ + unsigned int shift = ilog2(size); + + if (shift <= PAGE_SHIFT) + return 0; + return (__force fgf_t)((shift - PAGE_SHIFT) << 26); +} + void *filemap_get_entry(struct address_space *mapping, pgoff_t index); struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, fgf_t fgp_flags, gfp_t gfp); diff --git a/mm/filemap.c b/mm/filemap.c index 8a669fecfd1c..baafbf324c9f 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1905,7 +1905,9 @@ repeat: folio_wait_stable(folio); no_page: if (!folio && (fgp_flags & FGP_CREAT)) { + unsigned order = FGF_GET_ORDER(fgp_flags); int err; + if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) gfp |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) @@ -1914,26 +1916,44 @@ no_page: gfp &= ~GFP_KERNEL; gfp |= GFP_NOWAIT | __GFP_NOWARN; } - - folio = filemap_alloc_folio(gfp, 0); - if (!folio) - return ERR_PTR(-ENOMEM); - if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP)))) fgp_flags |= FGP_LOCK; - /* Init accessed so avoid atomic mark_page_accessed later */ - if (fgp_flags & FGP_ACCESSED) - __folio_set_referenced(folio); + if (!mapping_large_folio_support(mapping)) + order = 0; + if (order > MAX_PAGECACHE_ORDER) + order = MAX_PAGECACHE_ORDER; + /* If we're not aligned, allocate a smaller folio */ + if (index & ((1UL << order) - 1)) + order = __ffs(index); - err = filemap_add_folio(mapping, folio, index, gfp); - if (unlikely(err)) { + do { + gfp_t alloc_gfp = gfp; + + err = -ENOMEM; + if (order == 1) + order = 0; + if (order > 0) + alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN; + folio = filemap_alloc_folio(alloc_gfp, order); + if (!folio) + continue; + + /* Init accessed so avoid atomic mark_page_accessed later */ + if (fgp_flags & FGP_ACCESSED) + __folio_set_referenced(folio); + + err = filemap_add_folio(mapping, folio, index, gfp); + if (!err) + break; folio_put(folio); folio = NULL; - if (err == -EEXIST) - goto repeat; - } + } while (order-- > 0); + if (err == -EEXIST) + goto repeat; + if (err) + return ERR_PTR(err); /* * filemap_add_folio locks the page, and for mmap * we expect an unlocked page. diff --git a/mm/readahead.c b/mm/readahead.c index a9c999aa19af..e815c114de21 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -461,19 +461,6 @@ static int try_context_readahead(struct address_space *mapping, return 1; } -/* - * There are some parts of the kernel which assume that PMD entries - * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then, - * limit the maximum allocation order to PMD size. I'm not aware of any - * assumptions about maximum order if THP are disabled, but 8 seems like - * a good order (that's 1MB if you're using 4kB pages) - */ -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER -#else -#define MAX_PAGECACHE_ORDER 8 -#endif - static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, pgoff_t mark, unsigned int order, gfp_t gfp) { -- cgit v1.2.3 From d6bb59a9444d218dc60dee3b45fd93c0d4f5e123 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Fri, 19 May 2023 16:18:05 -0400 Subject: iomap: Create large folios in the buffered write path Use the size of the write as a hint for the size of the folio to create. Signed-off-by: Matthew Wilcox (Oracle) Reviewed-by: Christoph Hellwig Reviewed-by: Darrick J. Wong --- fs/gfs2/bmap.c | 2 +- fs/iomap/buffered-io.c | 6 ++++-- include/linux/iomap.h | 2 +- 3 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/fs/gfs2/bmap.c b/fs/gfs2/bmap.c index 8d611fbcf0bd..521d1e00a0ff 100644 --- a/fs/gfs2/bmap.c +++ b/fs/gfs2/bmap.c @@ -971,7 +971,7 @@ gfs2_iomap_get_folio(struct iomap_iter *iter, loff_t pos, unsigned len) if (status) return ERR_PTR(status); - folio = iomap_get_folio(iter, pos); + folio = iomap_get_folio(iter, pos, len); if (IS_ERR(folio)) gfs2_trans_end(sdp); return folio; diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c index 94a6ae05e03b..00e0e94c953e 100644 --- a/fs/iomap/buffered-io.c +++ b/fs/iomap/buffered-io.c @@ -461,16 +461,18 @@ EXPORT_SYMBOL_GPL(iomap_is_partially_uptodate); * iomap_get_folio - get a folio reference for writing * @iter: iteration structure * @pos: start offset of write + * @len: Suggested size of folio to create. * * Returns a locked reference to the folio at @pos, or an error pointer if the * folio could not be obtained. */ -struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos) +struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len) { fgf_t fgp = FGP_WRITEBEGIN | FGP_NOFS; if (iter->flags & IOMAP_NOWAIT) fgp |= FGP_NOWAIT; + fgp |= fgf_set_order(len); return __filemap_get_folio(iter->inode->i_mapping, pos >> PAGE_SHIFT, fgp, mapping_gfp_mask(iter->inode->i_mapping)); @@ -597,7 +599,7 @@ static struct folio *__iomap_get_folio(struct iomap_iter *iter, loff_t pos, if (folio_ops && folio_ops->get_folio) return folio_ops->get_folio(iter, pos, len); else - return iomap_get_folio(iter, pos); + return iomap_get_folio(iter, pos, len); } static void __iomap_put_folio(struct iomap_iter *iter, loff_t pos, size_t ret, diff --git a/include/linux/iomap.h b/include/linux/iomap.h index e2b836c2e119..80facb9c9e5b 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -261,7 +261,7 @@ int iomap_file_buffered_write_punch_delalloc(struct inode *inode, int iomap_read_folio(struct folio *folio, const struct iomap_ops *ops); void iomap_readahead(struct readahead_control *, const struct iomap_ops *ops); bool iomap_is_partially_uptodate(struct folio *, size_t from, size_t count); -struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos); +struct folio *iomap_get_folio(struct iomap_iter *iter, loff_t pos, size_t len); bool iomap_release_folio(struct folio *folio, gfp_t gfp_flags); void iomap_invalidate_folio(struct folio *folio, size_t offset, size_t len); int iomap_file_unshare(struct inode *inode, loff_t pos, loff_t len, -- cgit v1.2.3