diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-23 03:03:12 +0300 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-03-23 03:03:12 +0300 |
commit | 9030fb0bb9d607908d51f9ee02efdbe01da355ee (patch) | |
tree | 9ee1d9d47fbb4b30c7f5cbc291432e666e58967a /mm/readahead.c | |
parent | 3bf03b9a0839c9fb06927ae53ebd0f960b19d408 (diff) | |
parent | 2a3c4bce3edb0d54983384aa8a88c0da330638f4 (diff) | |
download | linux-9030fb0bb9d607908d51f9ee02efdbe01da355ee.tar.xz |
Merge tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache
Pull folio updates from Matthew Wilcox:
- Rewrite how munlock works to massively reduce the contention on
i_mmap_rwsem (Hugh Dickins):
https://lore.kernel.org/linux-mm/8e4356d-9622-a7f0-b2c-f116b5f2efea@google.com/
- Sort out the page refcount mess for ZONE_DEVICE pages (Christoph
Hellwig):
https://lore.kernel.org/linux-mm/20220210072828.2930359-1-hch@lst.de/
- Convert GUP to use folios and make pincount available for order-1
pages. (Matthew Wilcox)
- Convert a few more truncation functions to use folios (Matthew
Wilcox)
- Convert page_vma_mapped_walk to use PFNs instead of pages (Matthew
Wilcox)
- Convert rmap_walk to use folios (Matthew Wilcox)
- Convert most of shrink_page_list() to use a folio (Matthew Wilcox)
- Add support for creating large folios in readahead (Matthew Wilcox)
* tag 'folio-5.18c' of git://git.infradead.org/users/willy/pagecache: (114 commits)
mm/damon: minor cleanup for damon_pa_young
selftests/vm/transhuge-stress: Support file-backed PMD folios
mm/filemap: Support VM_HUGEPAGE for file mappings
mm/readahead: Switch to page_cache_ra_order
mm/readahead: Align file mappings for non-DAX
mm/readahead: Add large folio readahead
mm: Support arbitrary THP sizes
mm: Make large folios depend on THP
mm: Fix READ_ONLY_THP warning
mm/filemap: Allow large folios to be added to the page cache
mm: Turn can_split_huge_page() into can_split_folio()
mm/vmscan: Convert pageout() to take a folio
mm/vmscan: Turn page_check_references() into folio_check_references()
mm/vmscan: Account large folios correctly
mm/vmscan: Optimise shrink_page_list for non-PMD-sized folios
mm/vmscan: Free non-shmem folios without splitting them
mm/rmap: Constify the rmap_walk_control argument
mm/rmap: Convert rmap_walk() to take a folio
mm: Turn page_anon_vma() into folio_anon_vma()
mm/rmap: Turn page_lock_anon_vma_read() into folio_lock_anon_vma_read()
...
Diffstat (limited to 'mm/readahead.c')
-rw-r--r-- | mm/readahead.c | 108 |
1 files changed, 100 insertions, 8 deletions
diff --git a/mm/readahead.c b/mm/readahead.c index f61943fd1741..21e5f9161cf2 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -262,7 +262,7 @@ static void read_pages(struct readahead_control *rac, struct list_head *pages, blk_finish_plug(&plug); - BUG_ON(!list_empty(pages)); + BUG_ON(pages && !list_empty(pages)); BUG_ON(readahead_count(rac)); out: @@ -361,7 +361,7 @@ EXPORT_SYMBOL_GPL(page_cache_ra_unbounded); * behaviour which would occur if page allocations are causing VM writeback. * We really don't want to intermingle reads and writes like that. */ -void do_page_cache_ra(struct readahead_control *ractl, +static void do_page_cache_ra(struct readahead_control *ractl, unsigned long nr_to_read, unsigned long lookahead_size) { struct inode *inode = ractl->mapping->host; @@ -546,10 +546,102 @@ static int try_context_readahead(struct address_space *mapping, } /* + * There are some parts of the kernel which assume that PMD entries + * are exactly HPAGE_PMD_ORDER. Those should be fixed, but until then, + * limit the maximum allocation order to PMD size. I'm not aware of any + * assumptions about maximum order if THP are disabled, but 8 seems like + * a good order (that's 1MB if you're using 4kB pages) + */ +#ifdef CONFIG_TRANSPARENT_HUGEPAGE +#define MAX_PAGECACHE_ORDER HPAGE_PMD_ORDER +#else +#define MAX_PAGECACHE_ORDER 8 +#endif + +static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, + pgoff_t mark, unsigned int order, gfp_t gfp) +{ + int err; + struct folio *folio = filemap_alloc_folio(gfp, order); + + if (!folio) + return -ENOMEM; + if (mark - index < (1UL << order)) + folio_set_readahead(folio); + err = filemap_add_folio(ractl->mapping, folio, index, gfp); + if (err) + folio_put(folio); + else + ractl->_nr_pages += 1UL << order; + return err; +} + +void page_cache_ra_order(struct readahead_control *ractl, + struct file_ra_state *ra, unsigned int new_order) +{ + struct address_space *mapping = ractl->mapping; + pgoff_t index = readahead_index(ractl); + pgoff_t limit = (i_size_read(mapping->host) - 1) >> PAGE_SHIFT; + pgoff_t mark = index + ra->size - ra->async_size; + int err = 0; + gfp_t gfp = readahead_gfp_mask(mapping); + + if (!mapping_large_folio_support(mapping) || ra->size < 4) + goto fallback; + + limit = min(limit, index + ra->size - 1); + + if (new_order < MAX_PAGECACHE_ORDER) { + new_order += 2; + if (new_order > MAX_PAGECACHE_ORDER) + new_order = MAX_PAGECACHE_ORDER; + while ((1 << new_order) > ra->size) + new_order--; + } + + while (index <= limit) { + unsigned int order = new_order; + + /* Align with smaller pages if needed */ + if (index & ((1UL << order) - 1)) { + order = __ffs(index); + if (order == 1) + order = 0; + } + /* Don't allocate pages past EOF */ + while (index + (1UL << order) - 1 > limit) { + if (--order == 1) + order = 0; + } + err = ra_alloc_folio(ractl, index, mark, order, gfp); + if (err) + break; + index += 1UL << order; + } + + if (index > limit) { + ra->size += index - limit - 1; + ra->async_size += index - limit - 1; + } + + read_pages(ractl, NULL, false); + + /* + * If there were already pages in the page cache, then we may have + * left some gaps. Let the regular readahead code take care of this + * situation. + */ + if (!err) + return; +fallback: + do_page_cache_ra(ractl, ra->size, ra->async_size); +} + +/* * A minimal readahead algorithm for trivial sequential/random reads. */ static void ondemand_readahead(struct readahead_control *ractl, - bool hit_readahead_marker, unsigned long req_size) + struct folio *folio, unsigned long req_size) { struct backing_dev_info *bdi = inode_to_bdi(ractl->mapping->host); struct file_ra_state *ra = ractl->ra; @@ -584,12 +676,12 @@ static void ondemand_readahead(struct readahead_control *ractl, } /* - * Hit a marked page without valid readahead state. + * Hit a marked folio without valid readahead state. * E.g. interleaved reads. * Query the pagecache for async_size, which normally equals to * readahead size. Ramp it up and use it as the new readahead size. */ - if (hit_readahead_marker) { + if (folio) { pgoff_t start; rcu_read_lock(); @@ -662,7 +754,7 @@ readit: } ractl->_index = ra->start; - do_page_cache_ra(ractl, ra->size, ra->async_size); + page_cache_ra_order(ractl, ra, folio ? folio_order(folio) : 0); } void page_cache_sync_ra(struct readahead_control *ractl, @@ -690,7 +782,7 @@ void page_cache_sync_ra(struct readahead_control *ractl, } /* do read-ahead */ - ondemand_readahead(ractl, false, req_count); + ondemand_readahead(ractl, NULL, req_count); } EXPORT_SYMBOL_GPL(page_cache_sync_ra); @@ -713,7 +805,7 @@ void page_cache_async_ra(struct readahead_control *ractl, return; /* do read-ahead */ - ondemand_readahead(ractl, true, req_count); + ondemand_readahead(ractl, folio, req_count); } EXPORT_SYMBOL_GPL(page_cache_async_ra); |