diff options
Diffstat (limited to 'mm/readahead.c')
-rw-r--r-- | mm/readahead.c | 108 |
1 files changed, 67 insertions, 41 deletions
diff --git a/mm/readahead.c b/mm/readahead.c index 8f1cf599b572..406756d34309 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -158,20 +158,10 @@ static void read_pages(struct readahead_control *rac) if (aops->readahead) { aops->readahead(rac); - /* - * Clean up the remaining folios. The sizes in ->ra - * may be used to size the next readahead, so make sure - * they accurately reflect what happened. - */ + /* Clean up the remaining folios. */ while ((folio = readahead_folio(rac)) != NULL) { - unsigned long nr = folio_nr_pages(folio); - folio_get(folio); - rac->ra->size -= nr; - if (rac->ra->async_size >= nr) { - rac->ra->async_size -= nr; - filemap_remove_folio(folio); - } + filemap_remove_folio(folio); folio_unlock(folio); folio_put(folio); } @@ -188,6 +178,18 @@ static void read_pages(struct readahead_control *rac) BUG_ON(readahead_count(rac)); } +static struct folio *ractl_alloc_folio(struct readahead_control *ractl, + gfp_t gfp_mask, unsigned int order) +{ + struct folio *folio; + + folio = filemap_alloc_folio(gfp_mask, order); + if (folio && ractl->dropbehind) + __folio_set_dropbehind(folio); + + return folio; +} + /** * page_cache_ra_unbounded - Start unchecked readahead. * @ractl: Readahead control. @@ -265,8 +267,8 @@ void page_cache_ra_unbounded(struct readahead_control *ractl, continue; } - folio = filemap_alloc_folio(gfp_mask, - mapping_min_folio_order(mapping)); + folio = ractl_alloc_folio(ractl, gfp_mask, + mapping_min_folio_order(mapping)); if (!folio) break; @@ -436,7 +438,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, pgoff_t mark, unsigned int order, gfp_t gfp) { int err; - struct folio *folio = filemap_alloc_folio(gfp, order); + struct folio *folio = ractl_alloc_folio(ractl, gfp, order); if (!folio) return -ENOMEM; @@ -455,7 +457,7 @@ static inline int ra_alloc_folio(struct readahead_control *ractl, pgoff_t index, } void page_cache_ra_order(struct readahead_control *ractl, - struct file_ra_state *ra, unsigned int new_order) + struct file_ra_state *ra) { struct address_space *mapping = ractl->mapping; pgoff_t start = readahead_index(ractl); @@ -466,24 +468,21 @@ void page_cache_ra_order(struct readahead_control *ractl, unsigned int nofs; int err = 0; gfp_t gfp = readahead_gfp_mask(mapping); - unsigned int min_ra_size = max(4, mapping_min_folio_nrpages(mapping)); + unsigned int new_order = ra->order; - /* - * Fallback when size < min_nrpages as each folio should be - * at least min_nrpages anyway. - */ - if (!mapping_large_folio_support(mapping) || ra->size < min_ra_size) + if (!mapping_large_folio_support(mapping)) { + ra->order = 0; goto fallback; + } limit = min(limit, index + ra->size - 1); - if (new_order < mapping_max_folio_order(mapping)) - new_order += 2; - new_order = min(mapping_max_folio_order(mapping), new_order); new_order = min_t(unsigned int, new_order, ilog2(ra->size)); new_order = max(new_order, min_order); + ra->order = new_order; + /* See comment in page_cache_ra_unbounded() */ nofs = memalloc_nofs_save(); filemap_invalidate_lock_shared(mapping); @@ -517,12 +516,18 @@ void page_cache_ra_order(struct readahead_control *ractl, /* * If there were already pages in the page cache, then we may have * left some gaps. Let the regular readahead code take care of this - * situation. + * situation below. */ if (!err) return; fallback: - do_page_cache_ra(ractl, ra->size - (index - start), ra->async_size); + /* + * ->readahead() may have updated readahead window size so we have to + * check there's still something to read. + */ + if (ra->size > index - start) + do_page_cache_ra(ractl, ra->size - (index - start), + ra->async_size); } static unsigned long ractl_max_pages(struct readahead_control *ractl, @@ -609,8 +614,9 @@ void page_cache_sync_ra(struct readahead_control *ractl, ra->size = min(contig_count + req_count, max_pages); ra->async_size = 1; readit: + ra->order = 0; ractl->_index = ra->start; - page_cache_ra_order(ractl, ra, 0); + page_cache_ra_order(ractl, ra); } EXPORT_SYMBOL_GPL(page_cache_sync_ra); @@ -620,8 +626,7 @@ void page_cache_async_ra(struct readahead_control *ractl, unsigned long max_pages; struct file_ra_state *ra = ractl->ra; pgoff_t index = readahead_index(ractl); - pgoff_t expected, start; - unsigned int order = folio_order(folio); + pgoff_t expected, start, end, aligned_end, align; /* no readahead */ if (!ra->ra_pages) @@ -644,11 +649,14 @@ void page_cache_async_ra(struct readahead_control *ractl, * Ramp up sizes, and push forward the readahead window. */ expected = round_down(ra->start + ra->size - ra->async_size, - 1UL << order); + folio_nr_pages(folio)); if (index == expected) { ra->start += ra->size; - ra->size = get_next_ra_size(ra, max_pages); - ra->async_size = ra->size; + /* + * In the case of MADV_HUGEPAGE, the actual size might exceed + * the readahead window. + */ + ra->size = max(ra->size, get_next_ra_size(ra, max_pages)); goto readit; } @@ -669,18 +677,30 @@ void page_cache_async_ra(struct readahead_control *ractl, ra->size = start - index; /* old async_size */ ra->size += req_count; ra->size = get_next_ra_size(ra, max_pages); - ra->async_size = ra->size; readit: + ra->order += 2; + align = 1UL << min(ra->order, ffs(max_pages) - 1); + end = ra->start + ra->size; + aligned_end = round_down(end, align); + if (aligned_end > ra->start) + ra->size -= end - aligned_end; + ra->async_size = ra->size; ractl->_index = ra->start; - page_cache_ra_order(ractl, ra, order); + page_cache_ra_order(ractl, ra); } EXPORT_SYMBOL_GPL(page_cache_async_ra); ssize_t ksys_readahead(int fd, loff_t offset, size_t count) { + struct file *file; + const struct inode *inode; + CLASS(fd, f)(fd); + if (fd_empty(f)) + return -EBADF; - if (fd_empty(f) || !(fd_file(f)->f_mode & FMODE_READ)) + file = fd_file(f); + if (!(file->f_mode & FMODE_READ)) return -EBADF; /* @@ -688,9 +708,15 @@ ssize_t ksys_readahead(int fd, loff_t offset, size_t count) * that can execute readahead. If readahead is not possible * on this file, then we must return -EINVAL. */ - if (!fd_file(f)->f_mapping || !fd_file(f)->f_mapping->a_ops || - (!S_ISREG(file_inode(fd_file(f))->i_mode) && - !S_ISBLK(file_inode(fd_file(f))->i_mode))) + if (!file->f_mapping) + return -EINVAL; + if (!file->f_mapping->a_ops) + return -EINVAL; + + inode = file_inode(file); + if (!S_ISREG(inode->i_mode) && !S_ISBLK(inode->i_mode)) + return -EINVAL; + if (IS_ANON_FILE(inode)) return -EINVAL; return vfs_fadvise(fd_file(f), offset, count, POSIX_FADV_WILLNEED); @@ -751,7 +777,7 @@ void readahead_expand(struct readahead_control *ractl, if (folio && !xa_is_value(folio)) return; /* Folio apparently present */ - folio = filemap_alloc_folio(gfp_mask, min_order); + folio = ractl_alloc_folio(ractl, gfp_mask, min_order); if (!folio) return; @@ -780,7 +806,7 @@ void readahead_expand(struct readahead_control *ractl, if (folio && !xa_is_value(folio)) return; /* Folio apparently present */ - folio = filemap_alloc_folio(gfp_mask, min_order); + folio = ractl_alloc_folio(ractl, gfp_mask, min_order); if (!folio) return; |