diff options
Diffstat (limited to 'mm/filemap.c')
-rw-r--r-- | mm/filemap.c | 242 |
1 files changed, 133 insertions, 109 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 9e44a49bbd74..bf6219d9aaac 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1669,46 +1669,47 @@ static int __folio_lock_async(struct folio *folio, struct wait_page_queue *wait) /* * Return values: - * true - folio is locked; mmap_lock is still held. - * false - folio is not locked. - * mmap_lock has been released (mmap_read_unlock(), unless flags had both - * FAULT_FLAG_ALLOW_RETRY and FAULT_FLAG_RETRY_NOWAIT set, in - * which case mmap_lock is still held. - * - * If neither ALLOW_RETRY nor KILLABLE are set, will always return true - * with the folio locked and the mmap_lock unperturbed. + * 0 - folio is locked. + * non-zero - folio is not locked. + * mmap_lock or per-VMA lock has been released (mmap_read_unlock() or + * vma_end_read()), unless flags had both FAULT_FLAG_ALLOW_RETRY and + * FAULT_FLAG_RETRY_NOWAIT set, in which case the lock is still held. + * + * If neither ALLOW_RETRY nor KILLABLE are set, will always return 0 + * with the folio locked and the mmap_lock/per-VMA lock is left unperturbed. */ -bool __folio_lock_or_retry(struct folio *folio, struct mm_struct *mm, - unsigned int flags) +vm_fault_t __folio_lock_or_retry(struct folio *folio, struct vm_fault *vmf) { + unsigned int flags = vmf->flags; + if (fault_flag_allow_retry_first(flags)) { /* - * CAUTION! In this case, mmap_lock is not released - * even though return 0. + * CAUTION! In this case, mmap_lock/per-VMA lock is not + * released even though returning VM_FAULT_RETRY. */ if (flags & FAULT_FLAG_RETRY_NOWAIT) - return false; + return VM_FAULT_RETRY; - mmap_read_unlock(mm); + release_fault_lock(vmf); if (flags & FAULT_FLAG_KILLABLE) folio_wait_locked_killable(folio); else folio_wait_locked(folio); - return false; + return VM_FAULT_RETRY; } if (flags & FAULT_FLAG_KILLABLE) { bool ret; ret = __folio_lock_killable(folio); if (ret) { - mmap_read_unlock(mm); - return false; + release_fault_lock(vmf); + return VM_FAULT_RETRY; } } else { __folio_lock(folio); } - return true; + return 0; } /** @@ -1855,30 +1856,15 @@ out: * * Looks up the page cache entry at @mapping & @index. * - * @fgp_flags can be zero or more of these flags: - * - * * %FGP_ACCESSED - The folio will be marked accessed. - * * %FGP_LOCK - The folio is returned locked. - * * %FGP_CREAT - If no page is present then a new page is allocated using - * @gfp and added to the page cache and the VM's LRU list. - * The page is returned locked and with an increased refcount. - * * %FGP_FOR_MMAP - The caller wants to do its own locking dance if the - * page is already in cache. If the page was allocated, unlock it before - * returning so the caller can do the same dance. - * * %FGP_WRITE - The page will be written to by the caller. - * * %FGP_NOFS - __GFP_FS will get cleared in gfp. - * * %FGP_NOWAIT - Don't get blocked by page lock. - * * %FGP_STABLE - Wait for the folio to be stable (finished writeback) - * * If %FGP_LOCK or %FGP_CREAT are specified then the function may sleep even * if the %GFP flags specified for %FGP_CREAT are atomic. * - * If there is a page cache page, it is returned with an increased refcount. + * If this function returns a folio, it is returned with an increased refcount. * * Return: The found folio or an ERR_PTR() otherwise. */ struct folio *__filemap_get_folio(struct address_space *mapping, pgoff_t index, - int fgp_flags, gfp_t gfp) + fgf_t fgp_flags, gfp_t gfp) { struct folio *folio; @@ -1920,7 +1906,9 @@ repeat: folio_wait_stable(folio); no_page: if (!folio && (fgp_flags & FGP_CREAT)) { + unsigned order = FGF_GET_ORDER(fgp_flags); int err; + if ((fgp_flags & FGP_WRITE) && mapping_can_writeback(mapping)) gfp |= __GFP_WRITE; if (fgp_flags & FGP_NOFS) @@ -1929,26 +1917,44 @@ no_page: gfp &= ~GFP_KERNEL; gfp |= GFP_NOWAIT | __GFP_NOWARN; } - - folio = filemap_alloc_folio(gfp, 0); - if (!folio) - return ERR_PTR(-ENOMEM); - if (WARN_ON_ONCE(!(fgp_flags & (FGP_LOCK | FGP_FOR_MMAP)))) fgp_flags |= FGP_LOCK; - /* Init accessed so avoid atomic mark_page_accessed later */ - if (fgp_flags & FGP_ACCESSED) - __folio_set_referenced(folio); + if (!mapping_large_folio_support(mapping)) + order = 0; + if (order > MAX_PAGECACHE_ORDER) + order = MAX_PAGECACHE_ORDER; + /* If we're not aligned, allocate a smaller folio */ + if (index & ((1UL << order) - 1)) + order = __ffs(index); - err = filemap_add_folio(mapping, folio, index, gfp); - if (unlikely(err)) { + do { + gfp_t alloc_gfp = gfp; + + err = -ENOMEM; + if (order == 1) + order = 0; + if (order > 0) + alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN; + folio = filemap_alloc_folio(alloc_gfp, order); + if (!folio) + continue; + + /* Init accessed so avoid atomic mark_page_accessed later */ + if (fgp_flags & FGP_ACCESSED) + __folio_set_referenced(folio); + + err = filemap_add_folio(mapping, folio, index, gfp); + if (!err) + break; folio_put(folio); folio = NULL; - if (err == -EEXIST) - goto repeat; - } + } while (order-- > 0); + if (err == -EEXIST) + goto repeat; + if (err) + return ERR_PTR(err); /* * filemap_add_folio locks the page, and for mmap * we expect an unlocked page. @@ -2075,7 +2081,7 @@ unsigned find_lock_entries(struct address_space *mapping, pgoff_t *start, if (!xa_is_value(folio)) { if (folio->index < *start) goto put; - if (folio->index + folio_nr_pages(folio) - 1 > end) + if (folio_next_index(folio) - 1 > end) goto put; if (!folio_trylock(folio)) goto put; @@ -2167,16 +2173,6 @@ out: } EXPORT_SYMBOL(filemap_get_folios); -static inline -bool folio_more_pages(struct folio *folio, pgoff_t index, pgoff_t max) -{ - if (!folio_test_large(folio) || folio_test_hugetlb(folio)) - return false; - if (index >= max) - return false; - return index < folio->index + folio_nr_pages(folio) - 1; -} - /** * filemap_get_folios_contig - Get a batch of contiguous folios * @mapping: The address_space to search @@ -2242,7 +2238,7 @@ update_start: if (folio_test_hugetlb(folio)) *start = folio->index + 1; else - *start = folio->index + folio_nr_pages(folio); + *start = folio_next_index(folio); } out: rcu_read_unlock(); @@ -2359,7 +2355,7 @@ static void filemap_get_read_batch(struct address_space *mapping, break; if (folio_test_readahead(folio)) break; - xas_advance(&xas, folio->index + folio_nr_pages(folio) - 1); + xas_advance(&xas, folio_next_index(folio) - 1); continue; put_folio: folio_put(folio); @@ -2632,6 +2628,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, int i, error = 0; bool writably_mapped; loff_t isize, end_offset; + loff_t last_pos = ra->prev_pos; if (unlikely(iocb->ki_pos >= inode->i_sb->s_maxbytes)) return 0; @@ -2682,8 +2679,8 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, * When a read accesses the same folio several times, only * mark it as accessed the first time. */ - if (!pos_same_folio(iocb->ki_pos, ra->prev_pos - 1, - fbatch.folios[0])) + if (!pos_same_folio(iocb->ki_pos, last_pos - 1, + fbatch.folios[0])) folio_mark_accessed(fbatch.folios[0]); for (i = 0; i < folio_batch_count(&fbatch); i++) { @@ -2710,7 +2707,7 @@ ssize_t filemap_read(struct kiocb *iocb, struct iov_iter *iter, already_read += copied; iocb->ki_pos += copied; - ra->prev_pos = iocb->ki_pos; + last_pos = iocb->ki_pos; if (copied < bytes) { error = -EFAULT; @@ -2724,7 +2721,7 @@ put_folios: } while (iov_iter_count(iter) && iocb->ki_pos < isize && !error); file_accessed(filp); - + ra->prev_pos = last_pos; return already_read ? already_read : error; } EXPORT_SYMBOL_GPL(filemap_read); @@ -3434,10 +3431,10 @@ static bool filemap_map_pmd(struct vm_fault *vmf, struct folio *folio, return false; } -static struct folio *next_uptodate_page(struct folio *folio, - struct address_space *mapping, - struct xa_state *xas, pgoff_t end_pgoff) +static struct folio *next_uptodate_folio(struct xa_state *xas, + struct address_space *mapping, pgoff_t end_pgoff) { + struct folio *folio = xas_next_entry(xas, end_pgoff); unsigned long max_idx; do { @@ -3475,20 +3472,65 @@ skip: return NULL; } -static inline struct folio *first_map_page(struct address_space *mapping, - struct xa_state *xas, - pgoff_t end_pgoff) +/* + * Map page range [start_page, start_page + nr_pages) of folio. + * start_page is gotten from start by folio_page(folio, start) + */ +static vm_fault_t filemap_map_folio_range(struct vm_fault *vmf, + struct folio *folio, unsigned long start, + unsigned long addr, unsigned int nr_pages) { - return next_uptodate_page(xas_find(xas, end_pgoff), - mapping, xas, end_pgoff); -} + vm_fault_t ret = 0; + struct vm_area_struct *vma = vmf->vma; + struct file *file = vma->vm_file; + struct page *page = folio_page(folio, start); + unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); + unsigned int count = 0; + pte_t *old_ptep = vmf->pte; -static inline struct folio *next_map_page(struct address_space *mapping, - struct xa_state *xas, - pgoff_t end_pgoff) -{ - return next_uptodate_page(xas_next_entry(xas, end_pgoff), - mapping, xas, end_pgoff); + do { + if (PageHWPoison(page + count)) + goto skip; + + if (mmap_miss > 0) + mmap_miss--; + + /* + * NOTE: If there're PTE markers, we'll leave them to be + * handled in the specific fault path, and it'll prohibit the + * fault-around logic. + */ + if (!pte_none(vmf->pte[count])) + goto skip; + + count++; + continue; +skip: + if (count) { + set_pte_range(vmf, folio, page, count, addr); + folio_ref_add(folio, count); + if (in_range(vmf->address, addr, count)) + ret = VM_FAULT_NOPAGE; + } + + count++; + page += count; + vmf->pte += count; + addr += count * PAGE_SIZE; + count = 0; + } while (--nr_pages > 0); + + if (count) { + set_pte_range(vmf, folio, page, count, addr); + folio_ref_add(folio, count); + if (in_range(vmf->address, addr, count)) + ret = VM_FAULT_NOPAGE; + } + + vmf->pte = old_ptep; + WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); + + return ret; } vm_fault_t filemap_map_pages(struct vm_fault *vmf, @@ -3501,12 +3543,11 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, unsigned long addr; XA_STATE(xas, &mapping->i_pages, start_pgoff); struct folio *folio; - struct page *page; - unsigned int mmap_miss = READ_ONCE(file->f_ra.mmap_miss); vm_fault_t ret = 0; + int nr_pages = 0; rcu_read_lock(); - folio = first_map_page(mapping, &xas, end_pgoff); + folio = next_uptodate_folio(&xas, mapping, end_pgoff); if (!folio) goto out; @@ -3523,17 +3564,13 @@ vm_fault_t filemap_map_pages(struct vm_fault *vmf, goto out; } do { -again: - page = folio_file_page(folio, xas.xa_index); - if (PageHWPoison(page)) - goto unlock; - - if (mmap_miss > 0) - mmap_miss--; + unsigned long end; addr += (xas.xa_index - last_pgoff) << PAGE_SHIFT; vmf->pte += xas.xa_index - last_pgoff; last_pgoff = xas.xa_index; + end = folio->index + folio_nr_pages(folio) - 1; + nr_pages = min(end, end_pgoff) - xas.xa_index + 1; /* * NOTE: If there're PTE markers, we'll leave them to be @@ -3543,32 +3580,17 @@ again: if (!pte_none(ptep_get(vmf->pte))) goto unlock; - /* We're about to handle the fault */ - if (vmf->address == addr) - ret = VM_FAULT_NOPAGE; + ret |= filemap_map_folio_range(vmf, folio, + xas.xa_index - folio->index, addr, nr_pages); - do_set_pte(vmf, page, addr); - /* no need to invalidate: a not-present page won't be cached */ - update_mmu_cache(vma, addr, vmf->pte); - if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { - xas.xa_index++; - folio_ref_inc(folio); - goto again; - } - folio_unlock(folio); - continue; unlock: - if (folio_more_pages(folio, xas.xa_index, end_pgoff)) { - xas.xa_index++; - goto again; - } folio_unlock(folio); folio_put(folio); - } while ((folio = next_map_page(mapping, &xas, end_pgoff)) != NULL); + folio = next_uptodate_folio(&xas, mapping, end_pgoff); + } while (folio); pte_unmap_unlock(vmf->pte, vmf->ptl); out: rcu_read_unlock(); - WRITE_ONCE(file->f_ra.mmap_miss, mmap_miss); return ret; } EXPORT_SYMBOL(filemap_map_pages); @@ -4072,6 +4094,8 @@ bool filemap_release_folio(struct folio *folio, gfp_t gfp) struct address_space * const mapping = folio->mapping; BUG_ON(!folio_test_locked(folio)); + if (!folio_needs_release(folio)) + return true; if (folio_test_writeback(folio)) return false; |