Diffstat (limited to 'mm/filemap.c')
-rw-r--r--	mm/filemap.c	61
1 file changed, 57 insertions(+), 4 deletions(-)
diff --git a/mm/filemap.c b/mm/filemap.c
index 8df4797c5287..7437b2bd75c1 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -124,6 +124,15 @@
  *    ->private_lock		(zap_pte_range->block_dirty_folio)
  */
 
+static void mapping_set_update(struct xa_state *xas,
+		struct address_space *mapping)
+{
+	if (dax_mapping(mapping) || shmem_mapping(mapping))
+		return;
+	xas_set_update(xas, workingset_update_node);
+	xas_set_lru(xas, &shadow_nodes);
+}
+
 static void page_cache_delete(struct address_space *mapping,
 				   struct folio *folio, void *shadow)
 {
@@ -843,7 +852,7 @@ noinline int __filemap_add_folio(struct address_space *mapping,
 		struct folio *folio, pgoff_t index, gfp_t gfp, void **shadowp)
 {
 	XA_STATE(xas, &mapping->i_pages, index);
-	int huge = folio_test_hugetlb(folio);
+	bool huge = folio_test_hugetlb(folio);
 	bool charged = false;
 	long nr = 1;
 
@@ -1354,7 +1363,7 @@ void migration_entry_wait_on_locked(swp_entry_t entry, spinlock_t *ptl)
 	unsigned long pflags;
 	bool in_thrashing;
 	wait_queue_head_t *q;
-	struct folio *folio = page_folio(pfn_swap_entry_to_page(entry));
+	struct folio *folio = pfn_swap_entry_folio(entry);
 
 	q = folio_waitqueue(folio);
 	if (!folio_test_uptodate(folio) && folio_test_workingset(folio)) {
@@ -1912,8 +1921,6 @@ no_page:
 			gfp_t alloc_gfp = gfp;
 
 			err = -ENOMEM;
-			if (order == 1)
-				order = 0;
 			if (order > 0)
 				alloc_gfp |= __GFP_NORETRY | __GFP_NOWARN;
 			folio = filemap_alloc_folio(alloc_gfp, order);
@@ -3174,6 +3181,48 @@ static struct file *do_async_mmap_readahead(struct vm_fault *vmf,
 	return fpin;
 }
 
+static vm_fault_t filemap_fault_recheck_pte_none(struct vm_fault *vmf)
+{
+	struct vm_area_struct *vma = vmf->vma;
+	vm_fault_t ret = 0;
+	pte_t *ptep;
+
+	/*
+	 * We might have COW'ed a pagecache folio and might now have an mlocked
+	 * anon folio mapped. The original pagecache folio is not mlocked and
+	 * might have been evicted. During a read+clear/modify/write update of
+	 * the PTE, such as done in do_numa_page()/change_pte_range(), we
+	 * temporarily clear the PTE under PT lock and might detect it here as
+	 * "none" when not holding the PT lock.
+	 *
+	 * Not rechecking the PTE under PT lock could result in an unexpected
+	 * major fault in an mlock'ed region. Recheck only for this special
+	 * scenario while holding the PT lock, to not degrade non-mlocked
+	 * scenarios. Recheck the PTE without PT lock firstly, thereby reducing
+	 * the number of times we hold PT lock.
+	 */
+	if (!(vma->vm_flags & VM_LOCKED))
+		return 0;
+
+	if (!(vmf->flags & FAULT_FLAG_ORIG_PTE_VALID))
+		return 0;
+
+	ptep = pte_offset_map(vmf->pmd, vmf->address);
+	if (unlikely(!ptep))
+		return VM_FAULT_NOPAGE;
+
+	if (unlikely(!pte_none(ptep_get_lockless(ptep)))) {
+		ret = VM_FAULT_NOPAGE;
+	} else {
+		spin_lock(vmf->ptl);
+		if (unlikely(!pte_none(ptep_get(ptep))))
+			ret = VM_FAULT_NOPAGE;
+		spin_unlock(vmf->ptl);
+	}
+	pte_unmap(ptep);
+	return ret;
+}
+
 /**
  * filemap_fault - read in file data for page fault handling
  * @vmf:	struct vm_fault containing details of the fault
@@ -3229,6 +3278,10 @@ vm_fault_t filemap_fault(struct vm_fault *vmf)
 			mapping_locked = true;
 		}
 	} else {
+		ret = filemap_fault_recheck_pte_none(vmf);
+		if (unlikely(ret))
+			return ret;
+
 		/* No page in the page cache at all */
 		count_vm_event(PGMAJFAULT);
 		count_memcg_event_mm(vmf->vma->vm_mm, PGMAJFAULT);
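Note on the filemap_fault_recheck_pte_none() hunk above: it is a double-check pattern. A racy lockless read (ptep_get_lockless()) filters out the common case where the PTE is already populated, and only a still-"none" reading is confirmed under the PT lock, so the usual fault path never touches the lock. Below is a minimal userspace sketch of that pattern under stated assumptions, using an atomic slot and a pthread mutex in place of the PTE and PT lock; every name in it (slot, slot_lock, recheck_none) is illustrative, not a kernel API.

#include <pthread.h>
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

static _Atomic long slot;		/* stands in for the PTE     */
static pthread_mutex_t slot_lock =	/* stands in for the PT lock */
		PTHREAD_MUTEX_INITIALIZER;

/* Return true only if the slot is still "none" (zero) under the lock. */
static bool recheck_none(void)
{
	bool none;

	/* Lockless fast path: a populated slot can be trusted as-is. */
	if (atomic_load_explicit(&slot, memory_order_relaxed) != 0)
		return false;

	/*
	 * Slow path: a writer may clear the slot only transiently while
	 * holding the lock (the read+clear/modify/write case), so a
	 * "none" reading must be confirmed under that same lock.
	 */
	pthread_mutex_lock(&slot_lock);
	none = (atomic_load_explicit(&slot, memory_order_relaxed) == 0);
	pthread_mutex_unlock(&slot_lock);

	return none;
}

int main(void)
{
	atomic_store(&slot, 42);
	printf("none? %s\n", recheck_none() ? "yes" : "no"); /* no  */
	atomic_store(&slot, 0);
	printf("none? %s\n", recheck_none() ? "yes" : "no"); /* yes */
	return 0;
}

The asymmetry is the point: a populated reading can never be a false positive, so only the "none" reading pays for the lock, mirroring how the kernel hunk additionally gates the whole recheck on VM_LOCKED and FAULT_FLAG_ORIG_PTE_VALID to keep non-mlocked faults unaffected.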
