Diffstat (limited to 'mm/migrate.c')
-rw-r--r--	mm/migrate.c	92
1 files changed, 43 insertions, 49 deletions
diff --git a/mm/migrate.c b/mm/migrate.c
index 84381b55b2bd..f7e4bfdc13b7 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -326,7 +326,7 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep,
 	page = migration_entry_to_page(entry);
 
 	/*
-	 * Once radix-tree replacement of page migration started, page_count
+	 * Once page cache replacement of page migration started, page_count
 	 * *must* be zero. And, we don't want to call wait_on_page_locked()
 	 * against a page without get_page().
 	 * So, we use get_page_unless_zero(), here. Even failed, page fault
@@ -441,10 +441,10 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		struct buffer_head *head, enum migrate_mode mode,
 		int extra_count)
 {
+	XA_STATE(xas, &mapping->i_pages, page_index(page));
 	struct zone *oldzone, *newzone;
 	int dirty;
 	int expected_count = 1 + extra_count;
-	void **pslot;
 
 	/*
 	 * Device public or private pages have an extra refcount as they are
@@ -470,21 +470,16 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	oldzone = page_zone(page);
 	newzone = page_zone(newpage);
 
-	xa_lock_irq(&mapping->i_pages);
-
-	pslot = radix_tree_lookup_slot(&mapping->i_pages,
- 					page_index(page));
+	xas_lock_irq(&xas);
 
 	expected_count += hpage_nr_pages(page) + page_has_private(page);
-	if (page_count(page) != expected_count ||
-		radix_tree_deref_slot_protected(pslot,
-					&mapping->i_pages.xa_lock) != page) {
-		xa_unlock_irq(&mapping->i_pages);
+	if (page_count(page) != expected_count || xas_load(&xas) != page) {
+		xas_unlock_irq(&xas);
 		return -EAGAIN;
 	}
 
 	if (!page_ref_freeze(page, expected_count)) {
-		xa_unlock_irq(&mapping->i_pages);
+		xas_unlock_irq(&xas);
 		return -EAGAIN;
 	}
 
@@ -498,7 +493,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	if (mode == MIGRATE_ASYNC && head &&
 			!buffer_migrate_lock_buffers(head, mode)) {
 		page_ref_unfreeze(page, expected_count);
-		xa_unlock_irq(&mapping->i_pages);
+		xas_unlock_irq(&xas);
 		return -EAGAIN;
 	}
 
@@ -526,16 +521,13 @@ int migrate_page_move_mapping(struct address_space *mapping,
 		SetPageDirty(newpage);
 	}
 
-	radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
+	xas_store(&xas, newpage);
 	if (PageTransHuge(page)) {
 		int i;
-		int index = page_index(page);
 
 		for (i = 1; i < HPAGE_PMD_NR; i++) {
-			pslot = radix_tree_lookup_slot(&mapping->i_pages,
-						       index + i);
-			radix_tree_replace_slot(&mapping->i_pages, pslot,
-						newpage + i);
+			xas_next(&xas);
+			xas_store(&xas, newpage + i);
 		}
 	}
 
@@ -546,7 +538,7 @@ int migrate_page_move_mapping(struct address_space *mapping,
 	 */
 	page_ref_unfreeze(page, expected_count - hpage_nr_pages(page));
 
-	xa_unlock(&mapping->i_pages);
+	xas_unlock(&xas);
 	/* Leave irq disabled to prevent preemption while updating stats */
 
 	/*
@@ -586,22 +578,18 @@ EXPORT_SYMBOL(migrate_page_move_mapping);
 int migrate_huge_page_move_mapping(struct address_space *mapping,
 				   struct page *newpage, struct page *page)
 {
+	XA_STATE(xas, &mapping->i_pages, page_index(page));
 	int expected_count;
-	void **pslot;
-
-	xa_lock_irq(&mapping->i_pages);
-
-	pslot = radix_tree_lookup_slot(&mapping->i_pages, page_index(page));
+	xas_lock_irq(&xas);
 
 	expected_count = 2 + page_has_private(page);
-	if (page_count(page) != expected_count ||
-		radix_tree_deref_slot_protected(pslot, &mapping->i_pages.xa_lock) != page) {
-		xa_unlock_irq(&mapping->i_pages);
+	if (page_count(page) != expected_count || xas_load(&xas) != page) {
+		xas_unlock_irq(&xas);
 		return -EAGAIN;
 	}
 
 	if (!page_ref_freeze(page, expected_count)) {
-		xa_unlock_irq(&mapping->i_pages);
+		xas_unlock_irq(&xas);
 		return -EAGAIN;
 	}
 
@@ -610,11 +598,11 @@ int migrate_huge_page_move_mapping(struct address_space *mapping,
 
 	get_page(newpage);
 
-	radix_tree_replace_slot(&mapping->i_pages, pslot, newpage);
+	xas_store(&xas, newpage);
 
 	page_ref_unfreeze(page, expected_count - 1);
 
-	xa_unlock_irq(&mapping->i_pages);
+	xas_unlock_irq(&xas);
 
 	return MIGRATEPAGE_SUCCESS;
 }
@@ -685,6 +673,8 @@ void migrate_page_states(struct page *newpage, struct page *page)
 		SetPageActive(newpage);
 	} else if (TestClearPageUnevictable(page))
 		SetPageUnevictable(newpage);
+	if (PageWorkingset(page))
+		SetPageWorkingset(newpage);
 	if (PageChecked(page))
 		SetPageChecked(newpage);
 	if (PageMappedToDisk(page))
@@ -1973,8 +1963,7 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	int isolated = 0;
 	struct page *new_page = NULL;
 	int page_lru = page_is_file_cache(page);
-	unsigned long mmun_start = address & HPAGE_PMD_MASK;
-	unsigned long mmun_end = mmun_start + HPAGE_PMD_SIZE;
+	unsigned long start = address & HPAGE_PMD_MASK;
 
 	new_page = alloc_pages_node(node,
 		(GFP_TRANSHUGE_LIGHT | __GFP_THISNODE),
@@ -1997,15 +1986,15 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	/* anon mapping, we can simply copy page->mapping to the new page: */
 	new_page->mapping = page->mapping;
 	new_page->index = page->index;
+	/* flush the cache before copying using the kernel virtual address */
+	flush_cache_range(vma, start, start + HPAGE_PMD_SIZE);
 	migrate_page_copy(new_page, page);
 	WARN_ON(PageLRU(new_page));
 
 	/* Recheck the target PMD */
-	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 	ptl = pmd_lock(mm, pmd);
 	if (unlikely(!pmd_same(*pmd, entry) || !page_ref_freeze(page, 2))) {
 		spin_unlock(ptl);
-		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 
 		/* Reverse changes made by migrate_page_copy() */
 		if (TestClearPageActive(new_page))
@@ -2029,16 +2018,26 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	entry = maybe_pmd_mkwrite(pmd_mkdirty(entry), vma);
 
 	/*
-	 * Clear the old entry under pagetable lock and establish the new PTE.
-	 * Any parallel GUP will either observe the old page blocking on the
-	 * page lock, block on the page table lock or observe the new page.
-	 * The SetPageUptodate on the new page and page_add_new_anon_rmap
-	 * guarantee the copy is visible before the pagetable update.
+	 * Overwrite the old entry under pagetable lock and establish
+	 * the new PTE. Any parallel GUP will either observe the old
+	 * page blocking on the page lock, block on the page table
+	 * lock or observe the new page. The SetPageUptodate on the
+	 * new page and page_add_new_anon_rmap guarantee the copy is
+	 * visible before the pagetable update.
+	 */
+	page_add_anon_rmap(new_page, vma, start, true);
+	/*
+	 * At this point the pmd is numa/protnone (i.e. non present) and the TLB
+	 * has already been flushed globally.  So no TLB can be currently
+	 * caching this non present pmd mapping.  There's no need to clear the
+	 * pmd before doing set_pmd_at(), nor to flush the TLB after
+	 * set_pmd_at().  Clearing the pmd here would introduce a race
+	 * condition against MADV_DONTNEED, because MADV_DONTNEED only holds the
+	 * mmap_sem for reading.  If the pmd is set to NULL at any given time,
+	 * MADV_DONTNEED won't wait on the pmd lock and it'll skip clearing this
+	 * pmd.
 	 */
-	flush_cache_range(vma, mmun_start, mmun_end);
-	page_add_anon_rmap(new_page, vma, mmun_start, true);
-	pmdp_huge_clear_flush_notify(vma, mmun_start, pmd);
-	set_pmd_at(mm, mmun_start, pmd, entry);
+	set_pmd_at(mm, start, pmd, entry);
 	update_mmu_cache_pmd(vma, address, &entry);
 
 	page_ref_unfreeze(page, 2);
@@ -2047,11 +2046,6 @@ int migrate_misplaced_transhuge_page(struct mm_struct *mm,
 	set_page_owner_migrate_reason(new_page, MR_NUMA_MISPLACED);
 
 	spin_unlock(ptl);
-	/*
-	 * No need to double call mmu_notifier->invalidate_range() callback as
-	 * the above pmdp_huge_clear_flush_notify() did already call it.
-	 */
-	mmu_notifier_invalidate_range_only_end(mm, mmun_start, mmun_end);
 
 	/* Take an "isolate" reference and put new page on the LRU. */
 	get_page(new_page);
@@ -2075,7 +2069,7 @@ out_fail:
 	ptl = pmd_lock(mm, pmd);
 	if (pmd_same(*pmd, entry)) {
 		entry = pmd_modify(entry, vma->vm_page_prot);
-		set_pmd_at(mm, mmun_start, pmd, entry);
+		set_pmd_at(mm, start, pmd, entry);
 		update_mmu_cache_pmd(vma, address, &entry);
 	}
 	spin_unlock(ptl);
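Note on the page cache hunks above: the open-coded radix-tree slot handling (radix_tree_lookup_slot() + radix_tree_deref_slot_protected() + radix_tree_replace_slot()) is replaced with the XArray's advanced xas_* API: a stack-allocated XA_STATE cursor, xas_lock_irq()/xas_unlock_irq() for the array lock, xas_load() to re-check the entry under the lock, and xas_store()/xas_next() to replace the head and tail slots. The sketch below only illustrates that pattern and is not the kernel's code; the function name, parameters and the nr-slot loop are invented for the example, and the real migrate_page_move_mapping() additionally freezes the page refcount with page_ref_freeze() before storing.

#include <linux/errno.h>
#include <linux/mm_types.h>
#include <linux/xarray.h>

/*
 * Illustrative sketch only: replace "oldpage" with "newpage" across "nr"
 * consecutive slots of an XArray, in the style the migration code above
 * uses for mapping->i_pages.
 */
static int replace_slots(struct xarray *xa, unsigned long index,
			 struct page *oldpage, struct page *newpage,
			 unsigned int nr)
{
	XA_STATE(xas, xa, index);	/* cursor starting at the head slot */
	unsigned int i;

	xas_lock_irq(&xas);		/* takes xa->xa_lock, disables IRQs */
	if (xas_load(&xas) != oldpage) {	/* entry changed under us */
		xas_unlock_irq(&xas);
		return -EAGAIN;
	}

	xas_store(&xas, newpage);	/* like-for-like store, no allocation */
	for (i = 1; i < nr; i++) {	/* tail slots of a compound entry */
		xas_next(&xas);
		xas_store(&xas, newpage + i);
	}

	xas_unlock_irq(&xas);
	return 0;
}

The gain over the radix-tree version is that the XA_STATE remembers its position, so the per-tail-page radix_tree_lookup_slot() calls collapse into xas_next(), and the lock helpers operate on the state instead of reaching into mapping->i_pages directly.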
