diff options
Diffstat (limited to 'mm/migrate.c')
-rw-r--r-- | mm/migrate.c | 264 |
1 files changed, 150 insertions, 114 deletions
diff --git a/mm/migrate.c b/mm/migrate.c index f7e4bfdc13b7..5d1839a9148d 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -327,16 +327,13 @@ void __migration_entry_wait(struct mm_struct *mm, pte_t *ptep, /* * Once page cache replacement of page migration started, page_count - * *must* be zero. And, we don't want to call wait_on_page_locked() - * against a page without get_page(). - * So, we use get_page_unless_zero(), here. Even failed, page fault - * will occur again. + * is zero; but we must not call put_and_wait_on_page_locked() without + * a ref. Use get_page_unless_zero(), and just fault again if it fails. */ if (!get_page_unless_zero(page)) goto out; pte_unmap_unlock(ptep, ptl); - wait_on_page_locked(page); - put_page(page); + put_and_wait_on_page_locked(page); return; out: pte_unmap_unlock(ptep, ptl); @@ -370,63 +367,28 @@ void pmd_migration_entry_wait(struct mm_struct *mm, pmd_t *pmd) if (!get_page_unless_zero(page)) goto unlock; spin_unlock(ptl); - wait_on_page_locked(page); - put_page(page); + put_and_wait_on_page_locked(page); return; unlock: spin_unlock(ptl); } #endif -#ifdef CONFIG_BLOCK -/* Returns true if all buffers are successfully locked */ -static bool buffer_migrate_lock_buffers(struct buffer_head *head, - enum migrate_mode mode) +static int expected_page_refs(struct page *page) { - struct buffer_head *bh = head; - - /* Simple case, sync compaction */ - if (mode != MIGRATE_ASYNC) { - do { - get_bh(bh); - lock_buffer(bh); - bh = bh->b_this_page; - - } while (bh != head); + int expected_count = 1; - return true; - } - - /* async case, we cannot block on lock_buffer so use trylock_buffer */ - do { - get_bh(bh); - if (!trylock_buffer(bh)) { - /* - * We failed to lock the buffer and cannot stall in - * async migration. Release the taken locks - */ - struct buffer_head *failed_bh = bh; - put_bh(failed_bh); - bh = head; - while (bh != failed_bh) { - unlock_buffer(bh); - put_bh(bh); - bh = bh->b_this_page; - } - return false; - } + /* + * Device public or private pages have an extra refcount as they are + * ZONE_DEVICE pages. + */ + expected_count += is_device_private_page(page); + expected_count += is_device_public_page(page); + if (page_mapping(page)) + expected_count += hpage_nr_pages(page) + page_has_private(page); - bh = bh->b_this_page; - } while (bh != head); - return true; -} -#else -static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, - enum migrate_mode mode) -{ - return true; + return expected_count; } -#endif /* CONFIG_BLOCK */ /* * Replace the page in the mapping. @@ -437,21 +399,13 @@ static inline bool buffer_migrate_lock_buffers(struct buffer_head *head, * 3 for pages with a mapping and PagePrivate/PagePrivate2 set. */ int migrate_page_move_mapping(struct address_space *mapping, - struct page *newpage, struct page *page, - struct buffer_head *head, enum migrate_mode mode, + struct page *newpage, struct page *page, enum migrate_mode mode, int extra_count) { XA_STATE(xas, &mapping->i_pages, page_index(page)); struct zone *oldzone, *newzone; int dirty; - int expected_count = 1 + extra_count; - - /* - * Device public or private pages have an extra refcount as they are - * ZONE_DEVICE pages. - */ - expected_count += is_device_private_page(page); - expected_count += is_device_public_page(page); + int expected_count = expected_page_refs(page) + extra_count; if (!mapping) { /* Anonymous page without mapping */ @@ -471,8 +425,6 @@ int migrate_page_move_mapping(struct address_space *mapping, newzone = page_zone(newpage); xas_lock_irq(&xas); - - expected_count += hpage_nr_pages(page) + page_has_private(page); if (page_count(page) != expected_count || xas_load(&xas) != page) { xas_unlock_irq(&xas); return -EAGAIN; @@ -484,20 +436,6 @@ int migrate_page_move_mapping(struct address_space *mapping, } /* - * In the async migration case of moving a page with buffers, lock the - * buffers using trylock before the mapping is moved. If the mapping - * was moved, we later failed to lock the buffers and could not move - * the mapping back due to an elevated page count, we would have to - * block waiting on other references to be dropped. - */ - if (mode == MIGRATE_ASYNC && head && - !buffer_migrate_lock_buffers(head, mode)) { - page_ref_unfreeze(page, expected_count); - xas_unlock_irq(&xas); - return -EAGAIN; - } - - /* * Now we know that no one else is looking at the page: * no turning back from here. */ @@ -748,7 +686,7 @@ int migrate_page(struct address_space *mapping, BUG_ON(PageWriteback(page)); /* Writeback must be complete */ - rc = migrate_page_move_mapping(mapping, newpage, page, NULL, mode, 0); + rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0); if (rc != MIGRATEPAGE_SUCCESS) return rc; @@ -762,34 +700,98 @@ int migrate_page(struct address_space *mapping, EXPORT_SYMBOL(migrate_page); #ifdef CONFIG_BLOCK -/* - * Migration function for pages with buffers. This function can only be used - * if the underlying filesystem guarantees that no other references to "page" - * exist. - */ -int buffer_migrate_page(struct address_space *mapping, - struct page *newpage, struct page *page, enum migrate_mode mode) +/* Returns true if all buffers are successfully locked */ +static bool buffer_migrate_lock_buffers(struct buffer_head *head, + enum migrate_mode mode) +{ + struct buffer_head *bh = head; + + /* Simple case, sync compaction */ + if (mode != MIGRATE_ASYNC) { + do { + get_bh(bh); + lock_buffer(bh); + bh = bh->b_this_page; + + } while (bh != head); + + return true; + } + + /* async case, we cannot block on lock_buffer so use trylock_buffer */ + do { + get_bh(bh); + if (!trylock_buffer(bh)) { + /* + * We failed to lock the buffer and cannot stall in + * async migration. Release the taken locks + */ + struct buffer_head *failed_bh = bh; + put_bh(failed_bh); + bh = head; + while (bh != failed_bh) { + unlock_buffer(bh); + put_bh(bh); + bh = bh->b_this_page; + } + return false; + } + + bh = bh->b_this_page; + } while (bh != head); + return true; +} + +static int __buffer_migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode, + bool check_refs) { struct buffer_head *bh, *head; int rc; + int expected_count; if (!page_has_buffers(page)) return migrate_page(mapping, newpage, page, mode); + /* Check whether page does not have extra refs before we do more work */ + expected_count = expected_page_refs(page); + if (page_count(page) != expected_count) + return -EAGAIN; + head = page_buffers(page); + if (!buffer_migrate_lock_buffers(head, mode)) + return -EAGAIN; - rc = migrate_page_move_mapping(mapping, newpage, page, head, mode, 0); + if (check_refs) { + bool busy; + bool invalidated = false; - if (rc != MIGRATEPAGE_SUCCESS) - return rc; +recheck_buffers: + busy = false; + spin_lock(&mapping->private_lock); + bh = head; + do { + if (atomic_read(&bh->b_count)) { + busy = true; + break; + } + bh = bh->b_this_page; + } while (bh != head); + spin_unlock(&mapping->private_lock); + if (busy) { + if (invalidated) { + rc = -EAGAIN; + goto unlock_buffers; + } + invalidate_bh_lrus(); + invalidated = true; + goto recheck_buffers; + } + } - /* - * In the async case, migrate_page_move_mapping locked the buffers - * with an IRQ-safe spinlock held. In the sync case, the buffers - * need to be locked now - */ - if (mode != MIGRATE_ASYNC) - BUG_ON(!buffer_migrate_lock_buffers(head, mode)); + rc = migrate_page_move_mapping(mapping, newpage, page, mode, 0); + if (rc != MIGRATEPAGE_SUCCESS) + goto unlock_buffers; ClearPagePrivate(page); set_page_private(newpage, page_private(page)); @@ -811,6 +813,8 @@ int buffer_migrate_page(struct address_space *mapping, else migrate_page_states(newpage, page); + rc = MIGRATEPAGE_SUCCESS; +unlock_buffers: bh = head; do { unlock_buffer(bh); @@ -819,9 +823,32 @@ int buffer_migrate_page(struct address_space *mapping, } while (bh != head); - return MIGRATEPAGE_SUCCESS; + return rc; +} + +/* + * Migration function for pages with buffers. This function can only be used + * if the underlying filesystem guarantees that no other references to "page" + * exist. For example attached buffer heads are accessed only under page lock. + */ +int buffer_migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode) +{ + return __buffer_migrate_page(mapping, newpage, page, mode, false); } EXPORT_SYMBOL(buffer_migrate_page); + +/* + * Same as above except that this variant is more careful and checks that there + * are also no buffer head references. This function is the right one for + * mappings where buffer heads are directly looked up and referenced (such as + * block device mappings). + */ +int buffer_migrate_page_norefs(struct address_space *mapping, + struct page *newpage, struct page *page, enum migrate_mode mode) +{ + return __buffer_migrate_page(mapping, newpage, page, mode, true); +} #endif /* @@ -1297,8 +1324,19 @@ static int unmap_and_move_huge_page(new_page_t get_new_page, goto put_anon; if (page_mapped(hpage)) { + struct address_space *mapping = page_mapping(hpage); + + /* + * try_to_unmap could potentially call huge_pmd_unshare. + * Because of this, take semaphore in write mode here and + * set TTU_RMAP_LOCKED to let lower levels know we have + * taken the lock. + */ + i_mmap_lock_write(mapping); try_to_unmap(hpage, - TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS); + TTU_MIGRATION|TTU_IGNORE_MLOCK|TTU_IGNORE_ACCESS| + TTU_RMAP_LOCKED); + i_mmap_unlock_write(mapping); page_was_mapped = 1; } @@ -2303,6 +2341,7 @@ next: */ static void migrate_vma_collect(struct migrate_vma *migrate) { + struct mmu_notifier_range range; struct mm_walk mm_walk; mm_walk.pmd_entry = migrate_vma_collect_pmd; @@ -2314,13 +2353,11 @@ static void migrate_vma_collect(struct migrate_vma *migrate) mm_walk.mm = migrate->vma->vm_mm; mm_walk.private = migrate; - mmu_notifier_invalidate_range_start(mm_walk.mm, - migrate->start, - migrate->end); + mmu_notifier_range_init(&range, mm_walk.mm, migrate->start, + migrate->end); + mmu_notifier_invalidate_range_start(&range); walk_page_range(migrate->start, migrate->end, &mm_walk); - mmu_notifier_invalidate_range_end(mm_walk.mm, - migrate->start, - migrate->end); + mmu_notifier_invalidate_range_end(&range); migrate->end = migrate->start + (migrate->npages << PAGE_SHIFT); } @@ -2701,9 +2738,8 @@ static void migrate_vma_pages(struct migrate_vma *migrate) { const unsigned long npages = migrate->npages; const unsigned long start = migrate->start; - struct vm_area_struct *vma = migrate->vma; - struct mm_struct *mm = vma->vm_mm; - unsigned long addr, i, mmu_start; + struct mmu_notifier_range range; + unsigned long addr, i; bool notified = false; for (i = 0, addr = start; i < npages; addr += PAGE_SIZE, i++) { @@ -2722,11 +2758,12 @@ static void migrate_vma_pages(struct migrate_vma *migrate) continue; } if (!notified) { - mmu_start = addr; notified = true; - mmu_notifier_invalidate_range_start(mm, - mmu_start, - migrate->end); + + mmu_notifier_range_init(&range, + migrate->vma->vm_mm, + addr, migrate->end); + mmu_notifier_invalidate_range_start(&range); } migrate_vma_insert_page(migrate, addr, newpage, &migrate->src[i], @@ -2767,8 +2804,7 @@ static void migrate_vma_pages(struct migrate_vma *migrate) * did already call it. */ if (notified) - mmu_notifier_invalidate_range_only_end(mm, mmu_start, - migrate->end); + mmu_notifier_invalidate_range_only_end(&range); } /* |