diff options
Diffstat (limited to 'mm/rmap.c')
-rw-r--r-- | mm/rmap.c | 127 |
1 files changed, 103 insertions, 24 deletions
diff --git a/mm/rmap.c b/mm/rmap.c index 71bd30a147cf..87b9e8ad4509 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -133,9 +133,14 @@ int anon_vma_prepare(struct vm_area_struct *vma) if (unlikely(!anon_vma)) goto out_enomem_free_avc; allocated = anon_vma; + /* + * This VMA had no anon_vma yet. This anon_vma is + * the root of any anon_vma tree that might form. + */ + anon_vma->root = anon_vma; } - spin_lock(&anon_vma->lock); + anon_vma_lock(anon_vma); /* page_table_lock to protect against threads */ spin_lock(&mm->page_table_lock); if (likely(!vma->anon_vma)) { @@ -143,12 +148,12 @@ int anon_vma_prepare(struct vm_area_struct *vma) avc->anon_vma = anon_vma; avc->vma = vma; list_add(&avc->same_vma, &vma->anon_vma_chain); - list_add(&avc->same_anon_vma, &anon_vma->head); + list_add_tail(&avc->same_anon_vma, &anon_vma->head); allocated = NULL; avc = NULL; } spin_unlock(&mm->page_table_lock); - spin_unlock(&anon_vma->lock); + anon_vma_unlock(anon_vma); if (unlikely(allocated)) anon_vma_free(allocated); @@ -171,9 +176,9 @@ static void anon_vma_chain_link(struct vm_area_struct *vma, avc->anon_vma = anon_vma; list_add(&avc->same_vma, &vma->anon_vma_chain); - spin_lock(&anon_vma->lock); + anon_vma_lock(anon_vma); list_add_tail(&avc->same_anon_vma, &anon_vma->head); - spin_unlock(&anon_vma->lock); + anon_vma_unlock(anon_vma); } /* @@ -225,9 +230,21 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) avc = anon_vma_chain_alloc(); if (!avc) goto out_error_free_anon_vma; - anon_vma_chain_link(vma, avc, anon_vma); + + /* + * The root anon_vma's spinlock is the lock actually used when we + * lock any of the anon_vmas in this anon_vma tree. + */ + anon_vma->root = pvma->anon_vma->root; + /* + * With KSM refcounts, an anon_vma can stay around longer than the + * process it belongs to. The root anon_vma needs to be pinned + * until this anon_vma is freed, because the lock lives in the root. + */ + get_anon_vma(anon_vma->root); /* Mark this anon_vma as the one where our new (COWed) pages go. */ vma->anon_vma = anon_vma; + anon_vma_chain_link(vma, avc, anon_vma); return 0; @@ -247,22 +264,29 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain) if (!anon_vma) return; - spin_lock(&anon_vma->lock); + anon_vma_lock(anon_vma); list_del(&anon_vma_chain->same_anon_vma); /* We must garbage collect the anon_vma if it's empty */ empty = list_empty(&anon_vma->head) && !anonvma_external_refcount(anon_vma); - spin_unlock(&anon_vma->lock); + anon_vma_unlock(anon_vma); - if (empty) + if (empty) { + /* We no longer need the root anon_vma */ + if (anon_vma->root != anon_vma) + drop_anon_vma(anon_vma->root); anon_vma_free(anon_vma); + } } void unlink_anon_vmas(struct vm_area_struct *vma) { struct anon_vma_chain *avc, *next; - /* Unlink each anon_vma chained to the VMA. */ + /* + * Unlink each anon_vma chained to the VMA. This list is ordered + * from newest to oldest, ensuring the root anon_vma gets freed last. + */ list_for_each_entry_safe(avc, next, &vma->anon_vma_chain, same_vma) { anon_vma_unlink(avc); list_del(&avc->same_vma); @@ -303,7 +327,7 @@ struct anon_vma *page_lock_anon_vma(struct page *page) goto out; anon_vma = (struct anon_vma *) (anon_mapping - PAGE_MAPPING_ANON); - spin_lock(&anon_vma->lock); + anon_vma_lock(anon_vma); return anon_vma; out: rcu_read_unlock(); @@ -312,7 +336,7 @@ out: void page_unlock_anon_vma(struct anon_vma *anon_vma) { - spin_unlock(&anon_vma->lock); + anon_vma_unlock(anon_vma); rcu_read_unlock(); } @@ -343,9 +367,10 @@ vma_address(struct page *page, struct vm_area_struct *vma) */ unsigned long page_address_in_vma(struct page *page, struct vm_area_struct *vma) { - if (PageAnon(page)) - ; - else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { + if (PageAnon(page)) { + if (vma->anon_vma->root != page_anon_vma(page)->root) + return -EFAULT; + } else if (page->mapping && !(vma->vm_flags & VM_NONLINEAR)) { if (!vma->vm_file || vma->vm_file->f_mapping != page->mapping) return -EFAULT; @@ -753,14 +778,20 @@ static void __page_set_anon_rmap(struct page *page, * If the page isn't exclusively mapped into this vma, * we must use the _oldest_ possible anon_vma for the * page mapping! - * - * So take the last AVC chain entry in the vma, which is - * the deepest ancestor, and use the anon_vma from that. */ if (!exclusive) { - struct anon_vma_chain *avc; - avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma); - anon_vma = avc->anon_vma; + if (PageAnon(page)) + return; + anon_vma = anon_vma->root; + } else { + /* + * In this case, swapped-out-but-not-discarded swap-cache + * is remapped. So, no need to update page->mapping here. + * We convice anon_vma poitned by page->mapping is not obsolete + * because vma->anon_vma is necessary to be a family of it. + */ + if (PageAnon(page)) + return; } anon_vma = (void *) anon_vma + PAGE_MAPPING_ANON; @@ -790,6 +821,7 @@ static void __page_check_anon_rmap(struct page *page, * are initially only visible via the pagetables, and the pte is locked * over the call to page_add_new_anon_rmap. */ + BUG_ON(page_anon_vma(page)->root != vma->anon_vma->root); BUG_ON(page->index != linear_page_index(vma, address)); #endif } @@ -808,6 +840,17 @@ static void __page_check_anon_rmap(struct page *page, void page_add_anon_rmap(struct page *page, struct vm_area_struct *vma, unsigned long address) { + do_page_add_anon_rmap(page, vma, address, 0); +} + +/* + * Special version of the above for do_swap_page, which often runs + * into pages that are exclusively owned by the current process. + * Everybody else should continue to use page_add_anon_rmap above. + */ +void do_page_add_anon_rmap(struct page *page, + struct vm_area_struct *vma, unsigned long address, int exclusive) +{ int first = atomic_inc_and_test(&page->_mapcount); if (first) __inc_zone_page_state(page, NR_ANON_PAGES); @@ -817,7 +860,7 @@ void page_add_anon_rmap(struct page *page, VM_BUG_ON(!PageLocked(page)); VM_BUG_ON(address < vma->vm_start || address >= vma->vm_end); if (first) - __page_set_anon_rmap(page, vma, address, 0); + __page_set_anon_rmap(page, vma, address, exclusive); else __page_check_anon_rmap(page, vma, address); } @@ -1384,6 +1427,42 @@ int try_to_munlock(struct page *page) return try_to_unmap_file(page, TTU_MUNLOCK); } +#if defined(CONFIG_KSM) || defined(CONFIG_MIGRATION) +/* + * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root + * if necessary. Be careful to do all the tests under the lock. Once + * we know we are the last user, nobody else can get a reference and we + * can do the freeing without the lock. + */ +void drop_anon_vma(struct anon_vma *anon_vma) +{ + BUG_ON(atomic_read(&anon_vma->external_refcount) <= 0); + if (atomic_dec_and_lock(&anon_vma->external_refcount, &anon_vma->root->lock)) { + struct anon_vma *root = anon_vma->root; + int empty = list_empty(&anon_vma->head); + int last_root_user = 0; + int root_empty = 0; + + /* + * The refcount on a non-root anon_vma got dropped. Drop + * the refcount on the root and check if we need to free it. + */ + if (empty && anon_vma != root) { + BUG_ON(atomic_read(&root->external_refcount) <= 0); + last_root_user = atomic_dec_and_test(&root->external_refcount); + root_empty = list_empty(&root->head); + } + anon_vma_unlock(anon_vma); + + if (empty) { + anon_vma_free(anon_vma); + if (root_empty && last_root_user) + anon_vma_free(root); + } + } +} +#endif + #ifdef CONFIG_MIGRATION /* * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file(): @@ -1405,7 +1484,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, anon_vma = page_anon_vma(page); if (!anon_vma) return ret; - spin_lock(&anon_vma->lock); + anon_vma_lock(anon_vma); list_for_each_entry(avc, &anon_vma->head, same_anon_vma) { struct vm_area_struct *vma = avc->vma; unsigned long address = vma_address(page, vma); @@ -1415,7 +1494,7 @@ static int rmap_walk_anon(struct page *page, int (*rmap_one)(struct page *, if (ret != SWAP_AGAIN) break; } - spin_unlock(&anon_vma->lock); + anon_vma_unlock(anon_vma); return ret; } |