diff options
Diffstat (limited to 'mm')
-rw-r--r-- | mm/cma.c | 23 | ||||
-rw-r--r-- | mm/hugetlb.c | 39 | ||||
-rw-r--r-- | mm/khugepaged.c | 72 | ||||
-rw-r--r-- | mm/memory.c | 3 | ||||
-rw-r--r-- | mm/memory_hotplug.c | 5 | ||||
-rw-r--r-- | mm/mmap.c | 1 | ||||
-rw-r--r-- | mm/page_alloc.c | 7 | ||||
-rw-r--r-- | mm/page_counter.c | 6 | ||||
-rw-r--r-- | mm/rmap.c | 2 | ||||
-rw-r--r-- | mm/shuffle.c | 18 | ||||
-rw-r--r-- | mm/vmalloc.c | 2 | ||||
-rw-r--r-- | mm/vmstat.c | 12 |
12 files changed, 94 insertions, 96 deletions
@@ -93,17 +93,15 @@ static void cma_clear_bitmap(struct cma *cma, unsigned long pfn, mutex_unlock(&cma->lock); } -static int __init cma_activate_area(struct cma *cma) +static void __init cma_activate_area(struct cma *cma) { unsigned long base_pfn = cma->base_pfn, pfn = base_pfn; unsigned i = cma->count >> pageblock_order; struct zone *zone; cma->bitmap = bitmap_zalloc(cma_bitmap_maxno(cma), GFP_KERNEL); - if (!cma->bitmap) { - cma->count = 0; - return -ENOMEM; - } + if (!cma->bitmap) + goto out_error; WARN_ON_ONCE(!pfn_valid(pfn)); zone = page_zone(pfn_to_page(pfn)); @@ -133,25 +131,22 @@ static int __init cma_activate_area(struct cma *cma) spin_lock_init(&cma->mem_head_lock); #endif - return 0; + return; not_in_zone: - pr_err("CMA area %s could not be activated\n", cma->name); bitmap_free(cma->bitmap); +out_error: cma->count = 0; - return -EINVAL; + pr_err("CMA area %s could not be activated\n", cma->name); + return; } static int __init cma_init_reserved_areas(void) { int i; - for (i = 0; i < cma_area_count; i++) { - int ret = cma_activate_area(&cma_areas[i]); - - if (ret) - return ret; - } + for (i = 0; i < cma_area_count; i++) + cma_activate_area(&cma_areas[i]); return 0; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 590111ea6975..7952c6cb6f08 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -3952,7 +3952,7 @@ void __unmap_hugepage_range(struct mmu_gather *tlb, struct vm_area_struct *vma, continue; ptl = huge_pte_lock(h, mm, ptep); - if (huge_pmd_unshare(mm, &address, ptep)) { + if (huge_pmd_unshare(mm, vma, &address, ptep)) { spin_unlock(ptl); /* * We just unmapped a page of PMDs by clearing a PUD. @@ -4539,10 +4539,6 @@ vm_fault_t hugetlb_fault(struct mm_struct *mm, struct vm_area_struct *vma, } else if (unlikely(is_hugetlb_entry_hwpoisoned(entry))) return VM_FAULT_HWPOISON_LARGE | VM_FAULT_SET_HINDEX(hstate_index(h)); - } else { - ptep = huge_pte_alloc(mm, haddr, huge_page_size(h)); - if (!ptep) - return VM_FAULT_OOM; } /* @@ -5019,7 +5015,7 @@ unsigned long hugetlb_change_protection(struct vm_area_struct *vma, if (!ptep) continue; ptl = huge_pte_lock(h, mm, ptep); - if (huge_pmd_unshare(mm, &address, ptep)) { + if (huge_pmd_unshare(mm, vma, &address, ptep)) { pages++; spin_unlock(ptl); shared_pmd = true; @@ -5313,25 +5309,21 @@ static bool vma_shareable(struct vm_area_struct *vma, unsigned long addr) void adjust_range_if_pmd_sharing_possible(struct vm_area_struct *vma, unsigned long *start, unsigned long *end) { - unsigned long check_addr; + unsigned long a_start, a_end; if (!(vma->vm_flags & VM_MAYSHARE)) return; - for (check_addr = *start; check_addr < *end; check_addr += PUD_SIZE) { - unsigned long a_start = check_addr & PUD_MASK; - unsigned long a_end = a_start + PUD_SIZE; + /* Extend the range to be PUD aligned for a worst case scenario */ + a_start = ALIGN_DOWN(*start, PUD_SIZE); + a_end = ALIGN(*end, PUD_SIZE); - /* - * If sharing is possible, adjust start/end if necessary. - */ - if (range_in_vma(vma, a_start, a_end)) { - if (a_start < *start) - *start = a_start; - if (a_end > *end) - *end = a_end; - } - } + /* + * Intersect the range with the vma range, since pmd sharing won't be + * across vma after all + */ + *start = max(vma->vm_start, a_start); + *end = min(vma->vm_end, a_end); } /* @@ -5404,12 +5396,14 @@ out: * returns: 1 successfully unmapped a shared pte page * 0 the underlying pte page is not shared, or it is the last user */ -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { pgd_t *pgd = pgd_offset(mm, *addr); p4d_t *p4d = p4d_offset(pgd, *addr); pud_t *pud = pud_offset(p4d, *addr); + i_mmap_assert_write_locked(vma->vm_file->f_mapping); BUG_ON(page_count(virt_to_page(ptep)) == 0); if (page_count(virt_to_page(ptep)) == 1) return 0; @@ -5427,7 +5421,8 @@ pte_t *huge_pmd_share(struct mm_struct *mm, unsigned long addr, pud_t *pud) return NULL; } -int huge_pmd_unshare(struct mm_struct *mm, unsigned long *addr, pte_t *ptep) +int huge_pmd_unshare(struct mm_struct *mm, struct vm_area_struct *vma, + unsigned long *addr, pte_t *ptep) { return 0; } diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 700f5160f3e4..1d6a9b0b6a9f 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -466,7 +466,7 @@ int __khugepaged_enter(struct mm_struct *mm) return -ENOMEM; /* __khugepaged_exit() must not run from under us */ - VM_BUG_ON_MM(khugepaged_test_exit(mm), mm); + VM_BUG_ON_MM(atomic_read(&mm->mm_users) == 0, mm); if (unlikely(test_and_set_bit(MMF_VM_HUGEPAGE, &mm->flags))) { free_mm_slot(mm_slot); return 0; @@ -1412,7 +1412,7 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) { unsigned long haddr = addr & HPAGE_PMD_MASK; struct vm_area_struct *vma = find_vma(mm, haddr); - struct page *hpage = NULL; + struct page *hpage; pte_t *start_pte, *pte; pmd_t *pmd, _pmd; spinlock_t *ptl; @@ -1432,9 +1432,17 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) if (!hugepage_vma_check(vma, vma->vm_flags | VM_HUGEPAGE)) return; + hpage = find_lock_page(vma->vm_file->f_mapping, + linear_page_index(vma, haddr)); + if (!hpage) + return; + + if (!PageHead(hpage)) + goto drop_hpage; + pmd = mm_find_pmd(mm, haddr); if (!pmd) - return; + goto drop_hpage; start_pte = pte_offset_map_lock(mm, pmd, haddr, &ptl); @@ -1453,30 +1461,11 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) page = vm_normal_page(vma, addr, *pte); - if (!page || !PageCompound(page)) - goto abort; - - if (!hpage) { - hpage = compound_head(page); - /* - * The mapping of the THP should not change. - * - * Note that uprobe, debugger, or MAP_PRIVATE may - * change the page table, but the new page will - * not pass PageCompound() check. - */ - if (WARN_ON(hpage->mapping != vma->vm_file->f_mapping)) - goto abort; - } - /* - * Confirm the page maps to the correct subpage. - * - * Note that uprobe, debugger, or MAP_PRIVATE may change - * the page table, but the new page will not pass - * PageCompound() check. + * Note that uprobe, debugger, or MAP_PRIVATE may change the + * page table, but the new page will not be a subpage of hpage. */ - if (WARN_ON(hpage + i != page)) + if (hpage + i != page) goto abort; count++; } @@ -1495,21 +1484,26 @@ void collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr) pte_unmap_unlock(start_pte, ptl); /* step 3: set proper refcount and mm_counters. */ - if (hpage) { + if (count) { page_ref_sub(hpage, count); add_mm_counter(vma->vm_mm, mm_counter_file(hpage), -count); } /* step 4: collapse pmd */ ptl = pmd_lock(vma->vm_mm, pmd); - _pmd = pmdp_collapse_flush(vma, addr, pmd); + _pmd = pmdp_collapse_flush(vma, haddr, pmd); spin_unlock(ptl); mm_dec_nr_ptes(mm); pte_free(mm, pmd_pgtable(_pmd)); + +drop_hpage: + unlock_page(hpage); + put_page(hpage); return; abort: pte_unmap_unlock(start_pte, ptl); + goto drop_hpage; } static int khugepaged_collapse_pte_mapped_thps(struct mm_slot *mm_slot) @@ -1538,6 +1532,7 @@ out: static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) { struct vm_area_struct *vma; + struct mm_struct *mm; unsigned long addr; pmd_t *pmd, _pmd; @@ -1566,7 +1561,8 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) continue; if (vma->vm_end < addr + HPAGE_PMD_SIZE) continue; - pmd = mm_find_pmd(vma->vm_mm, addr); + mm = vma->vm_mm; + pmd = mm_find_pmd(mm, addr); if (!pmd) continue; /* @@ -1576,17 +1572,19 @@ static void retract_page_tables(struct address_space *mapping, pgoff_t pgoff) * mmap_lock while holding page lock. Fault path does it in * reverse order. Trylock is a way to avoid deadlock. */ - if (mmap_write_trylock(vma->vm_mm)) { - spinlock_t *ptl = pmd_lock(vma->vm_mm, pmd); - /* assume page table is clear */ - _pmd = pmdp_collapse_flush(vma, addr, pmd); - spin_unlock(ptl); - mmap_write_unlock(vma->vm_mm); - mm_dec_nr_ptes(vma->vm_mm); - pte_free(vma->vm_mm, pmd_pgtable(_pmd)); + if (mmap_write_trylock(mm)) { + if (!khugepaged_test_exit(mm)) { + spinlock_t *ptl = pmd_lock(mm, pmd); + /* assume page table is clear */ + _pmd = pmdp_collapse_flush(vma, addr, pmd); + spin_unlock(ptl); + mm_dec_nr_ptes(mm); + pte_free(mm, pmd_pgtable(_pmd)); + } + mmap_write_unlock(mm); } else { /* Try again later */ - khugepaged_add_pte_mapped_thp(vma->vm_mm, addr); + khugepaged_add_pte_mapped_thp(mm, addr); } } i_mmap_unlock_write(mapping); diff --git a/mm/memory.c b/mm/memory.c index 3ecad55103ad..a279c1a26af7 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -4248,6 +4248,9 @@ static vm_fault_t handle_pte_fault(struct vm_fault *vmf) vmf->flags & FAULT_FLAG_WRITE)) { update_mmu_cache(vmf->vma, vmf->address, vmf->pte); } else { + /* Skip spurious TLB flush for retried page fault */ + if (vmf->flags & FAULT_FLAG_TRIED) + goto unlock; /* * This is needed only for protection faults but the arch code * is not yet telling us if this is a protection fault or not. diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index da374cd3d45b..76c75a599da3 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -1742,7 +1742,7 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) */ rc = walk_memory_blocks(start, size, NULL, check_memblock_offlined_cb); if (rc) - goto done; + return rc; /* remove memmap entry */ firmware_map_remove(start, start + size, "System RAM"); @@ -1766,9 +1766,8 @@ static int __ref try_remove_memory(int nid, u64 start, u64 size) try_offline_node(nid); -done: mem_hotplug_done(); - return rc; + return 0; } /** diff --git a/mm/mmap.c b/mm/mmap.c index 8c7ca737a19b..dcdab2675a21 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -3171,6 +3171,7 @@ void exit_mmap(struct mm_struct *mm) if (vma->vm_flags & VM_ACCOUNT) nr_accounted += vma_pages(vma); vma = remove_vma(vma); + cond_resched(); } vm_unacct_memory(nr_accounted); } diff --git a/mm/page_alloc.c b/mm/page_alloc.c index e028b87ce294..d809242f671f 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1306,6 +1306,11 @@ static void free_pcppages_bulk(struct zone *zone, int count, struct page *page, *tmp; LIST_HEAD(head); + /* + * Ensure proper count is passed which otherwise would stuck in the + * below while (list_empty(list)) loop. + */ + count = min(pcp->count, count); while (count) { struct list_head *list; @@ -7881,7 +7886,7 @@ int __meminit init_per_zone_wmark_min(void) return 0; } -core_initcall(init_per_zone_wmark_min) +postcore_initcall(init_per_zone_wmark_min) /* * min_free_kbytes_sysctl_handler - just a wrapper around proc_dointvec() so diff --git a/mm/page_counter.c b/mm/page_counter.c index c56db2d5e159..b4663844c9b3 100644 --- a/mm/page_counter.c +++ b/mm/page_counter.c @@ -72,7 +72,7 @@ void page_counter_charge(struct page_counter *counter, unsigned long nr_pages) long new; new = atomic_long_add_return(nr_pages, &c->usage); - propagate_protected_usage(counter, new); + propagate_protected_usage(c, new); /* * This is indeed racy, but we can live with some * inaccuracy in the watermark. @@ -116,7 +116,7 @@ bool page_counter_try_charge(struct page_counter *counter, new = atomic_long_add_return(nr_pages, &c->usage); if (new > c->max) { atomic_long_sub(nr_pages, &c->usage); - propagate_protected_usage(counter, new); + propagate_protected_usage(c, new); /* * This is racy, but we can live with some * inaccuracy in the failcnt. @@ -125,7 +125,7 @@ bool page_counter_try_charge(struct page_counter *counter, *fail = c; goto failed; } - propagate_protected_usage(counter, new); + propagate_protected_usage(c, new); /* * Just like with failcnt, we can live with some * inaccuracy in the watermark. diff --git a/mm/rmap.c b/mm/rmap.c index 5fe2dedce1fc..6cce9ef06753 100644 --- a/mm/rmap.c +++ b/mm/rmap.c @@ -1469,7 +1469,7 @@ static bool try_to_unmap_one(struct page *page, struct vm_area_struct *vma, * do this outside rmap routines. */ VM_BUG_ON(!(flags & TTU_RMAP_LOCKED)); - if (huge_pmd_unshare(mm, &address, pvmw.pte)) { + if (huge_pmd_unshare(mm, vma, &address, pvmw.pte)) { /* * huge_pmd_unshare unmapped an entire PMD * page. There is no way of knowing exactly diff --git a/mm/shuffle.c b/mm/shuffle.c index 44406d9977c7..dd13ab851b3e 100644 --- a/mm/shuffle.c +++ b/mm/shuffle.c @@ -58,25 +58,25 @@ module_param_call(shuffle, shuffle_store, shuffle_show, &shuffle_param, 0400); * For two pages to be swapped in the shuffle, they must be free (on a * 'free_area' lru), have the same order, and have the same migratetype. */ -static struct page * __meminit shuffle_valid_page(unsigned long pfn, int order) +static struct page * __meminit shuffle_valid_page(struct zone *zone, + unsigned long pfn, int order) { - struct page *page; + struct page *page = pfn_to_online_page(pfn); /* * Given we're dealing with randomly selected pfns in a zone we * need to ask questions like... */ - /* ...is the pfn even in the memmap? */ - if (!pfn_valid_within(pfn)) + /* ... is the page managed by the buddy? */ + if (!page) return NULL; - /* ...is the pfn in a present section or a hole? */ - if (!pfn_in_present_section(pfn)) + /* ... is the page assigned to the same zone? */ + if (page_zone(page) != zone) return NULL; /* ...is the page free and currently on a free_area list? */ - page = pfn_to_page(pfn); if (!PageBuddy(page)) return NULL; @@ -123,7 +123,7 @@ void __meminit __shuffle_zone(struct zone *z) * page_j randomly selected in the span @zone_start_pfn to * @spanned_pages. */ - page_i = shuffle_valid_page(i, order); + page_i = shuffle_valid_page(z, i, order); if (!page_i) continue; @@ -137,7 +137,7 @@ void __meminit __shuffle_zone(struct zone *z) j = z->zone_start_pfn + ALIGN_DOWN(get_random_long() % z->spanned_pages, order_pages); - page_j = shuffle_valid_page(j, order); + page_j = shuffle_valid_page(z, j, order); if (page_j && page_j != page_i) break; } diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 5a2b55c8dd9a..128d20d2d6cb 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -102,6 +102,8 @@ static void vunmap_pmd_range(pud_t *pud, unsigned long addr, unsigned long end, if (pmd_none_or_clear_bad(pmd)) continue; vunmap_pte_range(pmd, addr, next, mask); + + cond_resched(); } while (pmd++, addr = next, addr != end); } diff --git a/mm/vmstat.c b/mm/vmstat.c index 3fb23a21f6dd..7fb01d225337 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1596,12 +1596,6 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, zone->present_pages, zone_managed_pages(zone)); - /* If unpopulated, no other information is useful */ - if (!populated_zone(zone)) { - seq_putc(m, '\n'); - return; - } - seq_printf(m, "\n protection: (%ld", zone->lowmem_reserve[0]); @@ -1609,6 +1603,12 @@ static void zoneinfo_show_print(struct seq_file *m, pg_data_t *pgdat, seq_printf(m, ", %ld", zone->lowmem_reserve[i]); seq_putc(m, ')'); + /* If unpopulated, no other information is useful */ + if (!populated_zone(zone)) { + seq_putc(m, '\n'); + return; + } + for (i = 0; i < NR_VM_ZONE_STAT_ITEMS; i++) seq_printf(m, "\n %-12s %lu", zone_stat_name(i), zone_page_state(zone, i)); |