Diffstat (limited to 'mm')
-rw-r--r--  mm/bootmem.c        | 10
-rw-r--r--  mm/highmem.c        |  2
-rw-r--r--  mm/memcontrol.c     | 67
-rw-r--r--  mm/memory.c         | 10
-rw-r--r--  mm/memory_hotplug.c |  7
-rw-r--r--  mm/mmap.c           |  2
-rw-r--r--  mm/mmzone.c         |  6
-rw-r--r--  mm/nobootmem.c      |  3
-rw-r--r--  mm/page_alloc.c     | 38
-rw-r--r--  mm/shmem.c          | 18
-rw-r--r--  mm/swapfile.c       |  4
-rw-r--r--  mm/vmscan.c         | 25
12 files changed, 78 insertions, 114 deletions
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 434be4ae7a04..f468185b3b28 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -198,8 +198,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 			int order = ilog2(BITS_PER_LONG);
 
 			__free_pages_bootmem(pfn_to_page(start), order);
-			fixup_zone_present_pages(page_to_nid(pfn_to_page(start)),
-					start, start + BITS_PER_LONG);
 			count += BITS_PER_LONG;
 			start += BITS_PER_LONG;
 		} else {
@@ -210,9 +208,6 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 				if (vec & 1) {
 					page = pfn_to_page(start + off);
 					__free_pages_bootmem(page, 0);
-					fixup_zone_present_pages(
-						page_to_nid(page),
-						start + off, start + off + 1);
 					count++;
 				}
 				vec >>= 1;
@@ -226,11 +221,8 @@ static unsigned long __init free_all_bootmem_core(bootmem_data_t *bdata)
 	pages = bdata->node_low_pfn - bdata->node_min_pfn;
 	pages = bootmem_bootmap_pages(pages);
 	count += pages;
-	while (pages--) {
-		fixup_zone_present_pages(page_to_nid(page),
-				page_to_pfn(page), page_to_pfn(page) + 1);
+	while (pages--)
 		__free_pages_bootmem(page++, 0);
-	}
 
 	bdebug("nid=%td released=%lx\n", bdata - bootmem_node_data, count);
 
diff --git a/mm/highmem.c b/mm/highmem.c
index d517cd16a6eb..2da13a5c50e2 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -98,7 +98,7 @@ struct page *kmap_to_page(void *vaddr)
 {
 	unsigned long addr = (unsigned long)vaddr;
 
-	if (addr >= PKMAP_ADDR(0) && addr <= PKMAP_ADDR(LAST_PKMAP)) {
+	if (addr >= PKMAP_ADDR(0) && addr < PKMAP_ADDR(LAST_PKMAP)) {
 		int i = (addr - PKMAP_ADDR(0)) >> PAGE_SHIFT;
 		return pte_page(pkmap_page_table[i]);
 	}
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 7acf43bf04a2..dd39ba000b31 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1055,12 +1055,24 @@ struct lruvec *mem_cgroup_zone_lruvec(struct zone *zone,
 				      struct mem_cgroup *memcg)
 {
 	struct mem_cgroup_per_zone *mz;
+	struct lruvec *lruvec;
 
-	if (mem_cgroup_disabled())
-		return &zone->lruvec;
+	if (mem_cgroup_disabled()) {
+		lruvec = &zone->lruvec;
+		goto out;
+	}
 
 	mz = mem_cgroup_zoneinfo(memcg, zone_to_nid(zone), zone_idx(zone));
-	return &mz->lruvec;
+	lruvec = &mz->lruvec;
+out:
+	/*
+	 * Since a node can be onlined after the mem_cgroup was created,
+	 * we have to be prepared to initialize lruvec->zone here;
+	 * and if offlined then reonlined, we need to reinitialize it.
+	 */
+	if (unlikely(lruvec->zone != zone))
+		lruvec->zone = zone;
+	return lruvec;
 }
 
 /*
@@ -1087,9 +1099,12 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
 	struct mem_cgroup_per_zone *mz;
 	struct mem_cgroup *memcg;
 	struct page_cgroup *pc;
+	struct lruvec *lruvec;
 
-	if (mem_cgroup_disabled())
-		return &zone->lruvec;
+	if (mem_cgroup_disabled()) {
+		lruvec = &zone->lruvec;
+		goto out;
+	}
 
 	pc = lookup_page_cgroup(page);
 	memcg = pc->mem_cgroup;
@@ -1107,7 +1122,16 @@ struct lruvec *mem_cgroup_page_lruvec(struct page *page, struct zone *zone)
 		pc->mem_cgroup = memcg = root_mem_cgroup;
 
 	mz = page_cgroup_zoneinfo(memcg, page);
-	return &mz->lruvec;
+	lruvec = &mz->lruvec;
+out:
+	/*
+	 * Since a node can be onlined after the mem_cgroup was created,
+	 * we have to be prepared to initialize lruvec->zone here;
+	 * and if offlined then reonlined, we need to reinitialize it.
+	 */
+	if (unlikely(lruvec->zone != zone))
+		lruvec->zone = zone;
+	return lruvec;
 }
 
 /**
@@ -1452,17 +1476,26 @@ static int mem_cgroup_count_children(struct mem_cgroup *memcg)
 static u64 mem_cgroup_get_limit(struct mem_cgroup *memcg)
 {
 	u64 limit;
-	u64 memsw;
 
 	limit = res_counter_read_u64(&memcg->res, RES_LIMIT);
-	limit += total_swap_pages << PAGE_SHIFT;
 
-	memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
 	/*
-	 * If memsw is finite and limits the amount of swap space available
-	 * to this memcg, return that limit.
+	 * Do not consider swap space if we cannot swap due to swappiness
 	 */
-	return min(limit, memsw);
+	if (mem_cgroup_swappiness(memcg)) {
+		u64 memsw;
+
+		limit += total_swap_pages << PAGE_SHIFT;
+		memsw = res_counter_read_u64(&memcg->memsw, RES_LIMIT);
+
+		/*
+		 * If memsw is finite and limits the amount of swap space
+		 * available to this memcg, return that limit.
+		 */
+		limit = min(limit, memsw);
+	}
+
+	return limit;
 }
 
 void mem_cgroup_out_of_memory(struct mem_cgroup *memcg, gfp_t gfp_mask,
@@ -3688,17 +3721,17 @@ unsigned long mem_cgroup_soft_limit_reclaim(struct zone *zone, int order,
 static bool mem_cgroup_force_empty_list(struct mem_cgroup *memcg,
 				int node, int zid, enum lru_list lru)
 {
-	struct mem_cgroup_per_zone *mz;
+	struct lruvec *lruvec;
 	unsigned long flags, loop;
 	struct list_head *list;
 	struct page *busy;
 	struct zone *zone;
 
 	zone = &NODE_DATA(node)->node_zones[zid];
-	mz = mem_cgroup_zoneinfo(memcg, node, zid);
-	list = &mz->lruvec.lists[lru];
+	lruvec = mem_cgroup_zone_lruvec(zone, memcg);
+	list = &lruvec->lists[lru];
 
-	loop = mz->lru_size[lru];
+	loop = mem_cgroup_get_lru_size(lruvec, lru);
 	/* give some margin against EBUSY etc...*/
 	loop += 256;
 	busy = NULL;
@@ -4736,7 +4769,7 @@ static int alloc_mem_cgroup_per_zone_info(struct mem_cgroup *memcg, int node)
 
 	for (zone = 0; zone < MAX_NR_ZONES; zone++) {
 		mz = &pn->zoneinfo[zone];
-		lruvec_init(&mz->lruvec, &NODE_DATA(node)->node_zones[zone]);
+		lruvec_init(&mz->lruvec);
 		mz->usage_in_excess = 0;
 		mz->on_tree = false;
 		mz->memcg = memcg;
diff --git a/mm/memory.c b/mm/memory.c
index fb135ba4aba9..221fc9ffcab1 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -2527,9 +2527,8 @@ static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,
 	int ret = 0;
 	int page_mkwrite = 0;
 	struct page *dirty_page = NULL;
-	unsigned long mmun_start;	/* For mmu_notifiers */
-	unsigned long mmun_end;	/* For mmu_notifiers */
-	bool mmun_called = false;	/* For mmu_notifiers */
+	unsigned long mmun_start = 0;	/* For mmu_notifiers */
+	unsigned long mmun_end = 0;	/* For mmu_notifiers */
 
 	old_page = vm_normal_page(vma, address, orig_pte);
 	if (!old_page) {
@@ -2708,8 +2707,7 @@ gotten:
 		goto oom_free_new;
 
 	mmun_start = address & PAGE_MASK;
-	mmun_end = (address & PAGE_MASK) + PAGE_SIZE;
-	mmun_called = true;
+	mmun_end = mmun_start + PAGE_SIZE;
 	mmu_notifier_invalidate_range_start(mm, mmun_start, mmun_end);
 
 	/*
@@ -2778,7 +2776,7 @@ gotten:
 		page_cache_release(new_page);
 unlock:
 	pte_unmap_unlock(page_table, ptl);
-	if (mmun_called)
+	if (mmun_end > mmun_start)
 		mmu_notifier_invalidate_range_end(mm, mmun_start, mmun_end);
 	if (old_page) {
 		/*
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index 56b758ae57d2..e4eeacae2b91 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -106,7 +106,6 @@ static void get_page_bootmem(unsigned long info, struct page *page,
 void __ref put_page_bootmem(struct page *page)
 {
 	unsigned long type;
-	struct zone *zone;
 
 	type = (unsigned long) page->lru.next;
 	BUG_ON(type < MEMORY_HOTPLUG_MIN_BOOTMEM_TYPE ||
@@ -117,12 +116,6 @@ void __ref put_page_bootmem(struct page *page)
 		set_page_private(page, 0);
 		INIT_LIST_HEAD(&page->lru);
 		__free_pages_bootmem(page, 0);
-
-		zone = page_zone(page);
-		zone_span_writelock(zone);
-		zone->present_pages++;
-		zone_span_writeunlock(zone);
-		totalram_pages++;
 	}
 
 }
diff --git a/mm/mmap.c b/mm/mmap.c
index 2d942353d681..9a796c41e7d9 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -334,8 +334,10 @@ void validate_mm(struct mm_struct *mm)
 	struct vm_area_struct *vma = mm->mmap;
 	while (vma) {
 		struct anon_vma_chain *avc;
+		vma_lock_anon_vma(vma);
 		list_for_each_entry(avc, &vma->anon_vma_chain, same_vma)
 			anon_vma_interval_tree_verify(avc);
+		vma_unlock_anon_vma(vma);
 		vma = vma->vm_next;
 		i++;
 	}
diff --git a/mm/mmzone.c b/mm/mmzone.c
index 3cef80f6ac79..4596d81b89b1 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -87,7 +87,7 @@ int memmap_valid_within(unsigned long pfn,
 }
 #endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
 
-void lruvec_init(struct lruvec *lruvec, struct zone *zone)
+void lruvec_init(struct lruvec *lruvec)
 {
 	enum lru_list lru;
 
@@ -95,8 +95,4 @@ void lruvec_init(struct lruvec *lruvec, struct zone *zone)
 
 	for_each_lru(lru)
 		INIT_LIST_HEAD(&lruvec->lists[lru]);
-
-#ifdef CONFIG_MEMCG
-	lruvec->zone = zone;
-#endif
 }
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 714d5d650470..bd82f6b31411 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -116,8 +116,6 @@ static unsigned long __init __free_memory_core(phys_addr_t start,
 		return 0;
 
 	__free_pages_memory(start_pfn, end_pfn);
-	fixup_zone_present_pages(pfn_to_nid(start >> PAGE_SHIFT),
-			start_pfn, end_pfn);
 
 	return end_pfn - start_pfn;
 }
@@ -128,7 +126,6 @@ unsigned long __init free_low_memory_core_early(int nodeid)
 	phys_addr_t start, end, size;
 	u64 i;
 
-	reset_zone_present_pages();
 	for_each_free_mem_range(i, MAX_NUMNODES, &start, &end, NULL)
 		count += __free_memory_core(start, end);
 
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 5b74de6702e0..bcb72c6e2b2d 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1405,7 +1405,7 @@ int capture_free_page(struct page *page, int alloc_order, int migratetype)
 
 	mt = get_pageblock_migratetype(page);
 	if (unlikely(mt != MIGRATE_ISOLATE))
-		__mod_zone_freepage_state(zone, -(1UL << order), mt);
+		__mod_zone_freepage_state(zone, -(1UL << alloc_order), mt);
 
 	if (alloc_order != order)
 		expand(zone, page, alloc_order, order,
@@ -4505,7 +4505,7 @@ static void __paginginit free_area_init_core(struct pglist_data *pgdat,
 
 		zone->zone_pgdat = pgdat;
 		zone_pcp_init(zone);
-		lruvec_init(&zone->lruvec, zone);
+		lruvec_init(&zone->lruvec);
 
 		if (!size)
 			continue;
@@ -6098,37 +6098,3 @@ void dump_page(struct page *page)
 	dump_page_flags(page->flags);
 	mem_cgroup_print_bad_page(page);
 }
-
-/* reset zone->present_pages */
-void reset_zone_present_pages(void)
-{
-	struct zone *z;
-	int i, nid;
-
-	for_each_node_state(nid, N_HIGH_MEMORY) {
-		for (i = 0; i < MAX_NR_ZONES; i++) {
-			z = NODE_DATA(nid)->node_zones + i;
-			z->present_pages = 0;
-		}
-	}
-}
-
-/* calculate zone's present pages in buddy system */
-void fixup_zone_present_pages(int nid, unsigned long start_pfn,
-			unsigned long end_pfn)
-{
-	struct zone *z;
-	unsigned long zone_start_pfn, zone_end_pfn;
-	int i;
-
-	for (i = 0; i < MAX_NR_ZONES; i++) {
-		z = NODE_DATA(nid)->node_zones + i;
-		zone_start_pfn = z->zone_start_pfn;
-		zone_end_pfn = zone_start_pfn + z->spanned_pages;
-
-		/* if the two regions intersect */
-		if (!(zone_start_pfn >= end_pfn || zone_end_pfn <= start_pfn))
-			z->present_pages += min(end_pfn, zone_end_pfn) -
-					max(start_pfn, zone_start_pfn);
-	}
-}
diff --git a/mm/shmem.c b/mm/shmem.c
index 67afba5117f2..89341b658bd0 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -643,7 +643,7 @@ static void shmem_evict_inode(struct inode *inode)
 		kfree(info->symlink);
 
 	simple_xattrs_free(&info->xattrs);
-	BUG_ON(inode->i_blocks);
+	WARN_ON(inode->i_blocks);
 	shmem_free_inode(inode->i_sb);
 	clear_inode(inode);
 }
@@ -1145,8 +1145,20 @@ repeat:
 		if (!error) {
 			error = shmem_add_to_page_cache(page, mapping, index,
 						gfp, swp_to_radix_entry(swap));
-			/* We already confirmed swap, and make no allocation */
-			VM_BUG_ON(error);
+			/*
+			 * We already confirmed swap under page lock, and make
+			 * no memory allocation here, so usually no possibility
+			 * of error; but free_swap_and_cache() only trylocks a
+			 * page, so it is just possible that the entry has been
+			 * truncated or holepunched since swap was confirmed.
+			 * shmem_undo_range() will have done some of the
+			 * unaccounting, now delete_from_swap_cache() will do
+			 * the rest (including mem_cgroup_uncharge_swapcache).
+			 * Reset swap.val? No, leave it so "failed" goes back to
+			 * "repeat": reading a hole and writing should succeed.
+			 */
+			if (error)
+				delete_from_swap_cache(page);
 		}
 		if (error)
 			goto failed;
diff --git a/mm/swapfile.c b/mm/swapfile.c
index 71cd288b2001..f91a25547ffe 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -1494,9 +1494,8 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 	BUG_ON(!current->mm);
 
 	pathname = getname(specialfile);
-	err = PTR_ERR(pathname);
 	if (IS_ERR(pathname))
-		goto out;
+		return PTR_ERR(pathname);
 
 	victim = file_open_name(pathname, O_RDWR|O_LARGEFILE, 0);
 	err = PTR_ERR(victim);
@@ -1608,6 +1607,7 @@ SYSCALL_DEFINE1(swapoff, const char __user *, specialfile)
 out_dput:
 	filp_close(victim, NULL);
 out:
+	putname(pathname);
 	return err;
 }
 
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 8b055e9379bc..48550c66f1f2 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1760,28 +1760,6 @@ static bool in_reclaim_compaction(struct scan_control *sc)
 	return false;
 }
 
-#ifdef CONFIG_COMPACTION
-/*
- * If compaction is deferred for sc->order then scale the number of pages
- * reclaimed based on the number of consecutive allocation failures
- */
-static unsigned long scale_for_compaction(unsigned long pages_for_compaction,
-			struct lruvec *lruvec, struct scan_control *sc)
-{
-	struct zone *zone = lruvec_zone(lruvec);
-
-	if (zone->compact_order_failed <= sc->order)
-		pages_for_compaction <<= zone->compact_defer_shift;
-	return pages_for_compaction;
-}
-#else
-static unsigned long scale_for_compaction(unsigned long pages_for_compaction,
-			struct lruvec *lruvec, struct scan_control *sc)
-{
-	return pages_for_compaction;
-}
-#endif
-
 /*
  * Reclaim/compaction is used for high-order allocation requests. It reclaims
  * order-0 pages before compacting the zone. should_continue_reclaim() returns
@@ -1829,9 +1807,6 @@ static inline bool should_continue_reclaim(struct lruvec *lruvec,
 	 * inactive lists are large enough, continue reclaiming
 	 */
 	pages_for_compaction = (2UL << sc->order);
-
-	pages_for_compaction = scale_for_compaction(pages_for_compaction,
-						lruvec, sc);
 	inactive_lru_pages = get_lru_size(lruvec, LRU_INACTIVE_FILE);
 	if (nr_swap_pages > 0)
 		inactive_lru_pages += get_lru_size(lruvec, LRU_INACTIVE_ANON);
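A note on the recurring pattern in the mm/memcontrol.c hunks above: an lruvec's back-pointer to its zone can go stale across memory offline/online, so the fix validates and repairs the pointer on every lookup instead of trusting initialization order (lruvec_init() correspondingly loses its zone argument). The following is a minimal stand-alone sketch of that lazy-repair idea; the toy struct zone/struct lruvec and the zone_lruvec() helper are simplified stand-ins invented for illustration, not the kernel's real definitions.

#include <stdio.h>

/* Toy stand-ins for the kernel's struct zone and struct lruvec. */
struct zone {
	int nid;			/* node id, illustration only */
};

struct lruvec {
	struct zone *zone;		/* may be stale after offline/online */
};

/* One lruvec per "node" in this toy model; zone starts out NULL. */
static struct lruvec lruvecs[2];

/*
 * The lookup pattern from the patch: the lruvec may predate its zone
 * coming online (or the zone may have been offlined and reonlined),
 * so the back-pointer is checked and repaired at every lookup rather
 * than set once at init time.
 */
static struct lruvec *zone_lruvec(struct zone *zone)
{
	struct lruvec *lruvec = &lruvecs[zone->nid];

	if (lruvec->zone != zone)	/* the kernel wraps this in unlikely() */
		lruvec->zone = zone;
	return lruvec;
}

int main(void)
{
	struct zone old_zone = { 0 };
	struct zone new_zone = { 0 };	/* node 0 "reonlined" elsewhere */
	struct lruvec *lruvec;

	zone_lruvec(&old_zone);		/* lruvec now points at old_zone */
	lruvec = zone_lruvec(&new_zone);/* stale pointer repaired here */

	printf("lruvec->zone repaired: %s\n",
	       lruvec->zone == &new_zone ? "yes" : "no");
	return 0;
}

The single predictable branch is nearly free on the lookup fast path, and it avoids having to walk every memcg's lruvecs at hotplug time, which is presumably why the patch repairs lazily rather than eagerly.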