author | Ingo Molnar <mingo@kernel.org> | 2018-04-12 10:42:34 +0300
committer | Ingo Molnar <mingo@kernel.org> | 2018-04-12 10:42:34 +0300
commit | ef389b734691cdc8beb009dd402135dcdcb86a56 (patch)
tree | 9523a37db93cb7c7874a5f18b4d9a7014898b814 /mm
parent | a774635db5c430cbf21fa5d2f2df3d23aaa8e782 (diff)
parent | c76fc98260751e71c884dc1a18a07e427ef033b5 (diff)
download | linux-ef389b734691cdc8beb009dd402135dcdcb86a56.tar.xz
Merge branch 'WIP.x86/asm' into x86/urgent, because the topic is ready
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r-- | mm/Kconfig | 14
-rw-r--r-- | mm/fadvise.c | 10
-rw-r--r-- | mm/gup.c | 7
-rw-r--r-- | mm/huge_memory.c | 9
-rw-r--r-- | mm/hugetlb.c | 9
-rw-r--r-- | mm/kasan/kasan_init.c | 2
-rw-r--r-- | mm/khugepaged.c | 15
-rw-r--r-- | mm/kmemleak.c | 12
-rw-r--r-- | mm/memblock.c | 28
-rw-r--r-- | mm/memcontrol.c | 6
-rw-r--r-- | mm/mempolicy.c | 95
-rw-r--r-- | mm/migrate.c | 39
-rw-r--r-- | mm/mmap.c | 17
-rw-r--r-- | mm/nommu.c | 37
-rw-r--r-- | mm/page_alloc.c | 22
-rw-r--r-- | mm/page_owner.c | 6
-rw-r--r-- | mm/percpu-km.c | 8
-rw-r--r-- | mm/percpu-vm.c | 18
-rw-r--r-- | mm/percpu.c | 71
-rw-r--r-- | mm/readahead.c | 7
-rw-r--r-- | mm/shmem.c | 31
-rw-r--r-- | mm/slab.c | 1
-rw-r--r-- | mm/sparse.c | 37
-rw-r--r-- | mm/vmscan.c | 31
-rw-r--r-- | mm/vmstat.c | 2
-rw-r--r-- | mm/zsmalloc.c | 13
26 files changed, 312 insertions, 235 deletions
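
Aside from the mm-specific fixes, a recurring change across this diff (mm/fadvise.c, mm/mempolicy.c, mm/migrate.c, mm/mmap.c, mm/nommu.c, mm/readahead.c) is the removal of in-kernel sys_*() calls: each syscall body moves into a ksys_*() or kernel_*() helper, and the SYSCALL_DEFINEn() entry point becomes a thin wrapper around it. The sketch below restates that pattern using the readahead names that appear in this diff; it is an illustration of the shape of the change, with the helper body elided, not the verbatim kernel code.

```c
#include <linux/syscalls.h>
#include <linux/types.h>

/*
 * Sketch only: the syscall body lives in a plain C helper that other
 * kernel code and compat wrappers can call directly, while the
 * SYSCALL_DEFINEn() macro is reduced to a one-line shim.  Names follow
 * mm/readahead.c in the diff below; the real body is elided here.
 */
ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
	/* ... body formerly open-coded inside SYSCALL_DEFINE3(readahead, ...) ... */
	return 0;
}

SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
{
	return ksys_readahead(fd, offset, count);
}
```

The compat entry points added in mm/mempolicy.c and mm/migrate.c rely on the same split: after converting their 32-bit arguments they call kernel_migrate_pages() and kernel_move_pages() rather than the corresponding sys_*() functions.
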
diff --git a/mm/Kconfig b/mm/Kconfig index c782e8fb7235..d5004d82a1d6 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -278,13 +278,6 @@ config BOUNCE by default when ZONE_DMA or HIGHMEM is selected, but you may say n to override this. -# On the 'tile' arch, USB OHCI needs the bounce pool since tilegx will often -# have more than 4GB of memory, but we don't currently use the IOTLB to present -# a 32-bit address to OHCI. So we need to use a bounce pool instead. -config NEED_BOUNCE_POOL - bool - default y if TILE && USB_OHCI_HCD - config NR_QUICK int depends on QUICKLIST @@ -627,15 +620,14 @@ config GENERIC_EARLY_IOREMAP config MAX_STACK_SIZE_MB int "Maximum user stack size for 32-bit processes (MB)" default 80 - range 8 256 if METAG range 8 2048 depends on STACK_GROWSUP && (!64BIT || COMPAT) help This is the maximum stack size in Megabytes in the VM layout of 32-bit user processes when the stack grows upwards (currently only on parisc - and metag arch). The stack will be located at the highest memory - address minus the given value, unless the RLIMIT_STACK hard limit is - changed to a smaller value in which case that is used. + arch). The stack will be located at the highest memory address minus + the given value, unless the RLIMIT_STACK hard limit is changed to a + smaller value in which case that is used. A sane initial value is 80 MB. diff --git a/mm/fadvise.c b/mm/fadvise.c index 767887f5f3bf..afa41491d324 100644 --- a/mm/fadvise.c +++ b/mm/fadvise.c @@ -26,7 +26,8 @@ * POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could * deactivate the pages and clear PG_Referenced. */ -SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) + +int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice) { struct fd f = fdget(fd); struct inode *inode; @@ -185,11 +186,16 @@ out: return ret; } +SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice) +{ + return ksys_fadvise64_64(fd, offset, len, advice); +} + #ifdef __ARCH_WANT_SYS_FADVISE64 SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice) { - return sys_fadvise64_64(fd, offset, len, advice); + return ksys_fadvise64_64(fd, offset, len, advice); } #endif @@ -516,7 +516,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma, } if (ret & VM_FAULT_RETRY) { - if (nonblocking) + if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT)) *nonblocking = 0; return -EBUSY; } @@ -890,7 +890,10 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk, break; } if (*locked) { - /* VM_FAULT_RETRY didn't trigger */ + /* + * VM_FAULT_RETRY didn't trigger or it was a + * FOLL_NOWAIT. 
+ */ if (!pages_done) pages_done = ret; break; diff --git a/mm/huge_memory.c b/mm/huge_memory.c index 87ab9b8f56b5..5a68730eebd6 100644 --- a/mm/huge_memory.c +++ b/mm/huge_memory.c @@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page, VM_BUG_ON_PAGE(!PageCompound(page), page); - if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) { + if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg, + true)) { put_page(page); count_vm_event(THP_FAULT_FALLBACK); return VM_FAULT_FALLBACK; @@ -1316,7 +1317,7 @@ alloc: } if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm, - huge_gfp, &memcg, true))) { + huge_gfp | __GFP_NORETRY, &memcg, true))) { put_page(new_page); split_huge_pmd(vma, vmf->pmd, vmf->address); if (page) @@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink, list_for_each_safe(pos, next, &list) { page = list_entry((void *)pos, struct page, mapping); - lock_page(page); + if (!trylock_page(page)) + goto next; /* split_huge_page() removes page from list on success */ if (!split_huge_page(page)) split++; unlock_page(page); +next: put_page(page); } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 7c204e3d132b..976bbc5646fe 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -18,6 +18,7 @@ #include <linux/bootmem.h> #include <linux/sysfs.h> #include <linux/slab.h> +#include <linux/mmdebug.h> #include <linux/sched/signal.h> #include <linux/rmap.h> #include <linux/string_helpers.h> @@ -1583,7 +1584,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask, page = NULL; } else { h->surplus_huge_pages++; - h->nr_huge_pages_node[page_to_nid(page)]++; + h->surplus_huge_pages_node[page_to_nid(page)]++; } out_unlock: @@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode, struct resv_map *resv_map; long gbl_reserve; + /* This should never happen */ + if (from > to) { + VM_WARN(1, "%s called with a negative range\n", __func__); + return -EINVAL; + } + /* * Only apply hugepage reservation if asked. 
At fault time, an * attempt will be made for VM_NORESERVE to allocate a page diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c index 554e4c0f23a2..f436246ccc79 100644 --- a/mm/kasan/kasan_init.c +++ b/mm/kasan/kasan_init.c @@ -31,7 +31,7 @@ unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss; #if CONFIG_PGTABLE_LEVELS > 4 -p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss; +p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss; #endif #if CONFIG_PGTABLE_LEVELS > 3 pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss; diff --git a/mm/khugepaged.c b/mm/khugepaged.c index b7e2268dfc9a..e42568284e06 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma, goto out; } - VM_BUG_ON_PAGE(PageCompound(page), page); + /* TODO: teach khugepaged to collapse THP mapped with pte */ + if (PageCompound(page)) { + result = SCAN_PAGE_COMPOUND; + goto out; + } + VM_BUG_ON_PAGE(!PageAnon(page), page); /* @@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm, goto out_nolock; } - if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) { + /* Do not oom kill for khugepaged charges */ + if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY, + &memcg, true))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out_nolock; } @@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm, goto out; } - if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) { + /* Do not oom kill for khugepaged charges */ + if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY, + &memcg, true))) { result = SCAN_CGROUP_CHARGE_FAIL; goto out; } diff --git a/mm/kmemleak.c b/mm/kmemleak.c index e83987c55a08..46c2290a08f1 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -1657,8 +1657,7 @@ static void start_scan_thread(void) } /* - * Stop the automatic memory scanning thread. This function must be called - * with the scan_mutex held. + * Stop the automatic memory scanning thread. */ static void stop_scan_thread(void) { @@ -1921,12 +1920,15 @@ static void kmemleak_do_cleanup(struct work_struct *work) { stop_scan_thread(); + mutex_lock(&scan_mutex); /* - * Once the scan thread has stopped, it is safe to no longer track - * object freeing. Ordering of the scan thread stopping and the memory - * accesses below is guaranteed by the kthread_stop() function. + * Once it is made sure that kmemleak_scan has stopped, it is safe to no + * longer track object freeing. Ordering of the scan thread stopping and + * the memory accesses below is guaranteed by the kthread_stop() + * function. 
*/ kmemleak_free_enabled = 0; + mutex_unlock(&scan_mutex); if (!kmemleak_found_leaks) __kmemleak_do_cleanup(); diff --git a/mm/memblock.c b/mm/memblock.c index 5a9ca2a1751b..48376bd33274 100644 --- a/mm/memblock.c +++ b/mm/memblock.c @@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid, *out_nid = r->nid; } -unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn, - unsigned long max_pfn) -{ - struct memblock_type *type = &memblock.memory; - unsigned int right = type->cnt; - unsigned int mid, left = 0; - phys_addr_t addr = PFN_PHYS(pfn + 1); - - do { - mid = (right + left) / 2; - - if (addr < type->regions[mid].base) - right = mid; - else if (addr >= (type->regions[mid].base + - type->regions[mid].size)) - left = mid + 1; - else { - /* addr is within the region, so pfn + 1 is valid */ - return min(pfn + 1, max_pfn); - } - } while (left < right); - - if (right == type->cnt) - return max_pfn; - else - return min(PHYS_PFN(type->regions[right].base), max_pfn); -} - /** * memblock_set_node - set node ID on memblock regions * @base: base of area to set node ID for diff --git a/mm/memcontrol.c b/mm/memcontrol.c index 670e99b68aa6..9ec024b862ac 100644 --- a/mm/memcontrol.c +++ b/mm/memcontrol.c @@ -714,9 +714,9 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm) * invocations for reference counting, or use mem_cgroup_iter_break() * to cancel a hierarchy walk before the round-trip is complete. * - * Reclaimers can specify a zone and a priority level in @reclaim to + * Reclaimers can specify a node and a priority level in @reclaim to * divide up the memcgs in the hierarchy among all concurrent - * reclaimers operating on the same zone and priority. + * reclaimers operating on the same node and priority. */ struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root, struct mem_cgroup *prev, @@ -2299,7 +2299,7 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep) } /** - * memcg_kmem_charge: charge a kmem page + * memcg_kmem_charge_memcg: charge a kmem page * @page: page to charge * @gfp: reclaim mode * @order: allocation order diff --git a/mm/mempolicy.c b/mm/mempolicy.c index d879f1d8a44a..01cbb7078d6c 100644 --- a/mm/mempolicy.c +++ b/mm/mempolicy.c @@ -1336,9 +1336,9 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode, return copy_to_user(mask, nodes_addr(*nodes), copy) ? 
-EFAULT : 0; } -SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, - unsigned long, mode, const unsigned long __user *, nmask, - unsigned long, maxnode, unsigned, flags) +static long kernel_mbind(unsigned long start, unsigned long len, + unsigned long mode, const unsigned long __user *nmask, + unsigned long maxnode, unsigned int flags) { nodemask_t nodes; int err; @@ -1357,9 +1357,16 @@ SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, return do_mbind(start, len, mode, mode_flags, &nodes, flags); } +SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len, + unsigned long, mode, const unsigned long __user *, nmask, + unsigned long, maxnode, unsigned int, flags) +{ + return kernel_mbind(start, len, mode, nmask, maxnode, flags); +} + /* Set the process memory policy */ -SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, - unsigned long, maxnode) +static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask, + unsigned long maxnode) { int err; nodemask_t nodes; @@ -1377,9 +1384,15 @@ SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, return do_set_mempolicy(mode, flags, &nodes); } -SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, - const unsigned long __user *, old_nodes, - const unsigned long __user *, new_nodes) +SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask, + unsigned long, maxnode) +{ + return kernel_set_mempolicy(mode, nmask, maxnode); +} + +static int kernel_migrate_pages(pid_t pid, unsigned long maxnode, + const unsigned long __user *old_nodes, + const unsigned long __user *new_nodes) { struct mm_struct *mm = NULL; struct task_struct *task; @@ -1469,11 +1482,20 @@ out_put: } +SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode, + const unsigned long __user *, old_nodes, + const unsigned long __user *, new_nodes) +{ + return kernel_migrate_pages(pid, maxnode, old_nodes, new_nodes); +} + /* Retrieve NUMA policy */ -SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, - unsigned long __user *, nmask, unsigned long, maxnode, - unsigned long, addr, unsigned long, flags) +static int kernel_get_mempolicy(int __user *policy, + unsigned long __user *nmask, + unsigned long maxnode, + unsigned long addr, + unsigned long flags) { int err; int uninitialized_var(pval); @@ -1496,6 +1518,13 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, return err; } +SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, + unsigned long __user *, nmask, unsigned long, maxnode, + unsigned long, addr, unsigned long, flags) +{ + return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, @@ -1514,7 +1543,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy, if (nmask) nm = compat_alloc_user_space(alloc_size); - err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags); + err = kernel_get_mempolicy(policy, nm, nr_bits+1, addr, flags); if (!err && nmask) { unsigned long copy_size; @@ -1546,7 +1575,7 @@ COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask, return -EFAULT; } - return sys_set_mempolicy(mode, nm, nr_bits+1); + return kernel_set_mempolicy(mode, nm, nr_bits+1); } COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, @@ -1568,10 +1597,43 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len, return -EFAULT; } - return sys_mbind(start, len, mode, nm, nr_bits+1, 
flags); + return kernel_mbind(start, len, mode, nm, nr_bits+1, flags); } -#endif +COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid, + compat_ulong_t, maxnode, + const compat_ulong_t __user *, old_nodes, + const compat_ulong_t __user *, new_nodes) +{ + unsigned long __user *old = NULL; + unsigned long __user *new = NULL; + nodemask_t tmp_mask; + unsigned long nr_bits; + unsigned long size; + + nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES); + size = ALIGN(nr_bits, BITS_PER_LONG) / 8; + if (old_nodes) { + if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits)) + return -EFAULT; + old = compat_alloc_user_space(new_nodes ? size * 2 : size); + if (new_nodes) + new = old + size / sizeof(unsigned long); + if (copy_to_user(old, nodes_addr(tmp_mask), size)) + return -EFAULT; + } + if (new_nodes) { + if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits)) + return -EFAULT; + if (new == NULL) + new = compat_alloc_user_space(size); + if (copy_to_user(new, nodes_addr(tmp_mask), size)) + return -EFAULT; + } + return kernel_migrate_pages(pid, nr_bits + 1, old, new); +} + +#endif /* CONFIG_COMPAT */ struct mempolicy *__get_vma_policy(struct vm_area_struct *vma, unsigned long addr) @@ -2124,6 +2186,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b) case MPOL_INTERLEAVE: return !!nodes_equal(a->v.nodes, b->v.nodes); case MPOL_PREFERRED: + /* a's ->flags is the same as b's */ + if (a->flags & MPOL_F_LOCAL) + return true; return a->v.preferred_node == b->v.preferred_node; default: BUG(); diff --git a/mm/migrate.c b/mm/migrate.c index 1e5525a25691..003886606a22 100644 --- a/mm/migrate.c +++ b/mm/migrate.c @@ -34,6 +34,7 @@ #include <linux/backing-dev.h> #include <linux/compaction.h> #include <linux/syscalls.h> +#include <linux/compat.h> #include <linux/hugetlb.h> #include <linux/hugetlb_cgroup.h> #include <linux/gfp.h> @@ -1745,10 +1746,10 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages, * Move a list of pages in the address space of the currently executing * process. 
*/ -SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, - const void __user * __user *, pages, - const int __user *, nodes, - int __user *, status, int, flags) +static int kernel_move_pages(pid_t pid, unsigned long nr_pages, + const void __user * __user *pages, + const int __user *nodes, + int __user *status, int flags) { struct task_struct *task; struct mm_struct *mm; @@ -1807,6 +1808,36 @@ out: return err; } +SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages, + const void __user * __user *, pages, + const int __user *, nodes, + int __user *, status, int, flags) +{ + return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags); +} + +#ifdef CONFIG_COMPAT +COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages, + compat_uptr_t __user *, pages32, + const int __user *, nodes, + int __user *, status, + int, flags) +{ + const void __user * __user *pages; + int i; + + pages = compat_alloc_user_space(nr_pages * sizeof(void *)); + for (i = 0; i < nr_pages; i++) { + compat_uptr_t p; + + if (get_user(p, pages32 + i) || + put_user(compat_ptr(p), pages + i)) + return -EFAULT; + } + return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags); +} +#endif /* CONFIG_COMPAT */ + #ifdef CONFIG_NUMA_BALANCING /* * Returns true if this is a safe migration target node for misplaced NUMA diff --git a/mm/mmap.c b/mm/mmap.c index 9efdc021ad22..aa0dc8231c0d 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1488,9 +1488,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr, return addr; } -SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, - unsigned long, prot, unsigned long, flags, - unsigned long, fd, unsigned long, pgoff) +unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { struct file *file = NULL; unsigned long retval; @@ -1537,6 +1537,13 @@ out_fput: return retval; } +SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, pgoff) +{ + return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); +} + #ifdef __ARCH_WANT_SYS_OLD_MMAP struct mmap_arg_struct { unsigned long addr; @@ -1556,8 +1563,8 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) if (offset_in_page(a.offset)) return -EINVAL; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); } #endif /* __ARCH_WANT_SYS_OLD_MMAP */ diff --git a/mm/nommu.c b/mm/nommu.c index ebb6e618dade..4f8720243ae7 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -663,22 +663,6 @@ static void put_nommu_region(struct vm_region *region) } /* - * update protection on a vma - */ -static void protect_vma(struct vm_area_struct *vma, unsigned long flags) -{ -#ifdef CONFIG_MPU - struct mm_struct *mm = vma->vm_mm; - long start = vma->vm_start & PAGE_MASK; - while (start < vma->vm_end) { - protect_page(mm, start, flags); - start += PAGE_SIZE; - } - update_protections(mm); -#endif -} - -/* * add a VMA into a process's mm_struct in the appropriate place in the list * and tree and add to the address space's page tree also if not an anonymous * page @@ -695,8 +679,6 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma) mm->map_count++; vma->vm_mm = mm; - protect_vma(vma, vma->vm_flags); - /* add the VMA to the mapping */ if (vma->vm_file) { mapping = 
vma->vm_file->f_mapping; @@ -757,8 +739,6 @@ static void delete_vma_from_mm(struct vm_area_struct *vma) struct mm_struct *mm = vma->vm_mm; struct task_struct *curr = current; - protect_vma(vma, 0); - mm->map_count--; for (i = 0; i < VMACACHE_SIZE; i++) { /* if the vma is cached, invalidate the entire cache */ @@ -1423,9 +1403,9 @@ error_getting_region: return -ENOMEM; } -SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, - unsigned long, prot, unsigned long, flags, - unsigned long, fd, unsigned long, pgoff) +unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len, + unsigned long prot, unsigned long flags, + unsigned long fd, unsigned long pgoff) { struct file *file = NULL; unsigned long retval = -EBADF; @@ -1447,6 +1427,13 @@ out: return retval; } +SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len, + unsigned long, prot, unsigned long, flags, + unsigned long, fd, unsigned long, pgoff) +{ + return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff); +} + #ifdef __ARCH_WANT_SYS_OLD_MMAP struct mmap_arg_struct { unsigned long addr; @@ -1466,8 +1453,8 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg) if (offset_in_page(a.offset)) return -EINVAL; - return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, - a.offset >> PAGE_SHIFT); + return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd, + a.offset >> PAGE_SHIFT); } #endif /* __ARCH_WANT_SYS_OLD_MMAP */ diff --git a/mm/page_alloc.c b/mm/page_alloc.c index cb416723538f..4ea018263210 100644 --- a/mm/page_alloc.c +++ b/mm/page_alloc.c @@ -1910,7 +1910,9 @@ static int move_freepages(struct zone *zone, * Remove at a later date when no bug reports exist related to * grouping pages by mobility */ - VM_BUG_ON(page_zone(start_page) != page_zone(end_page)); + VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) && + pfn_valid(page_to_pfn(end_page)) && + page_zone(start_page) != page_zone(end_page)); #endif if (num_movable) @@ -3594,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask) return false; /* this guy won't enter reclaim */ - if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC)) + if (current->flags & PF_MEMALLOC) return false; /* We're only interested __GFP_FS allocations for now */ @@ -5354,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone, if (context != MEMMAP_EARLY) goto not_early; - if (!early_pfn_valid(pfn)) { -#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP - /* - * Skip to the pfn preceding the next valid one (or - * end_pfn), such that we hit a valid pfn (or end_pfn) - * on our next iteration of the loop. 
- */ - pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1; -#endif + if (!early_pfn_valid(pfn)) continue; - } if (!early_pfn_in_nid(pfn, nid)) continue; if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised)) @@ -6199,10 +6192,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat) end = pgdat_end_pfn(pgdat); end = ALIGN(end, MAX_ORDER_NR_PAGES); size = (end - start) * sizeof(struct page); - map = alloc_remap(pgdat->node_id, size); - if (!map) - map = memblock_virt_alloc_node_nopanic(size, - pgdat->node_id); + map = memblock_virt_alloc_node_nopanic(size, pgdat->node_id); pgdat->node_mem_map = map + offset; } pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n", diff --git a/mm/page_owner.c b/mm/page_owner.c index 9886c6073828..7172e0a80e13 100644 --- a/mm/page_owner.c +++ b/mm/page_owner.c @@ -123,13 +123,13 @@ void __reset_page_owner(struct page *page, unsigned int order) static inline bool check_recursive_alloc(struct stack_trace *trace, unsigned long ip) { - int i, count; + int i; if (!trace->nr_entries) return false; - for (i = 0, count = 0; i < trace->nr_entries; i++) { - if (trace->entries[i] == ip && ++count == 2) + for (i = 0; i < trace->nr_entries; i++) { + if (trace->entries[i] == ip) return true; } diff --git a/mm/percpu-km.c b/mm/percpu-km.c index d2a76642c4ae..38de70ab1a0d 100644 --- a/mm/percpu-km.c +++ b/mm/percpu-km.c @@ -34,7 +34,7 @@ #include <linux/log2.h> static int pcpu_populate_chunk(struct pcpu_chunk *chunk, - int page_start, int page_end) + int page_start, int page_end, gfp_t gfp) { return 0; } @@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, /* nada */ } -static struct pcpu_chunk *pcpu_create_chunk(void) +static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) { const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT; struct pcpu_chunk *chunk; struct page *pages; int i; - chunk = pcpu_alloc_chunk(); + chunk = pcpu_alloc_chunk(gfp); if (!chunk) return NULL; - pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages)); + pages = alloc_pages(gfp, order_base_2(nr_pages)); if (!pages) { pcpu_free_chunk(chunk); return NULL; diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c index 9158e5a81391..d8078de912de 100644 --- a/mm/percpu-vm.c +++ b/mm/percpu-vm.c @@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void) lockdep_assert_held(&pcpu_alloc_mutex); if (!pages) - pages = pcpu_mem_zalloc(pages_size); + pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL); return pages; } @@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk, * @pages: array to put the allocated pages into, indexed by pcpu_page_idx() * @page_start: page index of the first page to be allocated * @page_end: page index of the last page to be allocated + 1 + * @gfp: allocation flags passed to the underlying allocator * * Allocate pages [@page_start,@page_end) into @pages for all units. * The allocation is for @chunk. Percpu core doesn't care about the * content of @pages and will pass it verbatim to pcpu_map_pages(). 
*/ static int pcpu_alloc_pages(struct pcpu_chunk *chunk, - struct page **pages, int page_start, int page_end) + struct page **pages, int page_start, int page_end, + gfp_t gfp) { - const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM; unsigned int cpu, tcpu; int i; + gfp |= __GFP_HIGHMEM; + for_each_possible_cpu(cpu) { for (i = page_start; i < page_end; i++) { struct page **pagep = &pages[pcpu_page_idx(cpu, i)]; @@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk, * @chunk: chunk of interest * @page_start: the start page * @page_end: the end page + * @gfp: allocation flags passed to the underlying memory allocator * * For each cpu, populate and map pages [@page_start,@page_end) into * @chunk. @@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk, * pcpu_alloc_mutex, does GFP_KERNEL allocation. */ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, - int page_start, int page_end) + int page_start, int page_end, gfp_t gfp) { struct page **pages; @@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk, if (!pages) return -ENOMEM; - if (pcpu_alloc_pages(chunk, pages, page_start, page_end)) + if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp)) return -ENOMEM; if (pcpu_map_pages(chunk, pages, page_start, page_end)) { @@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, pcpu_free_pages(chunk, pages, page_start, page_end); } -static struct pcpu_chunk *pcpu_create_chunk(void) +static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp) { struct pcpu_chunk *chunk; struct vm_struct **vms; - chunk = pcpu_alloc_chunk(); + chunk = pcpu_alloc_chunk(gfp); if (!chunk) return NULL; diff --git a/mm/percpu.c b/mm/percpu.c index 50e7fdf84055..0b6480979ac7 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -80,6 +80,7 @@ #include <linux/vmalloc.h> #include <linux/workqueue.h> #include <linux/kmemleak.h> +#include <linux/sched.h> #include <asm/cacheflush.h> #include <asm/sections.h> @@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits, /** * pcpu_mem_zalloc - allocate memory * @size: bytes to allocate + * @gfp: allocation flags * * Allocate @size bytes. If @size is smaller than PAGE_SIZE, - * kzalloc() is used; otherwise, vzalloc() is used. The returned - * memory is always zeroed. - * - * CONTEXT: - * Does GFP_KERNEL allocation. + * kzalloc() is used; otherwise, the equivalent of vzalloc() is used. + * This is to facilitate passing through whitelisted flags. The + * returned memory is always zeroed. * * RETURNS: * Pointer to the allocated area on success, NULL on failure. 
*/ -static void *pcpu_mem_zalloc(size_t size) +static void *pcpu_mem_zalloc(size_t size, gfp_t gfp) { if (WARN_ON_ONCE(!slab_is_available())) return NULL; if (size <= PAGE_SIZE) - return kzalloc(size, GFP_KERNEL); + return kzalloc(size, gfp); else - return vzalloc(size); + return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL); } /** @@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr, return chunk; } -static struct pcpu_chunk *pcpu_alloc_chunk(void) +static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp) { struct pcpu_chunk *chunk; int region_bits; - chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size); + chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp); if (!chunk) return NULL; @@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void) region_bits = pcpu_chunk_map_bits(chunk); chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) * - sizeof(chunk->alloc_map[0])); + sizeof(chunk->alloc_map[0]), gfp); if (!chunk->alloc_map) goto alloc_map_fail; chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) * - sizeof(chunk->bound_map[0])); + sizeof(chunk->bound_map[0]), gfp); if (!chunk->bound_map) goto bound_map_fail; chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) * - sizeof(chunk->md_blocks[0])); + sizeof(chunk->md_blocks[0]), gfp); if (!chunk->md_blocks) goto md_blocks_fail; @@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk, * pcpu_addr_to_page - translate address to physical address * pcpu_verify_alloc_info - check alloc_info is acceptable during init */ -static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size); -static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size); -static struct pcpu_chunk *pcpu_create_chunk(void); +static int pcpu_populate_chunk(struct pcpu_chunk *chunk, + int page_start, int page_end, gfp_t gfp); +static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, + int page_start, int page_end); +static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp); static void pcpu_destroy_chunk(struct pcpu_chunk *chunk); static struct page *pcpu_addr_to_page(void *addr); static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai); @@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr) static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, gfp_t gfp) { + /* whitelisted flags that can be passed to the backing allocators */ + gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN); bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL; bool do_warn = !(gfp & __GFP_NOWARN); static int warn_limit = 10; @@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved, return NULL; } - if (!is_atomic) - mutex_lock(&pcpu_alloc_mutex); + if (!is_atomic) { + /* + * pcpu_balance_workfn() allocates memory under this mutex, + * and it may wait for memory reclaim. Allow current task + * to become OOM victim, in case of memory pressure. 
+ */ + if (gfp & __GFP_NOFAIL) + mutex_lock(&pcpu_alloc_mutex); + else if (mutex_lock_killable(&pcpu_alloc_mutex)) + return NULL; + } spin_lock_irqsave(&pcpu_lock, flags); @@ -1421,7 +1434,7 @@ restart: } if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) { - chunk = pcpu_create_chunk(); + chunk = pcpu_create_chunk(pcpu_gfp); if (!chunk) { err = "failed to allocate new chunk"; goto fail; @@ -1450,7 +1463,7 @@ area_found: page_start, page_end) { WARN_ON(chunk->immutable); - ret = pcpu_populate_chunk(chunk, rs, re); + ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp); spin_lock_irqsave(&pcpu_lock, flags); if (ret) { @@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align) * pcpu_balance_workfn - manage the amount of free chunks and populated pages * @work: unused * - * Reclaim all fully free chunks except for the first one. + * Reclaim all fully free chunks except for the first one. This is also + * responsible for maintaining the pool of empty populated pages. However, + * it is possible that this is called when physical memory is scarce causing + * OOM killer to be triggered. We should avoid doing so until an actual + * allocation causes the failure as it is possible that requests can be + * serviced from already backed regions. */ static void pcpu_balance_workfn(struct work_struct *work) { + /* gfp flags passed to underlying allocators */ + const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN; LIST_HEAD(to_free); struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1]; struct pcpu_chunk *chunk, *next; @@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work) spin_unlock_irq(&pcpu_lock); } pcpu_destroy_chunk(chunk); + cond_resched(); } /* @@ -1645,7 +1666,7 @@ retry_pop: chunk->nr_pages) { int nr = min(re - rs, nr_to_pop); - ret = pcpu_populate_chunk(chunk, rs, rs + nr); + ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp); if (!ret) { nr_to_pop -= nr; spin_lock_irq(&pcpu_lock); @@ -1662,7 +1683,7 @@ retry_pop: if (nr_to_pop) { /* ran out of chunks to populate, create a new one and retry */ - chunk = pcpu_create_chunk(); + chunk = pcpu_create_chunk(gfp); if (chunk) { spin_lock_irq(&pcpu_lock); pcpu_chunk_relocate(chunk, -1); @@ -2719,11 +2740,7 @@ void __init setup_per_cpu_areas(void) if (pcpu_setup_first_chunk(ai, fc) < 0) panic("Failed to initialize percpu areas."); -#ifdef CONFIG_CRIS -#warning "the CRIS architecture has physical and virtual addresses confused" -#else pcpu_free_alloc_info(ai); -#endif } #endif /* CONFIG_SMP */ diff --git a/mm/readahead.c b/mm/readahead.c index c4ca70239233..4d57b4644f98 100644 --- a/mm/readahead.c +++ b/mm/readahead.c @@ -573,7 +573,7 @@ do_readahead(struct address_space *mapping, struct file *filp, return force_page_cache_readahead(mapping, filp, index, nr); } -SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) +ssize_t ksys_readahead(int fd, loff_t offset, size_t count) { ssize_t ret; struct fd f; @@ -592,3 +592,8 @@ SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) } return ret; } + +SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count) +{ + return ksys_readahead(fd, offset, count); +} diff --git a/mm/shmem.c b/mm/shmem.c index 1907688b75ee..b85919243399 100644 --- a/mm/shmem.c +++ b/mm/shmem.c @@ -493,36 +493,45 @@ next: info = list_entry(pos, struct shmem_inode_info, shrinklist); inode = &info->vfs_inode; - if (nr_to_split && split >= nr_to_split) { - iput(inode); - continue; - } + if (nr_to_split && split >= nr_to_split) + goto leave; - 
page = find_lock_page(inode->i_mapping, + page = find_get_page(inode->i_mapping, (inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT); if (!page) goto drop; + /* No huge page at the end of the file: nothing to split */ if (!PageTransHuge(page)) { - unlock_page(page); put_page(page); goto drop; } + /* + * Leave the inode on the list if we failed to lock + * the page at this time. + * + * Waiting for the lock may lead to deadlock in the + * reclaim path. + */ + if (!trylock_page(page)) { + put_page(page); + goto leave; + } + ret = split_huge_page(page); unlock_page(page); put_page(page); - if (ret) { - /* split failed: leave it on the list */ - iput(inode); - continue; - } + /* If split failed leave the inode on the list */ + if (ret) + goto leave; split++; drop: list_del_init(&info->shrinklist); removed++; +leave: iput(inode); } diff --git a/mm/slab.c b/mm/slab.c index 324446621b3e..9095c3945425 100644 --- a/mm/slab.c +++ b/mm/slab.c @@ -1283,6 +1283,7 @@ void __init kmem_cache_init(void) nr_node_ids * sizeof(struct kmem_cache_node *), SLAB_HWCACHE_ALIGN, 0, 0); list_add(&kmem_cache->list, &slab_caches); + memcg_link_cache(kmem_cache); slab_state = PARTIAL; /* diff --git a/mm/sparse.c b/mm/sparse.c index 7af5e7a92528..58cab483e81b 100644 --- a/mm/sparse.c +++ b/mm/sparse.c @@ -236,28 +236,6 @@ void __init memory_present(int nid, unsigned long start, unsigned long end) } /* - * Only used by the i386 NUMA architecures, but relatively - * generic code. - */ -unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn, - unsigned long end_pfn) -{ - unsigned long pfn; - unsigned long nr_pages = 0; - - mminit_validate_memmodel_limits(&start_pfn, &end_pfn); - for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) { - if (nid != early_pfn_to_nid(pfn)) - continue; - - if (pfn_present(pfn)) - nr_pages += PAGES_PER_SECTION; - } - - return nr_pages * sizeof(struct page); -} - -/* * Subtle, we encode the real pfn into the mem_map such that * the identity pfn - section_mem_map will return the actual * physical page frame number. @@ -427,10 +405,6 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid, struct page *map; unsigned long size; - map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION); - if (map) - return map; - size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION); map = memblock_virt_alloc_try_nid(size, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), @@ -446,17 +420,6 @@ void __init sparse_mem_maps_populate_node(struct page **map_map, unsigned long pnum; unsigned long size = sizeof(struct page) * PAGES_PER_SECTION; - map = alloc_remap(nodeid, size * map_count); - if (map) { - for (pnum = pnum_begin; pnum < pnum_end; pnum++) { - if (!present_section_nr(pnum)) - continue; - map_map[pnum] = map; - map += size; - } - return; - } - size = PAGE_ALIGN(size); map = memblock_virt_alloc_try_nid_raw(size * map_count, PAGE_SIZE, __pa(MAX_DMA_ADDRESS), diff --git a/mm/vmscan.c b/mm/vmscan.c index bee53495a829..cd5dc3faaa57 100644 --- a/mm/vmscan.c +++ b/mm/vmscan.c @@ -1780,6 +1780,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, set_bit(PGDAT_WRITEBACK, &pgdat->flags); /* + * If dirty pages are scanned that are not queued for IO, it + * implies that flushers are not doing their job. This can + * happen when memory pressure pushes dirty pages to the end of + * the LRU before the dirty limits are breached and the dirty + * data has expired. 
It can also happen when the proportion of + * dirty pages grows not through writes but through memory + * pressure reclaiming all the clean cache. And in some cases, + * the flushers simply cannot keep up with the allocation + * rate. Nudge the flusher threads in case they are asleep. + */ + if (stat.nr_unqueued_dirty == nr_taken) + wakeup_flusher_threads(WB_REASON_VMSCAN); + + /* * Legacy memcg will stall in page writeback so avoid forcibly * stalling here. */ @@ -1791,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec, if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested) set_bit(PGDAT_CONGESTED, &pgdat->flags); - /* - * If dirty pages are scanned that are not queued for IO, it - * implies that flushers are not doing their job. This can - * happen when memory pressure pushes dirty pages to the end of - * the LRU before the dirty limits are breached and the dirty - * data has expired. It can also happen when the proportion of - * dirty pages grows not through writes but through memory - * pressure reclaiming all the clean cache. And in some cases, - * the flushers simply cannot keep up with the allocation - * rate. Nudge the flusher threads in case they are asleep, but - * also allow kswapd to start writing pages during reclaim. - */ - if (stat.nr_unqueued_dirty == nr_taken) { - wakeup_flusher_threads(WB_REASON_VMSCAN); + /* Allow kswapd to start writing pages during reclaim. */ + if (stat.nr_unqueued_dirty == nr_taken) set_bit(PGDAT_DIRTY, &pgdat->flags); - } /* * If kswapd scans pages marked marked for immediate diff --git a/mm/vmstat.c b/mm/vmstat.c index 40b2db6db6b1..33581be705f0 100644 --- a/mm/vmstat.c +++ b/mm/vmstat.c @@ -1839,9 +1839,11 @@ static void vmstat_update(struct work_struct *w) * to occur in the future. Keep on running the * update worker thread. */ + preempt_disable(); queue_delayed_work_on(smp_processor_id(), mm_percpu_wq, this_cpu_ptr(&vmstat_work), round_jiffies_relative(sysctl_stat_interval)); + preempt_enable(); } } diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c index c3013505c305..b7f61cd1c709 100644 --- a/mm/zsmalloc.c +++ b/mm/zsmalloc.c @@ -84,18 +84,19 @@ * This is made more complicated by various memory models and PAE. */ -#ifndef MAX_PHYSMEM_BITS -#ifdef CONFIG_HIGHMEM64G -#define MAX_PHYSMEM_BITS 36 -#else /* !CONFIG_HIGHMEM64G */ +#ifndef MAX_POSSIBLE_PHYSMEM_BITS +#ifdef MAX_PHYSMEM_BITS +#define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS +#else /* * If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just * be PAGE_SHIFT */ -#define MAX_PHYSMEM_BITS BITS_PER_LONG +#define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG #endif #endif -#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT) + +#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT) /* * Memory for allocating for handle keeps object position by |