summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
authorIngo Molnar <mingo@kernel.org>2018-04-12 10:42:34 +0300
committerIngo Molnar <mingo@kernel.org>2018-04-12 10:42:34 +0300
commitef389b734691cdc8beb009dd402135dcdcb86a56 (patch)
tree9523a37db93cb7c7874a5f18b4d9a7014898b814 /mm
parenta774635db5c430cbf21fa5d2f2df3d23aaa8e782 (diff)
parentc76fc98260751e71c884dc1a18a07e427ef033b5 (diff)
downloadlinux-ef389b734691cdc8beb009dd402135dcdcb86a56.tar.xz
Merge branch 'WIP.x86/asm' into x86/urgent, because the topic is ready
Signed-off-by: Ingo Molnar <mingo@kernel.org>
Diffstat (limited to 'mm')
-rw-r--r--mm/Kconfig14
-rw-r--r--mm/fadvise.c10
-rw-r--r--mm/gup.c7
-rw-r--r--mm/huge_memory.c9
-rw-r--r--mm/hugetlb.c9
-rw-r--r--mm/kasan/kasan_init.c2
-rw-r--r--mm/khugepaged.c15
-rw-r--r--mm/kmemleak.c12
-rw-r--r--mm/memblock.c28
-rw-r--r--mm/memcontrol.c6
-rw-r--r--mm/mempolicy.c95
-rw-r--r--mm/migrate.c39
-rw-r--r--mm/mmap.c17
-rw-r--r--mm/nommu.c37
-rw-r--r--mm/page_alloc.c22
-rw-r--r--mm/page_owner.c6
-rw-r--r--mm/percpu-km.c8
-rw-r--r--mm/percpu-vm.c18
-rw-r--r--mm/percpu.c71
-rw-r--r--mm/readahead.c7
-rw-r--r--mm/shmem.c31
-rw-r--r--mm/slab.c1
-rw-r--r--mm/sparse.c37
-rw-r--r--mm/vmscan.c31
-rw-r--r--mm/vmstat.c2
-rw-r--r--mm/zsmalloc.c13
26 files changed, 312 insertions, 235 deletions
diff --git a/mm/Kconfig b/mm/Kconfig
index c782e8fb7235..d5004d82a1d6 100644
--- a/mm/Kconfig
+++ b/mm/Kconfig
@@ -278,13 +278,6 @@ config BOUNCE
by default when ZONE_DMA or HIGHMEM is selected, but you
may say n to override this.
-# On the 'tile' arch, USB OHCI needs the bounce pool since tilegx will often
-# have more than 4GB of memory, but we don't currently use the IOTLB to present
-# a 32-bit address to OHCI. So we need to use a bounce pool instead.
-config NEED_BOUNCE_POOL
- bool
- default y if TILE && USB_OHCI_HCD
-
config NR_QUICK
int
depends on QUICKLIST
@@ -627,15 +620,14 @@ config GENERIC_EARLY_IOREMAP
config MAX_STACK_SIZE_MB
int "Maximum user stack size for 32-bit processes (MB)"
default 80
- range 8 256 if METAG
range 8 2048
depends on STACK_GROWSUP && (!64BIT || COMPAT)
help
This is the maximum stack size in Megabytes in the VM layout of 32-bit
user processes when the stack grows upwards (currently only on parisc
- and metag arch). The stack will be located at the highest memory
- address minus the given value, unless the RLIMIT_STACK hard limit is
- changed to a smaller value in which case that is used.
+ arch). The stack will be located at the highest memory address minus
+ the given value, unless the RLIMIT_STACK hard limit is changed to a
+ smaller value in which case that is used.
A sane initial value is 80 MB.
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 767887f5f3bf..afa41491d324 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -26,7 +26,8 @@
* POSIX_FADV_WILLNEED could set PG_Referenced, and POSIX_FADV_NOREUSE could
* deactivate the pages and clear PG_Referenced.
*/
-SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
+
+int ksys_fadvise64_64(int fd, loff_t offset, loff_t len, int advice)
{
struct fd f = fdget(fd);
struct inode *inode;
@@ -185,11 +186,16 @@ out:
return ret;
}
+SYSCALL_DEFINE4(fadvise64_64, int, fd, loff_t, offset, loff_t, len, int, advice)
+{
+ return ksys_fadvise64_64(fd, offset, len, advice);
+}
+
#ifdef __ARCH_WANT_SYS_FADVISE64
SYSCALL_DEFINE4(fadvise64, int, fd, loff_t, offset, size_t, len, int, advice)
{
- return sys_fadvise64_64(fd, offset, len, advice);
+ return ksys_fadvise64_64(fd, offset, len, advice);
}
#endif
diff --git a/mm/gup.c b/mm/gup.c
index 1b46e6e74881..6afae32571ca 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -516,7 +516,7 @@ static int faultin_page(struct task_struct *tsk, struct vm_area_struct *vma,
}
if (ret & VM_FAULT_RETRY) {
- if (nonblocking)
+ if (nonblocking && !(fault_flags & FAULT_FLAG_RETRY_NOWAIT))
*nonblocking = 0;
return -EBUSY;
}
@@ -890,7 +890,10 @@ static __always_inline long __get_user_pages_locked(struct task_struct *tsk,
break;
}
if (*locked) {
- /* VM_FAULT_RETRY didn't trigger */
+ /*
+ * VM_FAULT_RETRY didn't trigger or it was a
+ * FOLL_NOWAIT.
+ */
if (!pages_done)
pages_done = ret;
break;
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 87ab9b8f56b5..5a68730eebd6 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -555,7 +555,8 @@ static int __do_huge_pmd_anonymous_page(struct vm_fault *vmf, struct page *page,
VM_BUG_ON_PAGE(!PageCompound(page), page);
- if (mem_cgroup_try_charge(page, vma->vm_mm, gfp, &memcg, true)) {
+ if (mem_cgroup_try_charge(page, vma->vm_mm, gfp | __GFP_NORETRY, &memcg,
+ true)) {
put_page(page);
count_vm_event(THP_FAULT_FALLBACK);
return VM_FAULT_FALLBACK;
@@ -1316,7 +1317,7 @@ alloc:
}
if (unlikely(mem_cgroup_try_charge(new_page, vma->vm_mm,
- huge_gfp, &memcg, true))) {
+ huge_gfp | __GFP_NORETRY, &memcg, true))) {
put_page(new_page);
split_huge_pmd(vma, vmf->pmd, vmf->address);
if (page)
@@ -2783,11 +2784,13 @@ static unsigned long deferred_split_scan(struct shrinker *shrink,
list_for_each_safe(pos, next, &list) {
page = list_entry((void *)pos, struct page, mapping);
- lock_page(page);
+ if (!trylock_page(page))
+ goto next;
/* split_huge_page() removes page from list on success */
if (!split_huge_page(page))
split++;
unlock_page(page);
+next:
put_page(page);
}
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 7c204e3d132b..976bbc5646fe 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -18,6 +18,7 @@
#include <linux/bootmem.h>
#include <linux/sysfs.h>
#include <linux/slab.h>
+#include <linux/mmdebug.h>
#include <linux/sched/signal.h>
#include <linux/rmap.h>
#include <linux/string_helpers.h>
@@ -1583,7 +1584,7 @@ static struct page *alloc_surplus_huge_page(struct hstate *h, gfp_t gfp_mask,
page = NULL;
} else {
h->surplus_huge_pages++;
- h->nr_huge_pages_node[page_to_nid(page)]++;
+ h->surplus_huge_pages_node[page_to_nid(page)]++;
}
out_unlock:
@@ -4374,6 +4375,12 @@ int hugetlb_reserve_pages(struct inode *inode,
struct resv_map *resv_map;
long gbl_reserve;
+ /* This should never happen */
+ if (from > to) {
+ VM_WARN(1, "%s called with a negative range\n", __func__);
+ return -EINVAL;
+ }
+
/*
* Only apply hugepage reservation if asked. At fault time, an
* attempt will be made for VM_NORESERVE to allocate a page
diff --git a/mm/kasan/kasan_init.c b/mm/kasan/kasan_init.c
index 554e4c0f23a2..f436246ccc79 100644
--- a/mm/kasan/kasan_init.c
+++ b/mm/kasan/kasan_init.c
@@ -31,7 +31,7 @@
unsigned char kasan_zero_page[PAGE_SIZE] __page_aligned_bss;
#if CONFIG_PGTABLE_LEVELS > 4
-p4d_t kasan_zero_p4d[PTRS_PER_P4D] __page_aligned_bss;
+p4d_t kasan_zero_p4d[MAX_PTRS_PER_P4D] __page_aligned_bss;
#endif
#if CONFIG_PGTABLE_LEVELS > 3
pud_t kasan_zero_pud[PTRS_PER_PUD] __page_aligned_bss;
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index b7e2268dfc9a..e42568284e06 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -530,7 +530,12 @@ static int __collapse_huge_page_isolate(struct vm_area_struct *vma,
goto out;
}
- VM_BUG_ON_PAGE(PageCompound(page), page);
+ /* TODO: teach khugepaged to collapse THP mapped with pte */
+ if (PageCompound(page)) {
+ result = SCAN_PAGE_COMPOUND;
+ goto out;
+ }
+
VM_BUG_ON_PAGE(!PageAnon(page), page);
/*
@@ -960,7 +965,9 @@ static void collapse_huge_page(struct mm_struct *mm,
goto out_nolock;
}
- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ /* Do not oom kill for khugepaged charges */
+ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+ &memcg, true))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out_nolock;
}
@@ -1319,7 +1326,9 @@ static void collapse_shmem(struct mm_struct *mm,
goto out;
}
- if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp, &memcg, true))) {
+ /* Do not oom kill for khugepaged charges */
+ if (unlikely(mem_cgroup_try_charge(new_page, mm, gfp | __GFP_NORETRY,
+ &memcg, true))) {
result = SCAN_CGROUP_CHARGE_FAIL;
goto out;
}
diff --git a/mm/kmemleak.c b/mm/kmemleak.c
index e83987c55a08..46c2290a08f1 100644
--- a/mm/kmemleak.c
+++ b/mm/kmemleak.c
@@ -1657,8 +1657,7 @@ static void start_scan_thread(void)
}
/*
- * Stop the automatic memory scanning thread. This function must be called
- * with the scan_mutex held.
+ * Stop the automatic memory scanning thread.
*/
static void stop_scan_thread(void)
{
@@ -1921,12 +1920,15 @@ static void kmemleak_do_cleanup(struct work_struct *work)
{
stop_scan_thread();
+ mutex_lock(&scan_mutex);
/*
- * Once the scan thread has stopped, it is safe to no longer track
- * object freeing. Ordering of the scan thread stopping and the memory
- * accesses below is guaranteed by the kthread_stop() function.
+ * Once it is made sure that kmemleak_scan has stopped, it is safe to no
+ * longer track object freeing. Ordering of the scan thread stopping and
+ * the memory accesses below is guaranteed by the kthread_stop()
+ * function.
*/
kmemleak_free_enabled = 0;
+ mutex_unlock(&scan_mutex);
if (!kmemleak_found_leaks)
__kmemleak_do_cleanup();
diff --git a/mm/memblock.c b/mm/memblock.c
index 5a9ca2a1751b..48376bd33274 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -1101,34 +1101,6 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
*out_nid = r->nid;
}
-unsigned long __init_memblock memblock_next_valid_pfn(unsigned long pfn,
- unsigned long max_pfn)
-{
- struct memblock_type *type = &memblock.memory;
- unsigned int right = type->cnt;
- unsigned int mid, left = 0;
- phys_addr_t addr = PFN_PHYS(pfn + 1);
-
- do {
- mid = (right + left) / 2;
-
- if (addr < type->regions[mid].base)
- right = mid;
- else if (addr >= (type->regions[mid].base +
- type->regions[mid].size))
- left = mid + 1;
- else {
- /* addr is within the region, so pfn + 1 is valid */
- return min(pfn + 1, max_pfn);
- }
- } while (left < right);
-
- if (right == type->cnt)
- return max_pfn;
- else
- return min(PHYS_PFN(type->regions[right].base), max_pfn);
-}
-
/**
* memblock_set_node - set node ID on memblock regions
* @base: base of area to set node ID for
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 670e99b68aa6..9ec024b862ac 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -714,9 +714,9 @@ static struct mem_cgroup *get_mem_cgroup_from_mm(struct mm_struct *mm)
* invocations for reference counting, or use mem_cgroup_iter_break()
* to cancel a hierarchy walk before the round-trip is complete.
*
- * Reclaimers can specify a zone and a priority level in @reclaim to
+ * Reclaimers can specify a node and a priority level in @reclaim to
* divide up the memcgs in the hierarchy among all concurrent
- * reclaimers operating on the same zone and priority.
+ * reclaimers operating on the same node and priority.
*/
struct mem_cgroup *mem_cgroup_iter(struct mem_cgroup *root,
struct mem_cgroup *prev,
@@ -2299,7 +2299,7 @@ void memcg_kmem_put_cache(struct kmem_cache *cachep)
}
/**
- * memcg_kmem_charge: charge a kmem page
+ * memcg_kmem_charge_memcg: charge a kmem page
* @page: page to charge
* @gfp: reclaim mode
* @order: allocation order
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index d879f1d8a44a..01cbb7078d6c 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1336,9 +1336,9 @@ static int copy_nodes_to_user(unsigned long __user *mask, unsigned long maxnode,
return copy_to_user(mask, nodes_addr(*nodes), copy) ? -EFAULT : 0;
}
-SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
- unsigned long, mode, const unsigned long __user *, nmask,
- unsigned long, maxnode, unsigned, flags)
+static long kernel_mbind(unsigned long start, unsigned long len,
+ unsigned long mode, const unsigned long __user *nmask,
+ unsigned long maxnode, unsigned int flags)
{
nodemask_t nodes;
int err;
@@ -1357,9 +1357,16 @@ SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
return do_mbind(start, len, mode, mode_flags, &nodes, flags);
}
+SYSCALL_DEFINE6(mbind, unsigned long, start, unsigned long, len,
+ unsigned long, mode, const unsigned long __user *, nmask,
+ unsigned long, maxnode, unsigned int, flags)
+{
+ return kernel_mbind(start, len, mode, nmask, maxnode, flags);
+}
+
/* Set the process memory policy */
-SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask,
- unsigned long, maxnode)
+static long kernel_set_mempolicy(int mode, const unsigned long __user *nmask,
+ unsigned long maxnode)
{
int err;
nodemask_t nodes;
@@ -1377,9 +1384,15 @@ SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask,
return do_set_mempolicy(mode, flags, &nodes);
}
-SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
- const unsigned long __user *, old_nodes,
- const unsigned long __user *, new_nodes)
+SYSCALL_DEFINE3(set_mempolicy, int, mode, const unsigned long __user *, nmask,
+ unsigned long, maxnode)
+{
+ return kernel_set_mempolicy(mode, nmask, maxnode);
+}
+
+static int kernel_migrate_pages(pid_t pid, unsigned long maxnode,
+ const unsigned long __user *old_nodes,
+ const unsigned long __user *new_nodes)
{
struct mm_struct *mm = NULL;
struct task_struct *task;
@@ -1469,11 +1482,20 @@ out_put:
}
+SYSCALL_DEFINE4(migrate_pages, pid_t, pid, unsigned long, maxnode,
+ const unsigned long __user *, old_nodes,
+ const unsigned long __user *, new_nodes)
+{
+ return kernel_migrate_pages(pid, maxnode, old_nodes, new_nodes);
+}
+
/* Retrieve NUMA policy */
-SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
- unsigned long __user *, nmask, unsigned long, maxnode,
- unsigned long, addr, unsigned long, flags)
+static int kernel_get_mempolicy(int __user *policy,
+ unsigned long __user *nmask,
+ unsigned long maxnode,
+ unsigned long addr,
+ unsigned long flags)
{
int err;
int uninitialized_var(pval);
@@ -1496,6 +1518,13 @@ SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
return err;
}
+SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
+ unsigned long __user *, nmask, unsigned long, maxnode,
+ unsigned long, addr, unsigned long, flags)
+{
+ return kernel_get_mempolicy(policy, nmask, maxnode, addr, flags);
+}
+
#ifdef CONFIG_COMPAT
COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
@@ -1514,7 +1543,7 @@ COMPAT_SYSCALL_DEFINE5(get_mempolicy, int __user *, policy,
if (nmask)
nm = compat_alloc_user_space(alloc_size);
- err = sys_get_mempolicy(policy, nm, nr_bits+1, addr, flags);
+ err = kernel_get_mempolicy(policy, nm, nr_bits+1, addr, flags);
if (!err && nmask) {
unsigned long copy_size;
@@ -1546,7 +1575,7 @@ COMPAT_SYSCALL_DEFINE3(set_mempolicy, int, mode, compat_ulong_t __user *, nmask,
return -EFAULT;
}
- return sys_set_mempolicy(mode, nm, nr_bits+1);
+ return kernel_set_mempolicy(mode, nm, nr_bits+1);
}
COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
@@ -1568,10 +1597,43 @@ COMPAT_SYSCALL_DEFINE6(mbind, compat_ulong_t, start, compat_ulong_t, len,
return -EFAULT;
}
- return sys_mbind(start, len, mode, nm, nr_bits+1, flags);
+ return kernel_mbind(start, len, mode, nm, nr_bits+1, flags);
}
-#endif
+COMPAT_SYSCALL_DEFINE4(migrate_pages, compat_pid_t, pid,
+ compat_ulong_t, maxnode,
+ const compat_ulong_t __user *, old_nodes,
+ const compat_ulong_t __user *, new_nodes)
+{
+ unsigned long __user *old = NULL;
+ unsigned long __user *new = NULL;
+ nodemask_t tmp_mask;
+ unsigned long nr_bits;
+ unsigned long size;
+
+ nr_bits = min_t(unsigned long, maxnode - 1, MAX_NUMNODES);
+ size = ALIGN(nr_bits, BITS_PER_LONG) / 8;
+ if (old_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), old_nodes, nr_bits))
+ return -EFAULT;
+ old = compat_alloc_user_space(new_nodes ? size * 2 : size);
+ if (new_nodes)
+ new = old + size / sizeof(unsigned long);
+ if (copy_to_user(old, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ if (new_nodes) {
+ if (compat_get_bitmap(nodes_addr(tmp_mask), new_nodes, nr_bits))
+ return -EFAULT;
+ if (new == NULL)
+ new = compat_alloc_user_space(size);
+ if (copy_to_user(new, nodes_addr(tmp_mask), size))
+ return -EFAULT;
+ }
+ return kernel_migrate_pages(pid, nr_bits + 1, old, new);
+}
+
+#endif /* CONFIG_COMPAT */
struct mempolicy *__get_vma_policy(struct vm_area_struct *vma,
unsigned long addr)
@@ -2124,6 +2186,9 @@ bool __mpol_equal(struct mempolicy *a, struct mempolicy *b)
case MPOL_INTERLEAVE:
return !!nodes_equal(a->v.nodes, b->v.nodes);
case MPOL_PREFERRED:
+ /* a's ->flags is the same as b's */
+ if (a->flags & MPOL_F_LOCAL)
+ return true;
return a->v.preferred_node == b->v.preferred_node;
default:
BUG();
diff --git a/mm/migrate.c b/mm/migrate.c
index 1e5525a25691..003886606a22 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -34,6 +34,7 @@
#include <linux/backing-dev.h>
#include <linux/compaction.h>
#include <linux/syscalls.h>
+#include <linux/compat.h>
#include <linux/hugetlb.h>
#include <linux/hugetlb_cgroup.h>
#include <linux/gfp.h>
@@ -1745,10 +1746,10 @@ static int do_pages_stat(struct mm_struct *mm, unsigned long nr_pages,
* Move a list of pages in the address space of the currently executing
* process.
*/
-SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
- const void __user * __user *, pages,
- const int __user *, nodes,
- int __user *, status, int, flags)
+static int kernel_move_pages(pid_t pid, unsigned long nr_pages,
+ const void __user * __user *pages,
+ const int __user *nodes,
+ int __user *status, int flags)
{
struct task_struct *task;
struct mm_struct *mm;
@@ -1807,6 +1808,36 @@ out:
return err;
}
+SYSCALL_DEFINE6(move_pages, pid_t, pid, unsigned long, nr_pages,
+ const void __user * __user *, pages,
+ const int __user *, nodes,
+ int __user *, status, int, flags)
+{
+ return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE6(move_pages, pid_t, pid, compat_ulong_t, nr_pages,
+ compat_uptr_t __user *, pages32,
+ const int __user *, nodes,
+ int __user *, status,
+ int, flags)
+{
+ const void __user * __user *pages;
+ int i;
+
+ pages = compat_alloc_user_space(nr_pages * sizeof(void *));
+ for (i = 0; i < nr_pages; i++) {
+ compat_uptr_t p;
+
+ if (get_user(p, pages32 + i) ||
+ put_user(compat_ptr(p), pages + i))
+ return -EFAULT;
+ }
+ return kernel_move_pages(pid, nr_pages, pages, nodes, status, flags);
+}
+#endif /* CONFIG_COMPAT */
+
#ifdef CONFIG_NUMA_BALANCING
/*
* Returns true if this is a safe migration target node for misplaced NUMA
diff --git a/mm/mmap.c b/mm/mmap.c
index 9efdc021ad22..aa0dc8231c0d 100644
--- a/mm/mmap.c
+++ b/mm/mmap.c
@@ -1488,9 +1488,9 @@ unsigned long do_mmap(struct file *file, unsigned long addr,
return addr;
}
-SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
- unsigned long, prot, unsigned long, flags,
- unsigned long, fd, unsigned long, pgoff)
+unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
{
struct file *file = NULL;
unsigned long retval;
@@ -1537,6 +1537,13 @@ out_fput:
return retval;
}
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, pgoff)
+{
+ return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
+}
+
#ifdef __ARCH_WANT_SYS_OLD_MMAP
struct mmap_arg_struct {
unsigned long addr;
@@ -1556,8 +1563,8 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
if (offset_in_page(a.offset))
return -EINVAL;
- return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
- a.offset >> PAGE_SHIFT);
+ return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+ a.offset >> PAGE_SHIFT);
}
#endif /* __ARCH_WANT_SYS_OLD_MMAP */
diff --git a/mm/nommu.c b/mm/nommu.c
index ebb6e618dade..4f8720243ae7 100644
--- a/mm/nommu.c
+++ b/mm/nommu.c
@@ -663,22 +663,6 @@ static void put_nommu_region(struct vm_region *region)
}
/*
- * update protection on a vma
- */
-static void protect_vma(struct vm_area_struct *vma, unsigned long flags)
-{
-#ifdef CONFIG_MPU
- struct mm_struct *mm = vma->vm_mm;
- long start = vma->vm_start & PAGE_MASK;
- while (start < vma->vm_end) {
- protect_page(mm, start, flags);
- start += PAGE_SIZE;
- }
- update_protections(mm);
-#endif
-}
-
-/*
* add a VMA into a process's mm_struct in the appropriate place in the list
* and tree and add to the address space's page tree also if not an anonymous
* page
@@ -695,8 +679,6 @@ static void add_vma_to_mm(struct mm_struct *mm, struct vm_area_struct *vma)
mm->map_count++;
vma->vm_mm = mm;
- protect_vma(vma, vma->vm_flags);
-
/* add the VMA to the mapping */
if (vma->vm_file) {
mapping = vma->vm_file->f_mapping;
@@ -757,8 +739,6 @@ static void delete_vma_from_mm(struct vm_area_struct *vma)
struct mm_struct *mm = vma->vm_mm;
struct task_struct *curr = current;
- protect_vma(vma, 0);
-
mm->map_count--;
for (i = 0; i < VMACACHE_SIZE; i++) {
/* if the vma is cached, invalidate the entire cache */
@@ -1423,9 +1403,9 @@ error_getting_region:
return -ENOMEM;
}
-SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
- unsigned long, prot, unsigned long, flags,
- unsigned long, fd, unsigned long, pgoff)
+unsigned long ksys_mmap_pgoff(unsigned long addr, unsigned long len,
+ unsigned long prot, unsigned long flags,
+ unsigned long fd, unsigned long pgoff)
{
struct file *file = NULL;
unsigned long retval = -EBADF;
@@ -1447,6 +1427,13 @@ out:
return retval;
}
+SYSCALL_DEFINE6(mmap_pgoff, unsigned long, addr, unsigned long, len,
+ unsigned long, prot, unsigned long, flags,
+ unsigned long, fd, unsigned long, pgoff)
+{
+ return ksys_mmap_pgoff(addr, len, prot, flags, fd, pgoff);
+}
+
#ifdef __ARCH_WANT_SYS_OLD_MMAP
struct mmap_arg_struct {
unsigned long addr;
@@ -1466,8 +1453,8 @@ SYSCALL_DEFINE1(old_mmap, struct mmap_arg_struct __user *, arg)
if (offset_in_page(a.offset))
return -EINVAL;
- return sys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
- a.offset >> PAGE_SHIFT);
+ return ksys_mmap_pgoff(a.addr, a.len, a.prot, a.flags, a.fd,
+ a.offset >> PAGE_SHIFT);
}
#endif /* __ARCH_WANT_SYS_OLD_MMAP */
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index cb416723538f..4ea018263210 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1910,7 +1910,9 @@ static int move_freepages(struct zone *zone,
* Remove at a later date when no bug reports exist related to
* grouping pages by mobility
*/
- VM_BUG_ON(page_zone(start_page) != page_zone(end_page));
+ VM_BUG_ON(pfn_valid(page_to_pfn(start_page)) &&
+ pfn_valid(page_to_pfn(end_page)) &&
+ page_zone(start_page) != page_zone(end_page));
#endif
if (num_movable)
@@ -3594,7 +3596,7 @@ static bool __need_fs_reclaim(gfp_t gfp_mask)
return false;
/* this guy won't enter reclaim */
- if ((current->flags & PF_MEMALLOC) && !(gfp_mask & __GFP_NOMEMALLOC))
+ if (current->flags & PF_MEMALLOC)
return false;
/* We're only interested __GFP_FS allocations for now */
@@ -5354,17 +5356,8 @@ void __meminit memmap_init_zone(unsigned long size, int nid, unsigned long zone,
if (context != MEMMAP_EARLY)
goto not_early;
- if (!early_pfn_valid(pfn)) {
-#ifdef CONFIG_HAVE_MEMBLOCK_NODE_MAP
- /*
- * Skip to the pfn preceding the next valid one (or
- * end_pfn), such that we hit a valid pfn (or end_pfn)
- * on our next iteration of the loop.
- */
- pfn = memblock_next_valid_pfn(pfn, end_pfn) - 1;
-#endif
+ if (!early_pfn_valid(pfn))
continue;
- }
if (!early_pfn_in_nid(pfn, nid))
continue;
if (!update_defer_init(pgdat, pfn, end_pfn, &nr_initialised))
@@ -6199,10 +6192,7 @@ static void __ref alloc_node_mem_map(struct pglist_data *pgdat)
end = pgdat_end_pfn(pgdat);
end = ALIGN(end, MAX_ORDER_NR_PAGES);
size = (end - start) * sizeof(struct page);
- map = alloc_remap(pgdat->node_id, size);
- if (!map)
- map = memblock_virt_alloc_node_nopanic(size,
- pgdat->node_id);
+ map = memblock_virt_alloc_node_nopanic(size, pgdat->node_id);
pgdat->node_mem_map = map + offset;
}
pr_debug("%s: node %d, pgdat %08lx, node_mem_map %08lx\n",
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 9886c6073828..7172e0a80e13 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -123,13 +123,13 @@ void __reset_page_owner(struct page *page, unsigned int order)
static inline bool check_recursive_alloc(struct stack_trace *trace,
unsigned long ip)
{
- int i, count;
+ int i;
if (!trace->nr_entries)
return false;
- for (i = 0, count = 0; i < trace->nr_entries; i++) {
- if (trace->entries[i] == ip && ++count == 2)
+ for (i = 0; i < trace->nr_entries; i++) {
+ if (trace->entries[i] == ip)
return true;
}
diff --git a/mm/percpu-km.c b/mm/percpu-km.c
index d2a76642c4ae..38de70ab1a0d 100644
--- a/mm/percpu-km.c
+++ b/mm/percpu-km.c
@@ -34,7 +34,7 @@
#include <linux/log2.h>
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
- int page_start, int page_end)
+ int page_start, int page_end, gfp_t gfp)
{
return 0;
}
@@ -45,18 +45,18 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
/* nada */
}
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
const int nr_pages = pcpu_group_sizes[0] >> PAGE_SHIFT;
struct pcpu_chunk *chunk;
struct page *pages;
int i;
- chunk = pcpu_alloc_chunk();
+ chunk = pcpu_alloc_chunk(gfp);
if (!chunk)
return NULL;
- pages = alloc_pages(GFP_KERNEL, order_base_2(nr_pages));
+ pages = alloc_pages(gfp, order_base_2(nr_pages));
if (!pages) {
pcpu_free_chunk(chunk);
return NULL;
diff --git a/mm/percpu-vm.c b/mm/percpu-vm.c
index 9158e5a81391..d8078de912de 100644
--- a/mm/percpu-vm.c
+++ b/mm/percpu-vm.c
@@ -37,7 +37,7 @@ static struct page **pcpu_get_pages(void)
lockdep_assert_held(&pcpu_alloc_mutex);
if (!pages)
- pages = pcpu_mem_zalloc(pages_size);
+ pages = pcpu_mem_zalloc(pages_size, GFP_KERNEL);
return pages;
}
@@ -73,18 +73,21 @@ static void pcpu_free_pages(struct pcpu_chunk *chunk,
* @pages: array to put the allocated pages into, indexed by pcpu_page_idx()
* @page_start: page index of the first page to be allocated
* @page_end: page index of the last page to be allocated + 1
+ * @gfp: allocation flags passed to the underlying allocator
*
* Allocate pages [@page_start,@page_end) into @pages for all units.
* The allocation is for @chunk. Percpu core doesn't care about the
* content of @pages and will pass it verbatim to pcpu_map_pages().
*/
static int pcpu_alloc_pages(struct pcpu_chunk *chunk,
- struct page **pages, int page_start, int page_end)
+ struct page **pages, int page_start, int page_end,
+ gfp_t gfp)
{
- const gfp_t gfp = GFP_KERNEL | __GFP_HIGHMEM;
unsigned int cpu, tcpu;
int i;
+ gfp |= __GFP_HIGHMEM;
+
for_each_possible_cpu(cpu) {
for (i = page_start; i < page_end; i++) {
struct page **pagep = &pages[pcpu_page_idx(cpu, i)];
@@ -262,6 +265,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
* @chunk: chunk of interest
* @page_start: the start page
* @page_end: the end page
+ * @gfp: allocation flags passed to the underlying memory allocator
*
* For each cpu, populate and map pages [@page_start,@page_end) into
* @chunk.
@@ -270,7 +274,7 @@ static void pcpu_post_map_flush(struct pcpu_chunk *chunk,
* pcpu_alloc_mutex, does GFP_KERNEL allocation.
*/
static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
- int page_start, int page_end)
+ int page_start, int page_end, gfp_t gfp)
{
struct page **pages;
@@ -278,7 +282,7 @@ static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
if (!pages)
return -ENOMEM;
- if (pcpu_alloc_pages(chunk, pages, page_start, page_end))
+ if (pcpu_alloc_pages(chunk, pages, page_start, page_end, gfp))
return -ENOMEM;
if (pcpu_map_pages(chunk, pages, page_start, page_end)) {
@@ -325,12 +329,12 @@ static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
pcpu_free_pages(chunk, pages, page_start, page_end);
}
-static struct pcpu_chunk *pcpu_create_chunk(void)
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp)
{
struct pcpu_chunk *chunk;
struct vm_struct **vms;
- chunk = pcpu_alloc_chunk();
+ chunk = pcpu_alloc_chunk(gfp);
if (!chunk)
return NULL;
diff --git a/mm/percpu.c b/mm/percpu.c
index 50e7fdf84055..0b6480979ac7 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -80,6 +80,7 @@
#include <linux/vmalloc.h>
#include <linux/workqueue.h>
#include <linux/kmemleak.h>
+#include <linux/sched.h>
#include <asm/cacheflush.h>
#include <asm/sections.h>
@@ -447,26 +448,25 @@ static void pcpu_next_fit_region(struct pcpu_chunk *chunk, int alloc_bits,
/**
* pcpu_mem_zalloc - allocate memory
* @size: bytes to allocate
+ * @gfp: allocation flags
*
* Allocate @size bytes. If @size is smaller than PAGE_SIZE,
- * kzalloc() is used; otherwise, vzalloc() is used. The returned
- * memory is always zeroed.
- *
- * CONTEXT:
- * Does GFP_KERNEL allocation.
+ * kzalloc() is used; otherwise, the equivalent of vzalloc() is used.
+ * This is to facilitate passing through whitelisted flags. The
+ * returned memory is always zeroed.
*
* RETURNS:
* Pointer to the allocated area on success, NULL on failure.
*/
-static void *pcpu_mem_zalloc(size_t size)
+static void *pcpu_mem_zalloc(size_t size, gfp_t gfp)
{
if (WARN_ON_ONCE(!slab_is_available()))
return NULL;
if (size <= PAGE_SIZE)
- return kzalloc(size, GFP_KERNEL);
+ return kzalloc(size, gfp);
else
- return vzalloc(size);
+ return __vmalloc(size, gfp | __GFP_ZERO, PAGE_KERNEL);
}
/**
@@ -1154,12 +1154,12 @@ static struct pcpu_chunk * __init pcpu_alloc_first_chunk(unsigned long tmp_addr,
return chunk;
}
-static struct pcpu_chunk *pcpu_alloc_chunk(void)
+static struct pcpu_chunk *pcpu_alloc_chunk(gfp_t gfp)
{
struct pcpu_chunk *chunk;
int region_bits;
- chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size);
+ chunk = pcpu_mem_zalloc(pcpu_chunk_struct_size, gfp);
if (!chunk)
return NULL;
@@ -1168,17 +1168,17 @@ static struct pcpu_chunk *pcpu_alloc_chunk(void)
region_bits = pcpu_chunk_map_bits(chunk);
chunk->alloc_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits) *
- sizeof(chunk->alloc_map[0]));
+ sizeof(chunk->alloc_map[0]), gfp);
if (!chunk->alloc_map)
goto alloc_map_fail;
chunk->bound_map = pcpu_mem_zalloc(BITS_TO_LONGS(region_bits + 1) *
- sizeof(chunk->bound_map[0]));
+ sizeof(chunk->bound_map[0]), gfp);
if (!chunk->bound_map)
goto bound_map_fail;
chunk->md_blocks = pcpu_mem_zalloc(pcpu_chunk_nr_blocks(chunk) *
- sizeof(chunk->md_blocks[0]));
+ sizeof(chunk->md_blocks[0]), gfp);
if (!chunk->md_blocks)
goto md_blocks_fail;
@@ -1277,9 +1277,11 @@ static void pcpu_chunk_depopulated(struct pcpu_chunk *chunk,
* pcpu_addr_to_page - translate address to physical address
* pcpu_verify_alloc_info - check alloc_info is acceptable during init
*/
-static int pcpu_populate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk, int off, int size);
-static struct pcpu_chunk *pcpu_create_chunk(void);
+static int pcpu_populate_chunk(struct pcpu_chunk *chunk,
+ int page_start, int page_end, gfp_t gfp);
+static void pcpu_depopulate_chunk(struct pcpu_chunk *chunk,
+ int page_start, int page_end);
+static struct pcpu_chunk *pcpu_create_chunk(gfp_t gfp);
static void pcpu_destroy_chunk(struct pcpu_chunk *chunk);
static struct page *pcpu_addr_to_page(void *addr);
static int __init pcpu_verify_alloc_info(const struct pcpu_alloc_info *ai);
@@ -1339,6 +1341,8 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
gfp_t gfp)
{
+ /* whitelisted flags that can be passed to the backing allocators */
+ gfp_t pcpu_gfp = gfp & (GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN);
bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
bool do_warn = !(gfp & __GFP_NOWARN);
static int warn_limit = 10;
@@ -1369,8 +1373,17 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
return NULL;
}
- if (!is_atomic)
- mutex_lock(&pcpu_alloc_mutex);
+ if (!is_atomic) {
+ /*
+ * pcpu_balance_workfn() allocates memory under this mutex,
+ * and it may wait for memory reclaim. Allow current task
+ * to become OOM victim, in case of memory pressure.
+ */
+ if (gfp & __GFP_NOFAIL)
+ mutex_lock(&pcpu_alloc_mutex);
+ else if (mutex_lock_killable(&pcpu_alloc_mutex))
+ return NULL;
+ }
spin_lock_irqsave(&pcpu_lock, flags);
@@ -1421,7 +1434,7 @@ restart:
}
if (list_empty(&pcpu_slot[pcpu_nr_slots - 1])) {
- chunk = pcpu_create_chunk();
+ chunk = pcpu_create_chunk(pcpu_gfp);
if (!chunk) {
err = "failed to allocate new chunk";
goto fail;
@@ -1450,7 +1463,7 @@ area_found:
page_start, page_end) {
WARN_ON(chunk->immutable);
- ret = pcpu_populate_chunk(chunk, rs, re);
+ ret = pcpu_populate_chunk(chunk, rs, re, pcpu_gfp);
spin_lock_irqsave(&pcpu_lock, flags);
if (ret) {
@@ -1561,10 +1574,17 @@ void __percpu *__alloc_reserved_percpu(size_t size, size_t align)
* pcpu_balance_workfn - manage the amount of free chunks and populated pages
* @work: unused
*
- * Reclaim all fully free chunks except for the first one.
+ * Reclaim all fully free chunks except for the first one. This is also
+ * responsible for maintaining the pool of empty populated pages. However,
+ * it is possible that this is called when physical memory is scarce causing
+ * OOM killer to be triggered. We should avoid doing so until an actual
+ * allocation causes the failure as it is possible that requests can be
+ * serviced from already backed regions.
*/
static void pcpu_balance_workfn(struct work_struct *work)
{
+ /* gfp flags passed to underlying allocators */
+ const gfp_t gfp = GFP_KERNEL | __GFP_NORETRY | __GFP_NOWARN;
LIST_HEAD(to_free);
struct list_head *free_head = &pcpu_slot[pcpu_nr_slots - 1];
struct pcpu_chunk *chunk, *next;
@@ -1600,6 +1620,7 @@ static void pcpu_balance_workfn(struct work_struct *work)
spin_unlock_irq(&pcpu_lock);
}
pcpu_destroy_chunk(chunk);
+ cond_resched();
}
/*
@@ -1645,7 +1666,7 @@ retry_pop:
chunk->nr_pages) {
int nr = min(re - rs, nr_to_pop);
- ret = pcpu_populate_chunk(chunk, rs, rs + nr);
+ ret = pcpu_populate_chunk(chunk, rs, rs + nr, gfp);
if (!ret) {
nr_to_pop -= nr;
spin_lock_irq(&pcpu_lock);
@@ -1662,7 +1683,7 @@ retry_pop:
if (nr_to_pop) {
/* ran out of chunks to populate, create a new one and retry */
- chunk = pcpu_create_chunk();
+ chunk = pcpu_create_chunk(gfp);
if (chunk) {
spin_lock_irq(&pcpu_lock);
pcpu_chunk_relocate(chunk, -1);
@@ -2719,11 +2740,7 @@ void __init setup_per_cpu_areas(void)
if (pcpu_setup_first_chunk(ai, fc) < 0)
panic("Failed to initialize percpu areas.");
-#ifdef CONFIG_CRIS
-#warning "the CRIS architecture has physical and virtual addresses confused"
-#else
pcpu_free_alloc_info(ai);
-#endif
}
#endif /* CONFIG_SMP */
diff --git a/mm/readahead.c b/mm/readahead.c
index c4ca70239233..4d57b4644f98 100644
--- a/mm/readahead.c
+++ b/mm/readahead.c
@@ -573,7 +573,7 @@ do_readahead(struct address_space *mapping, struct file *filp,
return force_page_cache_readahead(mapping, filp, index, nr);
}
-SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
+ssize_t ksys_readahead(int fd, loff_t offset, size_t count)
{
ssize_t ret;
struct fd f;
@@ -592,3 +592,8 @@ SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
}
return ret;
}
+
+SYSCALL_DEFINE3(readahead, int, fd, loff_t, offset, size_t, count)
+{
+ return ksys_readahead(fd, offset, count);
+}
diff --git a/mm/shmem.c b/mm/shmem.c
index 1907688b75ee..b85919243399 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -493,36 +493,45 @@ next:
info = list_entry(pos, struct shmem_inode_info, shrinklist);
inode = &info->vfs_inode;
- if (nr_to_split && split >= nr_to_split) {
- iput(inode);
- continue;
- }
+ if (nr_to_split && split >= nr_to_split)
+ goto leave;
- page = find_lock_page(inode->i_mapping,
+ page = find_get_page(inode->i_mapping,
(inode->i_size & HPAGE_PMD_MASK) >> PAGE_SHIFT);
if (!page)
goto drop;
+ /* No huge page at the end of the file: nothing to split */
if (!PageTransHuge(page)) {
- unlock_page(page);
put_page(page);
goto drop;
}
+ /*
+ * Leave the inode on the list if we failed to lock
+ * the page at this time.
+ *
+ * Waiting for the lock may lead to deadlock in the
+ * reclaim path.
+ */
+ if (!trylock_page(page)) {
+ put_page(page);
+ goto leave;
+ }
+
ret = split_huge_page(page);
unlock_page(page);
put_page(page);
- if (ret) {
- /* split failed: leave it on the list */
- iput(inode);
- continue;
- }
+ /* If split failed leave the inode on the list */
+ if (ret)
+ goto leave;
split++;
drop:
list_del_init(&info->shrinklist);
removed++;
+leave:
iput(inode);
}
diff --git a/mm/slab.c b/mm/slab.c
index 324446621b3e..9095c3945425 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1283,6 +1283,7 @@ void __init kmem_cache_init(void)
nr_node_ids * sizeof(struct kmem_cache_node *),
SLAB_HWCACHE_ALIGN, 0, 0);
list_add(&kmem_cache->list, &slab_caches);
+ memcg_link_cache(kmem_cache);
slab_state = PARTIAL;
/*
diff --git a/mm/sparse.c b/mm/sparse.c
index 7af5e7a92528..58cab483e81b 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -236,28 +236,6 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
}
/*
- * Only used by the i386 NUMA architecures, but relatively
- * generic code.
- */
-unsigned long __init node_memmap_size_bytes(int nid, unsigned long start_pfn,
- unsigned long end_pfn)
-{
- unsigned long pfn;
- unsigned long nr_pages = 0;
-
- mminit_validate_memmodel_limits(&start_pfn, &end_pfn);
- for (pfn = start_pfn; pfn < end_pfn; pfn += PAGES_PER_SECTION) {
- if (nid != early_pfn_to_nid(pfn))
- continue;
-
- if (pfn_present(pfn))
- nr_pages += PAGES_PER_SECTION;
- }
-
- return nr_pages * sizeof(struct page);
-}
-
-/*
* Subtle, we encode the real pfn into the mem_map such that
* the identity pfn - section_mem_map will return the actual
* physical page frame number.
@@ -427,10 +405,6 @@ struct page __init *sparse_mem_map_populate(unsigned long pnum, int nid,
struct page *map;
unsigned long size;
- map = alloc_remap(nid, sizeof(struct page) * PAGES_PER_SECTION);
- if (map)
- return map;
-
size = PAGE_ALIGN(sizeof(struct page) * PAGES_PER_SECTION);
map = memblock_virt_alloc_try_nid(size,
PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
@@ -446,17 +420,6 @@ void __init sparse_mem_maps_populate_node(struct page **map_map,
unsigned long pnum;
unsigned long size = sizeof(struct page) * PAGES_PER_SECTION;
- map = alloc_remap(nodeid, size * map_count);
- if (map) {
- for (pnum = pnum_begin; pnum < pnum_end; pnum++) {
- if (!present_section_nr(pnum))
- continue;
- map_map[pnum] = map;
- map += size;
- }
- return;
- }
-
size = PAGE_ALIGN(size);
map = memblock_virt_alloc_try_nid_raw(size * map_count,
PAGE_SIZE, __pa(MAX_DMA_ADDRESS),
diff --git a/mm/vmscan.c b/mm/vmscan.c
index bee53495a829..cd5dc3faaa57 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1780,6 +1780,20 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
set_bit(PGDAT_WRITEBACK, &pgdat->flags);
/*
+ * If dirty pages are scanned that are not queued for IO, it
+ * implies that flushers are not doing their job. This can
+ * happen when memory pressure pushes dirty pages to the end of
+ * the LRU before the dirty limits are breached and the dirty
+ * data has expired. It can also happen when the proportion of
+ * dirty pages grows not through writes but through memory
+ * pressure reclaiming all the clean cache. And in some cases,
+ * the flushers simply cannot keep up with the allocation
+ * rate. Nudge the flusher threads in case they are asleep.
+ */
+ if (stat.nr_unqueued_dirty == nr_taken)
+ wakeup_flusher_threads(WB_REASON_VMSCAN);
+
+ /*
* Legacy memcg will stall in page writeback so avoid forcibly
* stalling here.
*/
@@ -1791,22 +1805,9 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
if (stat.nr_dirty && stat.nr_dirty == stat.nr_congested)
set_bit(PGDAT_CONGESTED, &pgdat->flags);
- /*
- * If dirty pages are scanned that are not queued for IO, it
- * implies that flushers are not doing their job. This can
- * happen when memory pressure pushes dirty pages to the end of
- * the LRU before the dirty limits are breached and the dirty
- * data has expired. It can also happen when the proportion of
- * dirty pages grows not through writes but through memory
- * pressure reclaiming all the clean cache. And in some cases,
- * the flushers simply cannot keep up with the allocation
- * rate. Nudge the flusher threads in case they are asleep, but
- * also allow kswapd to start writing pages during reclaim.
- */
- if (stat.nr_unqueued_dirty == nr_taken) {
- wakeup_flusher_threads(WB_REASON_VMSCAN);
+ /* Allow kswapd to start writing pages during reclaim. */
+ if (stat.nr_unqueued_dirty == nr_taken)
set_bit(PGDAT_DIRTY, &pgdat->flags);
- }
/*
* If kswapd scans pages marked marked for immediate
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 40b2db6db6b1..33581be705f0 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -1839,9 +1839,11 @@ static void vmstat_update(struct work_struct *w)
* to occur in the future. Keep on running the
* update worker thread.
*/
+ preempt_disable();
queue_delayed_work_on(smp_processor_id(), mm_percpu_wq,
this_cpu_ptr(&vmstat_work),
round_jiffies_relative(sysctl_stat_interval));
+ preempt_enable();
}
}
diff --git a/mm/zsmalloc.c b/mm/zsmalloc.c
index c3013505c305..b7f61cd1c709 100644
--- a/mm/zsmalloc.c
+++ b/mm/zsmalloc.c
@@ -84,18 +84,19 @@
* This is made more complicated by various memory models and PAE.
*/
-#ifndef MAX_PHYSMEM_BITS
-#ifdef CONFIG_HIGHMEM64G
-#define MAX_PHYSMEM_BITS 36
-#else /* !CONFIG_HIGHMEM64G */
+#ifndef MAX_POSSIBLE_PHYSMEM_BITS
+#ifdef MAX_PHYSMEM_BITS
+#define MAX_POSSIBLE_PHYSMEM_BITS MAX_PHYSMEM_BITS
+#else
/*
* If this definition of MAX_PHYSMEM_BITS is used, OBJ_INDEX_BITS will just
* be PAGE_SHIFT
*/
-#define MAX_PHYSMEM_BITS BITS_PER_LONG
+#define MAX_POSSIBLE_PHYSMEM_BITS BITS_PER_LONG
#endif
#endif
-#define _PFN_BITS (MAX_PHYSMEM_BITS - PAGE_SHIFT)
+
+#define _PFN_BITS (MAX_POSSIBLE_PHYSMEM_BITS - PAGE_SHIFT)
/*
* Memory for allocating for handle keeps object position by