Diffstat (limited to 'mm')
-rw-r--r--  mm/Makefile | 1
-rw-r--r--  mm/backing-dev.c | 20
-rw-r--r--  mm/bootmem.c | 1
-rw-r--r--  mm/cma.c | 2
-rw-r--r--  mm/cma.h | 1
-rw-r--r--  mm/cma_debug.c | 1
-rw-r--r--  mm/compaction.c | 14
-rw-r--r--  mm/debug.c | 1
-rw-r--r--  mm/debug_page_ref.c | 1
-rw-r--r--  mm/early_ioremap.c | 1
-rw-r--r--  mm/fadvise.c | 1
-rw-r--r--  mm/failslab.c | 1
-rw-r--r--  mm/filemap.c | 8
-rw-r--r--  mm/frame_vector.c | 1
-rw-r--r--  mm/gup.c | 97
-rw-r--r--  mm/highmem.c | 1
-rw-r--r--  mm/huge_memory.c | 13
-rw-r--r--  mm/hugetlb.c | 32
-rw-r--r--  mm/init-mm.c | 1
-rw-r--r--  mm/kasan/Makefile | 1
-rw-r--r--  mm/kasan/kasan.h | 1
-rw-r--r--  mm/khugepaged.c | 1
-rw-r--r--  mm/kmemcheck.c | 1
-rw-r--r--  mm/ksm.c | 5
-rw-r--r--  mm/list_lru.c | 12
-rw-r--r--  mm/madvise.c | 20
-rw-r--r--  mm/memcontrol.c | 38
-rw-r--r--  mm/memory.c | 8
-rw-r--r--  mm/memory_hotplug.c | 7
-rw-r--r--  mm/mempolicy.c | 7
-rw-r--r--  mm/mempool.c | 1
-rw-r--r--  mm/memtest.c | 1
-rw-r--r--  mm/migrate.c | 4
-rw-r--r--  mm/mincore.c | 1
-rw-r--r--  mm/mlock.c | 1
-rw-r--r--  mm/mmzone.c | 1
-rw-r--r--  mm/mprotect.c | 1
-rw-r--r--  mm/mremap.c | 1
-rw-r--r--  mm/msync.c | 1
-rw-r--r--  mm/nobootmem.c | 1
-rw-r--r--  mm/oom_kill.c | 16
-rw-r--r--  mm/page-writeback.c | 36
-rw-r--r--  mm/page_alloc.c | 3
-rw-r--r--  mm/page_counter.c | 1
-rw-r--r--  mm/page_ext.c | 1
-rw-r--r--  mm/page_idle.c | 1
-rw-r--r--  mm/page_io.c | 3
-rw-r--r--  mm/page_isolation.c | 1
-rw-r--r--  mm/page_owner.c | 1
-rw-r--r--  mm/page_poison.c | 1
-rw-r--r--  mm/page_vma_mapped.c | 29
-rw-r--r--  mm/pagewalk.c | 7
-rw-r--r--  mm/percpu-internal.h | 1
-rw-r--r--  mm/percpu.c | 15
-rw-r--r--  mm/pgtable-generic.c | 1
-rw-r--r--  mm/quicklist.c | 1
-rw-r--r--  mm/rodata_test.c | 2
-rw-r--r--  mm/slab.c | 1
-rw-r--r--  mm/slab.h | 3
-rw-r--r--  mm/slab_common.c | 23
-rw-r--r--  mm/slob.c | 1
-rw-r--r--  mm/slub.c | 1
-rw-r--r--  mm/sparse-vmemmap.c | 1
-rw-r--r--  mm/sparse.c | 28
-rw-r--r--  mm/swap.c | 4
-rw-r--r--  mm/swap_cgroup.c | 1
-rw-r--r--  mm/swap_slots.c | 1
-rw-r--r--  mm/swap_state.c | 53
-rw-r--r--  mm/swapfile.c | 23
-rw-r--r--  mm/vmacache.c | 1
-rw-r--r--  mm/vmalloc.c | 6
-rw-r--r--  mm/vmscan.c | 3
-rw-r--r--  mm/workingset.c | 1
-rw-r--r--  mm/z3fold.c | 10
74 files changed, 365 insertions, 228 deletions
diff --git a/mm/Makefile b/mm/Makefile
index e3ac3aeb533b..4659b93cba43 100644
--- a/mm/Makefile
+++ b/mm/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
#
# Makefile for the linux memory manager.
#
diff --git a/mm/backing-dev.c b/mm/backing-dev.c
index e19606bb41a0..74b52dfd5852 100644
--- a/mm/backing-dev.c
+++ b/mm/backing-dev.c
@@ -1072,23 +1072,3 @@ out:
return ret;
}
EXPORT_SYMBOL(wait_iff_congested);
-
-int pdflush_proc_obsolete(struct ctl_table *table, int write,
- void __user *buffer, size_t *lenp, loff_t *ppos)
-{
- char kbuf[] = "0\n";
-
- if (*ppos || *lenp < sizeof(kbuf)) {
- *lenp = 0;
- return 0;
- }
-
- if (copy_to_user(buffer, kbuf, sizeof(kbuf)))
- return -EFAULT;
- pr_warn_once("%s exported in /proc is scheduled for removal\n",
- table->procname);
-
- *lenp = 2;
- *ppos += *lenp;
- return 2;
-}
diff --git a/mm/bootmem.c b/mm/bootmem.c
index 9fedb27c6451..6aef64254203 100644
--- a/mm/bootmem.c
+++ b/mm/bootmem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* bootmem - A boot-time physical memory allocator and configurator
*
diff --git a/mm/cma.c b/mm/cma.c
index c0da318c020e..022e52bd8370 100644
--- a/mm/cma.c
+++ b/mm/cma.c
@@ -460,7 +460,7 @@ struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align,
trace_cma_alloc(pfn, page, count, align);
- if (ret) {
+ if (ret && !(gfp_mask & __GFP_NOWARN)) {
pr_info("%s: alloc failed, req-size: %zu pages, ret: %d\n",
__func__, count, ret);
cma_debug_show_areas(cma);
diff --git a/mm/cma.h b/mm/cma.h
index 49861286279d..33c0b517733c 100644
--- a/mm/cma.h
+++ b/mm/cma.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MM_CMA_H__
#define __MM_CMA_H__
diff --git a/mm/cma_debug.c b/mm/cma_debug.c
index c03ccbc405a0..275df8b5b22e 100644
--- a/mm/cma_debug.c
+++ b/mm/cma_debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* CMA DebugFS Interface
*
diff --git a/mm/compaction.c b/mm/compaction.c
index fb548e4c7bd4..85395dc6eb13 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/compaction.c
*
@@ -1999,17 +2000,14 @@ void wakeup_kcompactd(pg_data_t *pgdat, int order, int classzone_idx)
if (pgdat->kcompactd_max_order < order)
pgdat->kcompactd_max_order = order;
- /*
- * Pairs with implicit barrier in wait_event_freezable()
- * such that wakeups are not missed in the lockless
- * waitqueue_active() call.
- */
- smp_acquire__after_ctrl_dep();
-
if (pgdat->kcompactd_classzone_idx > classzone_idx)
pgdat->kcompactd_classzone_idx = classzone_idx;
- if (!waitqueue_active(&pgdat->kcompactd_wait))
+ /*
+ * Pairs with implicit barrier in wait_event_freezable()
+ * such that wakeups are not missed.
+ */
+ if (!wq_has_sleeper(&pgdat->kcompactd_wait))
return;
if (!kcompactd_node_suitable(pgdat))
diff --git a/mm/debug.c b/mm/debug.c
index 5715448ab0b5..6726bec731c9 100644
--- a/mm/debug.c
+++ b/mm/debug.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* mm/debug.c
*
diff --git a/mm/debug_page_ref.c b/mm/debug_page_ref.c
index 1aef3d562e52..f3b2c9d3ece2 100644
--- a/mm/debug_page_ref.c
+++ b/mm/debug_page_ref.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/mm_types.h>
#include <linux/tracepoint.h>
diff --git a/mm/early_ioremap.c b/mm/early_ioremap.c
index b1dd4a948fc0..d04ac1ec0559 100644
--- a/mm/early_ioremap.c
+++ b/mm/early_ioremap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Provide common bits of early_ioremap() support for architectures needing
* temporary mappings during boot before ioremap() is available.
diff --git a/mm/fadvise.c b/mm/fadvise.c
index 702f239cd6db..ec70d6e4b86d 100644
--- a/mm/fadvise.c
+++ b/mm/fadvise.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* mm/fadvise.c
*
diff --git a/mm/failslab.c b/mm/failslab.c
index b0fac98cd938..8087d976a809 100644
--- a/mm/failslab.c
+++ b/mm/failslab.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/fault-inject.h>
#include <linux/slab.h>
#include <linux/mm.h>
diff --git a/mm/filemap.c b/mm/filemap.c
index db250d0e0565..594d73fef8b4 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -620,6 +620,14 @@ int file_check_and_advance_wb_err(struct file *file)
trace_file_check_and_advance_wb_err(file, old);
spin_unlock(&file->f_lock);
}
+
+ /*
+	 * We're mostly using this function as a drop-in replacement for
+ * filemap_check_errors. Clear AS_EIO/AS_ENOSPC to emulate the effect
+ * that the legacy code would have had on these flags.
+ */
+ clear_bit(AS_EIO, &mapping->flags);
+ clear_bit(AS_ENOSPC, &mapping->flags);
return err;
}
EXPORT_SYMBOL(file_check_and_advance_wb_err);
diff --git a/mm/frame_vector.c b/mm/frame_vector.c
index 72ebec18629c..2f98df0d460e 100644
--- a/mm/frame_vector.c
+++ b/mm/frame_vector.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/err.h>
diff --git a/mm/gup.c b/mm/gup.c
index b2b4d4263768..dfcde13f289a 100644
--- a/mm/gup.c
+++ b/mm/gup.c
@@ -1643,6 +1643,47 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
return 1;
}
+static void gup_pgd_range(unsigned long addr, unsigned long end,
+ int write, struct page **pages, int *nr)
+{
+ unsigned long next;
+ pgd_t *pgdp;
+
+ pgdp = pgd_offset(current->mm, addr);
+ do {
+ pgd_t pgd = READ_ONCE(*pgdp);
+
+ next = pgd_addr_end(addr, end);
+ if (pgd_none(pgd))
+ return;
+ if (unlikely(pgd_huge(pgd))) {
+ if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
+ pages, nr))
+ return;
+ } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
+ if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
+ PGDIR_SHIFT, next, write, pages, nr))
+ return;
+ } else if (!gup_p4d_range(pgd, addr, next, write, pages, nr))
+ return;
+ } while (pgdp++, addr = next, addr != end);
+}
+
+#ifndef gup_fast_permitted
+/*
+ * Check whether it's allowed to use __get_user_pages_fast() for the range,
+ * or whether we need to fall back to the slow version:
+ */
+bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
+{
+ unsigned long len, end;
+
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+ return end >= start;
+}
+#endif
+
/*
* Like get_user_pages_fast() except it's IRQ-safe in that it won't fall back to
* the regular GUP. It will only return non-negative values.
@@ -1650,10 +1691,8 @@ static int gup_p4d_range(pgd_t pgd, unsigned long addr, unsigned long end,
int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
- struct mm_struct *mm = current->mm;
unsigned long addr, len, end;
- unsigned long next, flags;
- pgd_t *pgdp;
+ unsigned long flags;
int nr = 0;
start &= PAGE_MASK;
@@ -1677,45 +1716,15 @@ int __get_user_pages_fast(unsigned long start, int nr_pages, int write,
* block IPIs that come from THPs splitting.
*/
- local_irq_save(flags);
- pgdp = pgd_offset(mm, addr);
- do {
- pgd_t pgd = READ_ONCE(*pgdp);
-
- next = pgd_addr_end(addr, end);
- if (pgd_none(pgd))
- break;
- if (unlikely(pgd_huge(pgd))) {
- if (!gup_huge_pgd(pgd, pgdp, addr, next, write,
- pages, &nr))
- break;
- } else if (unlikely(is_hugepd(__hugepd(pgd_val(pgd))))) {
- if (!gup_huge_pd(__hugepd(pgd_val(pgd)), addr,
- PGDIR_SHIFT, next, write, pages, &nr))
- break;
- } else if (!gup_p4d_range(pgd, addr, next, write, pages, &nr))
- break;
- } while (pgdp++, addr = next, addr != end);
- local_irq_restore(flags);
+ if (gup_fast_permitted(start, nr_pages, write)) {
+ local_irq_save(flags);
+ gup_pgd_range(addr, end, write, pages, &nr);
+ local_irq_restore(flags);
+ }
return nr;
}
-#ifndef gup_fast_permitted
-/*
- * Check if it's allowed to use __get_user_pages_fast() for the range, or
- * we need to fall back to the slow version:
- */
-bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
-{
- unsigned long len, end;
-
- len = (unsigned long) nr_pages << PAGE_SHIFT;
- end = start + len;
- return end >= start;
-}
-#endif
-
/**
* get_user_pages_fast() - pin user pages in memory
* @start: starting user address
@@ -1735,12 +1744,22 @@ bool gup_fast_permitted(unsigned long start, int nr_pages, int write)
int get_user_pages_fast(unsigned long start, int nr_pages, int write,
struct page **pages)
{
+ unsigned long addr, len, end;
int nr = 0, ret = 0;
start &= PAGE_MASK;
+ addr = start;
+ len = (unsigned long) nr_pages << PAGE_SHIFT;
+ end = start + len;
+
+ if (unlikely(!access_ok(write ? VERIFY_WRITE : VERIFY_READ,
+ (void __user *)start, len)))
+ return 0;
if (gup_fast_permitted(start, nr_pages, write)) {
- nr = __get_user_pages_fast(start, nr_pages, write, pages);
+ local_irq_disable();
+ gup_pgd_range(addr, end, write, pages, &nr);
+ local_irq_enable();
ret = nr;
}
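
The gup_fast_permitted() fallback introduced above only rejects ranges whose end would wrap past the top of the address space; get_user_pages_fast() additionally filters the range through access_ok() before walking page tables with IRQs disabled. A minimal userspace sketch of that wraparound test (the PAGE_SHIFT value and the small test harness are assumptions for illustration, not part of the patch):

#include <stdbool.h>
#include <stdio.h>

#define PAGE_SHIFT 12	/* assumed 4 KiB pages, as on most architectures */

/* Mirrors the generic fallback: permit the range only when
 * start + len does not wrap around the unsigned address space. */
static bool range_permitted(unsigned long start, int nr_pages)
{
	unsigned long len = (unsigned long)nr_pages << PAGE_SHIFT;
	unsigned long end = start + len;

	return end >= start;
}

int main(void)
{
	printf("%d\n", range_permitted(0x1000, 16));        /* 1: sane range */
	printf("%d\n", range_permitted(~0UL - 0x1000, 16)); /* 0: wraps past ULONG_MAX */
	return 0;
}
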
diff --git a/mm/highmem.c b/mm/highmem.c
index 50b4ca6787f0..59db3223a5d6 100644
--- a/mm/highmem.c
+++ b/mm/highmem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* High memory handling common code and variables.
*
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 269b5df58543..003f7bcd0952 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -39,10 +39,10 @@
#include "internal.h"
/*
- * By default transparent hugepage support is disabled in order that avoid
- * to risk increase the memory footprint of applications without a guaranteed
- * benefit. When transparent hugepage support is enabled, is for all mappings,
- * and khugepaged scans all mappings.
+ * By default, transparent hugepage support is disabled in order to avoid
+ * risking an increased memory footprint for applications that are not
+ * guaranteed to benefit from it. When transparent hugepage support is
+ * enabled, it is for all mappings, and khugepaged scans all mappings.
* Defrag is invoked by khugepaged hugepage allocations and by page faults
* for all hugepage allocations.
*/
@@ -941,6 +941,9 @@ int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
pmd = pmd_swp_mksoft_dirty(pmd);
set_pmd_at(src_mm, addr, src_pmd, pmd);
}
+ add_mm_counter(dst_mm, MM_ANONPAGES, HPAGE_PMD_NR);
+ atomic_long_inc(&dst_mm->nr_ptes);
+ pgtable_trans_huge_deposit(dst_mm, dst_pmd, pgtable);
set_pmd_at(dst_mm, addr, dst_pmd, pmd);
ret = 0;
goto out_unlock;
@@ -2715,7 +2718,7 @@ static unsigned long deferred_split_count(struct shrinker *shrink,
struct shrink_control *sc)
{
struct pglist_data *pgdata = NODE_DATA(sc->nid);
- return ACCESS_ONCE(pgdata->split_queue_len);
+ return READ_ONCE(pgdata->split_queue_len);
}
static unsigned long deferred_split_scan(struct shrinker *shrink,
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index 424b0ef08a60..2d2ff5e8bf2b 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3984,6 +3984,9 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
unsigned long src_addr,
struct page **pagep)
{
+ struct address_space *mapping;
+ pgoff_t idx;
+ unsigned long size;
int vm_shared = dst_vma->vm_flags & VM_SHARED;
struct hstate *h = hstate_vma(dst_vma);
pte_t _dst_pte;
@@ -4021,13 +4024,24 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
__SetPageUptodate(page);
set_page_huge_active(page);
+ mapping = dst_vma->vm_file->f_mapping;
+ idx = vma_hugecache_offset(h, dst_vma, dst_addr);
+
/*
* If shared, add to page cache
*/
if (vm_shared) {
- struct address_space *mapping = dst_vma->vm_file->f_mapping;
- pgoff_t idx = vma_hugecache_offset(h, dst_vma, dst_addr);
+ size = i_size_read(mapping->host) >> huge_page_shift(h);
+ ret = -EFAULT;
+ if (idx >= size)
+ goto out_release_nounlock;
+ /*
+ * Serialization between remove_inode_hugepages() and
+ * huge_add_to_page_cache() below happens through the
+	 * hugetlb_fault_mutex_table, which must be held by
+	 * the caller here.
+ */
ret = huge_add_to_page_cache(page, mapping, idx);
if (ret)
goto out_release_nounlock;
@@ -4036,6 +4050,20 @@ int hugetlb_mcopy_atomic_pte(struct mm_struct *dst_mm,
ptl = huge_pte_lockptr(h, dst_mm, dst_pte);
spin_lock(ptl);
+ /*
+ * Recheck the i_size after holding PT lock to make sure not
+ * to leave any page mapped (as page_mapped()) beyond the end
+ * of the i_size (remove_inode_hugepages() is strict about
+ * enforcing that). If we bail out here, we'll also leave a
+ * page in the radix tree in the vm_shared case beyond the end
+ * of the i_size, but remove_inode_hugepages() will take care
+ * of it as soon as we drop the hugetlb_fault_mutex_table.
+ */
+ size = i_size_read(mapping->host) >> huge_page_shift(h);
+ ret = -EFAULT;
+ if (idx >= size)
+ goto out_release_unlock;
+
ret = -EEXIST;
if (!huge_pte_none(huge_ptep_get(dst_pte)))
goto out_release_unlock;
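
The hugetlb_mcopy_atomic_pte() change follows a check/recheck pattern: the i_size test is done once before inserting into the page cache and repeated after the page-table lock is taken, because a concurrent truncate can shrink the file in between. A rough userspace sketch of that pattern, with a pthread mutex and an atomic limit standing in for the PT lock and i_size (the names below are hypothetical):

#include <pthread.h>
#include <stdatomic.h>
#include <stddef.h>

static pthread_mutex_t lock = PTHREAD_MUTEX_INITIALIZER;
static _Atomic size_t limit;	/* can shrink concurrently, like i_size */

int insert(size_t idx)
{
	/* Cheap early check without the lock, like the first i_size test
	 * before huge_add_to_page_cache(). */
	if (idx >= limit)
		return -1;		/* -EFAULT in the kernel code */

	pthread_mutex_lock(&lock);
	/* Recheck under the lock: the limit may have shrunk between the
	 * first test and lock acquisition; this is the race the second
	 * i_size_read() in the hunk closes. */
	if (idx >= limit) {
		pthread_mutex_unlock(&lock);
		return -1;
	}
	/* ... safe to publish the entry at idx here ... */
	pthread_mutex_unlock(&lock);
	return 0;
}
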
diff --git a/mm/init-mm.c b/mm/init-mm.c
index 975e49f00f34..f94d5d15ebc0 100644
--- a/mm/init-mm.c
+++ b/mm/init-mm.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/mm_types.h>
#include <linux/rbtree.h>
#include <linux/rwsem.h>
diff --git a/mm/kasan/Makefile b/mm/kasan/Makefile
index 2976a9ee104f..3289db38bc87 100644
--- a/mm/kasan/Makefile
+++ b/mm/kasan/Makefile
@@ -1,3 +1,4 @@
+# SPDX-License-Identifier: GPL-2.0
KASAN_SANITIZE := n
UBSAN_SANITIZE_kasan.o := n
KCOV_INSTRUMENT := n
diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h
index 1229298cce64..c70851a9a6a4 100644
--- a/mm/kasan/kasan.h
+++ b/mm/kasan/kasan.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef __MM_KASAN_KASAN_H
#define __MM_KASAN_KASAN_H
diff --git a/mm/khugepaged.c b/mm/khugepaged.c
index c01f177a1120..43cb3043311b 100644
--- a/mm/khugepaged.c
+++ b/mm/khugepaged.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
#include <linux/mm.h>
diff --git a/mm/kmemcheck.c b/mm/kmemcheck.c
index 2d5959c5f7c5..800d64b854ea 100644
--- a/mm/kmemcheck.c
+++ b/mm/kmemcheck.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/gfp.h>
#include <linux/mm_types.h>
#include <linux/mm.h>
diff --git a/mm/ksm.c b/mm/ksm.c
index 15dd7415f7b3..6cb60f46cce5 100644
--- a/mm/ksm.c
+++ b/mm/ksm.c
@@ -1990,6 +1990,7 @@ static void stable_tree_append(struct rmap_item *rmap_item,
*/
static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
{
+ struct mm_struct *mm = rmap_item->mm;
struct rmap_item *tree_rmap_item;
struct page *tree_page = NULL;
struct stable_node *stable_node;
@@ -2062,9 +2063,11 @@ static void cmp_and_merge_page(struct page *page, struct rmap_item *rmap_item)
if (ksm_use_zero_pages && (checksum == zero_checksum)) {
struct vm_area_struct *vma;
- vma = find_mergeable_vma(rmap_item->mm, rmap_item->address);
+ down_read(&mm->mmap_sem);
+ vma = find_mergeable_vma(mm, rmap_item->address);
err = try_to_merge_one_page(vma, page,
ZERO_PAGE(rmap_item->address));
+ up_read(&mm->mmap_sem);
/*
* In case of failure, the page was not really empty, so we
* need to continue. Otherwise we're done.
diff --git a/mm/list_lru.c b/mm/list_lru.c
index 7a40fa2be858..f141f0c80ff3 100644
--- a/mm/list_lru.c
+++ b/mm/list_lru.c
@@ -325,12 +325,12 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
{
int size = memcg_nr_cache_ids;
- nlru->memcg_lrus = kmalloc(size * sizeof(void *), GFP_KERNEL);
+ nlru->memcg_lrus = kvmalloc(size * sizeof(void *), GFP_KERNEL);
if (!nlru->memcg_lrus)
return -ENOMEM;
if (__memcg_init_list_lru_node(nlru->memcg_lrus, 0, size)) {
- kfree(nlru->memcg_lrus);
+ kvfree(nlru->memcg_lrus);
return -ENOMEM;
}
@@ -340,7 +340,7 @@ static int memcg_init_list_lru_node(struct list_lru_node *nlru)
static void memcg_destroy_list_lru_node(struct list_lru_node *nlru)
{
__memcg_destroy_list_lru_node(nlru->memcg_lrus, 0, memcg_nr_cache_ids);
- kfree(nlru->memcg_lrus);
+ kvfree(nlru->memcg_lrus);
}
static int memcg_update_list_lru_node(struct list_lru_node *nlru,
@@ -351,12 +351,12 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
BUG_ON(old_size > new_size);
old = nlru->memcg_lrus;
- new = kmalloc(new_size * sizeof(void *), GFP_KERNEL);
+ new = kvmalloc(new_size * sizeof(void *), GFP_KERNEL);
if (!new)
return -ENOMEM;
if (__memcg_init_list_lru_node(new, old_size, new_size)) {
- kfree(new);
+ kvfree(new);
return -ENOMEM;
}
@@ -373,7 +373,7 @@ static int memcg_update_list_lru_node(struct list_lru_node *nlru,
nlru->memcg_lrus = new;
spin_unlock_irq(&nlru->lock);
- kfree(old);
+ kvfree(old);
return 0;
}
diff --git a/mm/madvise.c b/mm/madvise.c
index 21261ff0466f..375cf32087e4 100644
--- a/mm/madvise.c
+++ b/mm/madvise.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/madvise.c
*
@@ -625,18 +626,26 @@ static int madvise_inject_error(int behavior,
{
struct page *page;
struct zone *zone;
+ unsigned int order;
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- for (; start < end; start += PAGE_SIZE <<
- compound_order(compound_head(page))) {
+
+ for (; start < end; start += PAGE_SIZE << order) {
int ret;
ret = get_user_pages_fast(start, 1, 0, &page);
if (ret != 1)
return ret;
+ /*
+ * When soft offlining hugepages, after migrating the page
+	 * we dissolve it; therefore, in the second loop "page" will
+ * no longer be a compound page, and order will be 0.
+ */
+ order = compound_order(compound_head(page));
+
if (PageHWPoison(page)) {
put_page(page);
continue;
@@ -749,6 +758,9 @@ madvise_behavior_valid(int behavior)
* MADV_DONTFORK - omit this area from child's address space when forking:
* typically, to avoid COWing pages pinned by get_user_pages().
* MADV_DOFORK - cancel MADV_DONTFORK: no longer omit this area when forking.
+ * MADV_WIPEONFORK - present the child process with zero-filled memory in this
+ * range after a fork.
+ * MADV_KEEPONFORK - undo the effect of MADV_WIPEONFORK
* MADV_HWPOISON - trigger memory error handler as if the given memory range
* were corrupted by unrecoverable hardware memory failure.
* MADV_SOFT_OFFLINE - try to soft-offline the given range of memory.
@@ -769,7 +781,9 @@ madvise_behavior_valid(int behavior)
* zero - success
* -EINVAL - start + len < 0, start is not page-aligned,
* "behavior" is not a valid value, or application
- * is attempting to release locked or shared pages.
+ * is attempting to release locked or shared pages,
+ * or the specified address range includes a file-backed, HugeTLB,
+ * MAP_SHARED, or VM_PFNMAP range.
* -ENOMEM - addresses in the specified range are not currently
* mapped, or are outside the AS of the process.
* -EIO - an I/O error occurred while paging in data.
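
The newly documented MADV_WIPEONFORK/MADV_KEEPONFORK flags can be exercised from userspace; a small example (error handling trimmed, and the fallback define is only needed on libc headers that predate this uapi addition):

#include <stdio.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/wait.h>
#include <unistd.h>

#ifndef MADV_WIPEONFORK
#define MADV_WIPEONFORK 18	/* uapi value for this flag */
#endif

int main(void)
{
	char *p = mmap(NULL, 4096, PROT_READ | PROT_WRITE,
		       MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
	if (p == MAP_FAILED)
		return 1;

	strcpy(p, "secret");
	/* Children forked after this call see zero-filled memory here. */
	if (madvise(p, 4096, MADV_WIPEONFORK))
		perror("madvise");

	if (fork() == 0) {
		printf("child sees:  \"%s\"\n", p);	/* empty: wiped */
		_exit(0);
	}
	wait(NULL);
	printf("parent sees: \"%s\"\n", p);		/* still "secret" */
	return 0;
}
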
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 15af3da5af02..661f046ad318 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -1777,6 +1777,10 @@ static void drain_local_stock(struct work_struct *dummy)
struct memcg_stock_pcp *stock;
unsigned long flags;
+ /*
+ * The only protection from memory hotplug vs. drain_stock races is
+ * that we always operate on local CPU stock here with IRQ disabled
+ */
local_irq_save(flags);
stock = this_cpu_ptr(&memcg_stock);
@@ -1821,27 +1825,33 @@ static void drain_all_stock(struct mem_cgroup *root_memcg)
/* If someone's already draining, avoid adding running more workers. */
if (!mutex_trylock(&percpu_charge_mutex))
return;
- /* Notify other cpus that system-wide "drain" is running */
- get_online_cpus();
+ /*
+	 * Notify other cpus that a system-wide "drain" is running.
+	 * We do not care about races with cpu hotplug because cpu down
+ * as well as workers from this path always operate on the local
+ * per-cpu data. CPU up doesn't touch memcg_stock at all.
+ */
curcpu = get_cpu();
for_each_online_cpu(cpu) {
struct memcg_stock_pcp *stock = &per_cpu(memcg_stock, cpu);
struct mem_cgroup *memcg;
memcg = stock->cached;
- if (!memcg || !stock->nr_pages)
+ if (!memcg || !stock->nr_pages || !css_tryget(&memcg->css))
continue;
- if (!mem_cgroup_is_descendant(memcg, root_memcg))
+ if (!mem_cgroup_is_descendant(memcg, root_memcg)) {
+ css_put(&memcg->css);
continue;
+ }
if (!test_and_set_bit(FLUSHING_CACHED_CHARGE, &stock->flags)) {
if (cpu == curcpu)
drain_local_stock(&stock->work);
else
schedule_work_on(cpu, &stock->work);
}
+ css_put(&memcg->css);
}
put_cpu();
- put_online_cpus();
mutex_unlock(&percpu_charge_mutex);
}
@@ -5648,7 +5658,8 @@ static void uncharge_batch(const struct uncharge_gather *ug)
static void uncharge_page(struct page *page, struct uncharge_gather *ug)
{
VM_BUG_ON_PAGE(PageLRU(page), page);
- VM_BUG_ON_PAGE(!PageHWPoison(page) && page_count(page), page);
+ VM_BUG_ON_PAGE(page_count(page) && !is_zone_device_page(page) &&
+		       !PageHWPoison(page), page);
if (!page->mem_cgroup)
return;
@@ -5817,21 +5828,6 @@ void mem_cgroup_sk_alloc(struct sock *sk)
if (!mem_cgroup_sockets_enabled)
return;
- /*
- * Socket cloning can throw us here with sk_memcg already
- * filled. It won't however, necessarily happen from
- * process context. So the test for root memcg given
- * the current task's memcg won't help us in this case.
- *
- * Respecting the original socket's memcg is a better
- * decision in this case.
- */
- if (sk->sk_memcg) {
- BUG_ON(mem_cgroup_is_root(sk->sk_memcg));
- css_get(&sk->sk_memcg->css);
- return;
- }
-
rcu_read_lock();
memcg = mem_cgroup_from_task(current);
if (memcg == root_mem_cgroup)
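
The drain_all_stock() hunk drops get_online_cpus() and instead pins each cached memcg with css_tryget() before scheduling drain work, releasing it with css_put() afterwards, so a group that is already being torn down is simply skipped. A compact sketch of that try-get idiom using C11 atomics (the struct and function names are invented for illustration, not kernel APIs):

#include <stdatomic.h>
#include <stdbool.h>

struct obj {
	atomic_uint refs;	/* 0 means the object is going away */
};

bool obj_tryget(struct obj *o)
{
	unsigned int old = atomic_load(&o->refs);

	while (old != 0) {
		/* On failure, "old" is refreshed and the loop retries. */
		if (atomic_compare_exchange_weak(&o->refs, &old, old + 1))
			return true;	/* pinned; caller must obj_put() */
	}
	return false;			/* dying object, skip it */
}

void obj_put(struct obj *o)
{
	if (atomic_fetch_sub(&o->refs, 1) == 1) {
		/* last reference gone: safe to free the object here */
	}
}
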
diff --git a/mm/memory.c b/mm/memory.c
index ec4e15494901..cae514e7dcfc 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -845,7 +845,7 @@ struct page *_vm_normal_page(struct vm_area_struct *vma, unsigned long addr,
* vm_normal_page() so that we do not have to special case all
* call site of vm_normal_page().
*/
- if (likely(pfn < highest_memmap_pfn)) {
+ if (likely(pfn <= highest_memmap_pfn)) {
struct page *page = pfn_to_page(pfn);
if (is_device_public_page(page)) {
@@ -3891,9 +3891,9 @@ static int handle_pte_fault(struct vm_fault *vmf)
/*
* some architectures can have larger ptes than wordsize,
* e.g.ppc44x-defconfig has CONFIG_PTE_64BIT=y and
- * CONFIG_32BIT=y, so READ_ONCE or ACCESS_ONCE cannot guarantee
- * atomic accesses. The code below just needs a consistent
- * view for the ifs and we later double check anyway with the
+ * CONFIG_32BIT=y, so READ_ONCE cannot guarantee atomic
+ * accesses. The code below just needs a consistent view
+ * for the ifs and we later double check anyway with the
* ptl lock held. So here a barrier will do.
*/
barrier();
diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c
index e882cb6da994..d4b5f29906b9 100644
--- a/mm/memory_hotplug.c
+++ b/mm/memory_hotplug.c
@@ -328,6 +328,7 @@ int __ref __add_pages(int nid, unsigned long phys_start_pfn,
if (err && (err != -EEXIST))
break;
err = 0;
+ cond_resched();
}
vmemmap_populate_print_last();
out:
@@ -337,7 +338,7 @@ EXPORT_SYMBOL_GPL(__add_pages);
#ifdef CONFIG_MEMORY_HOTREMOVE
/* find the smallest valid pfn in the range [start_pfn, end_pfn) */
-static int find_smallest_section_pfn(int nid, struct zone *zone,
+static unsigned long find_smallest_section_pfn(int nid, struct zone *zone,
unsigned long start_pfn,
unsigned long end_pfn)
{
@@ -362,7 +363,7 @@ static int find_smallest_section_pfn(int nid, struct zone *zone,
}
/* find the biggest valid pfn in the range [start_pfn, end_pfn). */
-static int find_biggest_section_pfn(int nid, struct zone *zone,
+static unsigned long find_biggest_section_pfn(int nid, struct zone *zone,
unsigned long start_pfn,
unsigned long end_pfn)
{
@@ -550,7 +551,7 @@ static int __remove_section(struct zone *zone, struct mem_section *ms,
return ret;
scn_nr = __section_nr(ms);
- start_pfn = section_nr_to_pfn(scn_nr);
+ start_pfn = section_nr_to_pfn((unsigned long)scn_nr);
__remove_zone(zone, start_pfn);
sparse_remove_one_section(zone, ms, map_offset);
diff --git a/mm/mempolicy.c b/mm/mempolicy.c
index 006ba625c0b8..a2af6d58a68f 100644
--- a/mm/mempolicy.c
+++ b/mm/mempolicy.c
@@ -1920,8 +1920,11 @@ static struct page *alloc_page_interleave(gfp_t gfp, unsigned order,
struct page *page;
page = __alloc_pages(gfp, order, nid);
- if (page && page_to_nid(page) == nid)
- inc_zone_page_state(page, NUMA_INTERLEAVE_HIT);
+ if (page && page_to_nid(page) == nid) {
+ preempt_disable();
+ __inc_numa_state(page_zone(page), NUMA_INTERLEAVE_HIT);
+ preempt_enable();
+ }
return page;
}
diff --git a/mm/mempool.c b/mm/mempool.c
index 1c0294858527..c4a23cdae3f0 100644
--- a/mm/mempool.c
+++ b/mm/mempool.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/mempool.c
*
diff --git a/mm/memtest.c b/mm/memtest.c
index 8eaa4c3a5f65..f53ace709ccd 100644
--- a/mm/memtest.c
+++ b/mm/memtest.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/types.h>
#include <linux/init.h>
diff --git a/mm/migrate.c b/mm/migrate.c
index 6954c1435833..1236449b4777 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Memory Migration functionality - linux/mm/migrate.c
*
@@ -2146,8 +2147,9 @@ static int migrate_vma_collect_hole(unsigned long start,
unsigned long addr;
for (addr = start & PAGE_MASK; addr < end; addr += PAGE_SIZE) {
- migrate->src[migrate->npages++] = MIGRATE_PFN_MIGRATE;
+ migrate->src[migrate->npages] = MIGRATE_PFN_MIGRATE;
migrate->dst[migrate->npages] = 0;
+ migrate->npages++;
migrate->cpages++;
}
diff --git a/mm/mincore.c b/mm/mincore.c
index c5687c45c326..fc37afe226e6 100644
--- a/mm/mincore.c
+++ b/mm/mincore.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/mincore.c
*
diff --git a/mm/mlock.c b/mm/mlock.c
index dfc6f1912176..46af369c13e5 100644
--- a/mm/mlock.c
+++ b/mm/mlock.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/mlock.c
*
diff --git a/mm/mmzone.c b/mm/mmzone.c
index a51c0a67ea3d..4686fdc23bb9 100644
--- a/mm/mmzone.c
+++ b/mm/mmzone.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/mmzone.c
*
diff --git a/mm/mprotect.c b/mm/mprotect.c
index 6d3e2f082290..ec39f730a0bf 100644
--- a/mm/mprotect.c
+++ b/mm/mprotect.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* mm/mprotect.c
*
diff --git a/mm/mremap.c b/mm/mremap.c
index cfec004c4ff9..049470aa1e3e 100644
--- a/mm/mremap.c
+++ b/mm/mremap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* mm/mremap.c
*
diff --git a/mm/msync.c b/mm/msync.c
index 24e612fefa04..ef30a429623a 100644
--- a/mm/msync.c
+++ b/mm/msync.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/msync.c
*
diff --git a/mm/nobootmem.c b/mm/nobootmem.c
index 3637809a18d0..9b02fda0886b 100644
--- a/mm/nobootmem.c
+++ b/mm/nobootmem.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* bootmem - A boot-time physical memory allocator and configurator
*
diff --git a/mm/oom_kill.c b/mm/oom_kill.c
index 99736e026712..dee0f75c3013 100644
--- a/mm/oom_kill.c
+++ b/mm/oom_kill.c
@@ -40,6 +40,7 @@
#include <linux/ratelimit.h>
#include <linux/kthread.h>
#include <linux/init.h>
+#include <linux/mmu_notifier.h>
#include <asm/tlb.h>
#include "internal.h"
@@ -495,6 +496,21 @@ static bool __oom_reap_task_mm(struct task_struct *tsk, struct mm_struct *mm)
}
/*
+ * If the mm has notifiers then we would need to invalidate them around
+ * unmap_page_range and that is risky because notifiers can sleep and
+	 * what they do is basically nondeterministic. So let's have a short
+ * sleep to give the oom victim some more time.
+ * TODO: we really want to get rid of this ugly hack and make sure that
+	 * notifiers cannot block for an unbounded amount of time and add
+ * mmu_notifier_invalidate_range_{start,end} around unmap_page_range
+ */
+ if (mm_has_notifiers(mm)) {
+ up_read(&mm->mmap_sem);
+ schedule_timeout_idle(HZ);
+ goto unlock_oom;
+ }
+
+ /*
* MMF_OOM_SKIP is set by exit_mmap when the OOM reaper can't
* work on the mm anymore. The check for MMF_OOM_SKIP must run
* under mmap_sem for reading because it serializes against the
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 0b9c5cbe8eba..c518c845f202 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -1972,31 +1972,31 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
void __user *buffer, size_t *length, loff_t *ppos)
{
- proc_dointvec(table, write, buffer, length, ppos);
- return 0;
+ unsigned int old_interval = dirty_writeback_interval;
+ int ret;
+
+ ret = proc_dointvec(table, write, buffer, length, ppos);
+
+ /*
+ * Writing 0 to dirty_writeback_interval will disable periodic writeback
+	 * and a different non-zero value will wake up the writeback threads.
+ * wb_wakeup_delayed() would be more appropriate, but it's a pain to
+ * iterate over all bdis and wbs.
+ * The reason we do this is to make the change take effect immediately.
+ */
+ if (!ret && write && dirty_writeback_interval &&
+ dirty_writeback_interval != old_interval)
+ wakeup_flusher_threads(WB_REASON_PERIODIC);
+
+ return ret;
}
#ifdef CONFIG_BLOCK
void laptop_mode_timer_fn(unsigned long data)
{
struct request_queue *q = (struct request_queue *)data;
- int nr_pages = global_node_page_state(NR_FILE_DIRTY) +
- global_node_page_state(NR_UNSTABLE_NFS);
- struct bdi_writeback *wb;
- /*
- * We want to write everything out, not just down to the dirty
- * threshold
- */
- if (!bdi_has_dirty_io(q->backing_dev_info))
- return;
-
- rcu_read_lock();
- list_for_each_entry_rcu(wb, &q->backing_dev_info->wb_list, bdi_node)
- if (wb_has_dirty_io(wb))
- wb_start_writeback(wb, nr_pages, true,
- WB_REASON_LAPTOP_TIMER);
- rcu_read_unlock();
+ wakeup_flusher_threads_bdi(q->backing_dev_info, WB_REASON_LAPTOP_TIMER);
}
/*
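
dirty_writeback_centisecs_handler() now propagates the proc_dointvec() return value and wakes the flusher threads only when the interval actually changed to a non-zero value. Reduced to its control flow, the handler behaves roughly like this sketch (the names below are stand-ins, not kernel APIs):

#include <stdio.h>

static unsigned int writeback_interval = 500;	/* centisecs */

/* Stand-in for wakeup_flusher_threads(WB_REASON_PERIODIC). */
static void wake_flushers(void)
{
	puts("waking flusher threads");
}

int set_writeback_interval(unsigned int val)
{
	unsigned int old = writeback_interval;

	writeback_interval = val;	/* the proc_dointvec() step */

	/* 0 disables periodic writeback; only an effective change to a
	 * non-zero interval triggers an immediate wakeup. */
	if (writeback_interval && writeback_interval != old)
		wake_flushers();
	return 0;
}
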
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index c841af88836a..77e4d3c5c57b 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -1190,7 +1190,7 @@ static void __meminit __init_single_pfn(unsigned long pfn, unsigned long zone,
}
#ifdef CONFIG_DEFERRED_STRUCT_PAGE_INIT
-static void init_reserved_page(unsigned long pfn)
+static void __meminit init_reserved_page(unsigned long pfn)
{
pg_data_t *pgdat;
int nid, zid;
@@ -5367,6 +5367,7 @@ not_early:
__init_single_page(page, pfn, zone, nid);
set_pageblock_migratetype(page, MIGRATE_MOVABLE);
+ cond_resched();
} else {
__init_single_pfn(pfn, zone, nid);
}
diff --git a/mm/page_counter.c b/mm/page_counter.c
index 7c6a63d2c27f..2a8df3ad60a4 100644
--- a/mm/page_counter.c
+++ b/mm/page_counter.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Lockless hierarchical page accounting & limiting
*
diff --git a/mm/page_ext.c b/mm/page_ext.c
index 32f18911deda..4f0367d472c4 100644
--- a/mm/page_ext.c
+++ b/mm/page_ext.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/mmzone.h>
#include <linux/bootmem.h>
diff --git a/mm/page_idle.c b/mm/page_idle.c
index 4bd03a8d809e..0a49374e6931 100644
--- a/mm/page_idle.c
+++ b/mm/page_idle.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/init.h>
#include <linux/bootmem.h>
#include <linux/fs.h>
diff --git a/mm/page_io.c b/mm/page_io.c
index 21502d341a67..cd52b9cc169b 100644
--- a/mm/page_io.c
+++ b/mm/page_io.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/page_io.c
*
@@ -407,7 +408,7 @@ int swap_readpage(struct page *page, bool do_poll)
if (!READ_ONCE(bio->bi_private))
break;
- if (!blk_mq_poll(disk->queue, qc))
+ if (!blk_poll(disk->queue, qc))
break;
}
__set_current_state(TASK_RUNNING);
diff --git a/mm/page_isolation.c b/mm/page_isolation.c
index 757410d9f758..44f213935bf6 100644
--- a/mm/page_isolation.c
+++ b/mm/page_isolation.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/page_isolation.c
*/
diff --git a/mm/page_owner.c b/mm/page_owner.c
index 57abca62d4db..4f44b95b9d1e 100644
--- a/mm/page_owner.c
+++ b/mm/page_owner.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/debugfs.h>
#include <linux/mm.h>
#include <linux/slab.h>
diff --git a/mm/page_poison.c b/mm/page_poison.c
index be19e989ccff..e83fd44867de 100644
--- a/mm/page_poison.c
+++ b/mm/page_poison.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/kernel.h>
#include <linux/string.h>
#include <linux/mm.h>
diff --git a/mm/page_vma_mapped.c b/mm/page_vma_mapped.c
index 6a03946469a9..d22b84310f6d 100644
--- a/mm/page_vma_mapped.c
+++ b/mm/page_vma_mapped.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/rmap.h>
#include <linux/hugetlb.h>
@@ -6,17 +7,6 @@
#include "internal.h"
-static inline bool check_pmd(struct page_vma_mapped_walk *pvmw)
-{
- pmd_t pmde;
- /*
- * Make sure we don't re-load pmd between present and !trans_huge check.
- * We need a consistent view.
- */
- pmde = READ_ONCE(*pvmw->pmd);
- return pmd_present(pmde) && !pmd_trans_huge(pmde);
-}
-
static inline bool not_found(struct page_vma_mapped_walk *pvmw)
{
page_vma_mapped_walk_done(pvmw);
@@ -116,6 +106,7 @@ bool page_vma_mapped_walk(struct page_vma_mapped_walk *pvmw)
pgd_t *pgd;
p4d_t *p4d;
pud_t *pud;
+ pmd_t pmde;
/* The only possible pmd mapping has been handled on last iteration */
if (pvmw->pmd && !pvmw->pte)
@@ -148,7 +139,13 @@ restart:
if (!pud_present(*pud))
return false;
pvmw->pmd = pmd_offset(pud, pvmw->address);
- if (pmd_trans_huge(*pvmw->pmd) || is_pmd_migration_entry(*pvmw->pmd)) {
+ /*
+ * Make sure the pmd value isn't cached in a register by the
+ * compiler and used as a stale value after we've observed a
+ * subsequent update.
+ */
+ pmde = READ_ONCE(*pvmw->pmd);
+ if (pmd_trans_huge(pmde) || is_pmd_migration_entry(pmde)) {
pvmw->ptl = pmd_lock(mm, pvmw->pmd);
if (likely(pmd_trans_huge(*pvmw->pmd))) {
if (pvmw->flags & PVMW_MIGRATION)
@@ -167,17 +164,15 @@ restart:
return not_found(pvmw);
return true;
}
- } else
- WARN_ONCE(1, "Non present huge pmd without pmd migration enabled!");
+ }
return not_found(pvmw);
} else {
/* THP pmd was split under us: handle on pte level */
spin_unlock(pvmw->ptl);
pvmw->ptl = NULL;
}
- } else {
- if (!check_pmd(pvmw))
- return false;
+ } else if (!pmd_present(pmde)) {
+ return false;
}
if (!map_pte(pvmw))
goto next_pte;
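
The pmde local plus READ_ONCE() in this hunk guarantees the pmd is loaded exactly once, so the present and trans_huge tests all see the same snapshot. In userspace terms, READ_ONCE() is essentially a volatile access; a minimal sketch (a simplification of the kernel macro, which also handles non-scalar sizes):

/* Force exactly one load and forbid the compiler from re-reading
 * the location later behind our back. */
#define READ_ONCE(x)	(*(const volatile __typeof__(x) *)&(x))

unsigned long pmd_snapshot(unsigned long *pmdp)
{
	unsigned long pmde = READ_ONCE(*pmdp);

	/* Every subsequent check uses the local copy, never *pmdp again,
	 * which is what keeps the present/!trans_huge tests consistent. */
	return pmde;
}
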
diff --git a/mm/pagewalk.c b/mm/pagewalk.c
index 1a4197965415..23a3e415ac2c 100644
--- a/mm/pagewalk.c
+++ b/mm/pagewalk.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/mm.h>
#include <linux/highmem.h>
#include <linux/sched.h>
@@ -187,8 +188,12 @@ static int walk_hugetlb_range(unsigned long addr, unsigned long end,
do {
next = hugetlb_entry_end(h, addr, end);
pte = huge_pte_offset(walk->mm, addr & hmask, sz);
- if (pte && walk->hugetlb_entry)
+
+ if (pte)
err = walk->hugetlb_entry(pte, hmask, addr, next, walk);
+ else if (walk->pte_hole)
+ err = walk->pte_hole(addr, next, walk);
+
if (err)
break;
} while (addr = next, addr != end);
diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h
index 7065faf74b46..b1739dc06b73 100644
--- a/mm/percpu-internal.h
+++ b/mm/percpu-internal.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef _MM_PERCPU_INTERNAL_H
#define _MM_PERCPU_INTERNAL_H
diff --git a/mm/percpu.c b/mm/percpu.c
index b9691d946fdc..79e3549cab0f 100644
--- a/mm/percpu.c
+++ b/mm/percpu.c
@@ -1329,7 +1329,9 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
* @gfp: allocation flags
*
* Allocate percpu area of @size bytes aligned at @align. If @gfp doesn't
- * contain %GFP_KERNEL, the allocation is atomic.
+ * contain %GFP_KERNEL, the allocation is atomic. If @gfp has __GFP_NOWARN
+ * then no warning will be triggered on invalid or failed allocation
+ * requests.
*
* RETURNS:
* Percpu pointer to the allocated area on success, NULL on failure.
@@ -1337,10 +1339,11 @@ static struct pcpu_chunk *pcpu_chunk_addr_search(void *addr)
static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
gfp_t gfp)
{
+ bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
+ bool do_warn = !(gfp & __GFP_NOWARN);
static int warn_limit = 10;
struct pcpu_chunk *chunk;
const char *err;
- bool is_atomic = (gfp & GFP_KERNEL) != GFP_KERNEL;
int slot, off, cpu, ret;
unsigned long flags;
void __percpu *ptr;
@@ -1361,7 +1364,7 @@ static void __percpu *pcpu_alloc(size_t size, size_t align, bool reserved,
if (unlikely(!size || size > PCPU_MIN_UNIT_SIZE || align > PAGE_SIZE ||
!is_power_of_2(align))) {
- WARN(true, "illegal size (%zu) or align (%zu) for percpu allocation\n",
+ WARN(do_warn, "illegal size (%zu) or align (%zu) for percpu allocation\n",
size, align);
return NULL;
}
@@ -1482,7 +1485,7 @@ fail_unlock:
fail:
trace_percpu_alloc_percpu_fail(reserved, is_atomic, size, align);
- if (!is_atomic && warn_limit) {
+ if (!is_atomic && do_warn && warn_limit) {
pr_warn("allocation failed, size=%zu align=%zu atomic=%d, %s\n",
size, align, is_atomic, err);
dump_stack();
@@ -1507,7 +1510,9 @@ fail:
*
* Allocate zero-filled percpu area of @size bytes aligned at @align. If
* @gfp doesn't contain %GFP_KERNEL, the allocation doesn't block and can
- * be called from any context but is a lot more likely to fail.
+ * be called from any context but is a lot more likely to fail. If @gfp
+ * has __GFP_NOWARN then no warning will be triggered on invalid or failed
+ * allocation requests.
*
* RETURNS:
* Percpu pointer to the allocated area on success, NULL on failure.
diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c
index 1175f6a24fdb..1e4ee763c190 100644
--- a/mm/pgtable-generic.c
+++ b/mm/pgtable-generic.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* mm/pgtable-generic.c
*
diff --git a/mm/quicklist.c b/mm/quicklist.c
index daf6ff6e199a..5e98ac78e410 100644
--- a/mm/quicklist.c
+++ b/mm/quicklist.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Quicklist support.
*
diff --git a/mm/rodata_test.c b/mm/rodata_test.c
index 6bb4deb12e78..d908c8769b48 100644
--- a/mm/rodata_test.c
+++ b/mm/rodata_test.c
@@ -14,7 +14,7 @@
#include <linux/uaccess.h>
#include <asm/sections.h>
-const int rodata_test_data = 0xC3;
+static const int rodata_test_data = 0xC3;
void rodata_test(void)
{
diff --git a/mm/slab.c b/mm/slab.c
index 04dec48c3ed7..b7095884fd93 100644
--- a/mm/slab.c
+++ b/mm/slab.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/slab.c
* Written by Mark Hemment, 1996/97.
diff --git a/mm/slab.h b/mm/slab.h
index 073362816acc..86d7c7d860f9 100644
--- a/mm/slab.h
+++ b/mm/slab.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef MM_SLAB_H
#define MM_SLAB_H
/*
@@ -258,7 +259,7 @@ cache_from_memcg_idx(struct kmem_cache *s, int idx)
* memcg_caches issues a write barrier to match this (see
* memcg_create_kmem_cache()).
*/
- cachep = lockless_dereference(arr->entries[idx]);
+ cachep = READ_ONCE(arr->entries[idx]);
rcu_read_unlock();
return cachep;
diff --git a/mm/slab_common.c b/mm/slab_common.c
index 904a83be82de..0d7fe71ff5e4 100644
--- a/mm/slab_common.c
+++ b/mm/slab_common.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Slab allocator functions that are independent of the allocator strategy
*
@@ -165,9 +166,9 @@ static int init_memcg_params(struct kmem_cache *s,
if (!memcg_nr_cache_ids)
return 0;
- arr = kzalloc(sizeof(struct memcg_cache_array) +
- memcg_nr_cache_ids * sizeof(void *),
- GFP_KERNEL);
+ arr = kvzalloc(sizeof(struct memcg_cache_array) +
+ memcg_nr_cache_ids * sizeof(void *),
+ GFP_KERNEL);
if (!arr)
return -ENOMEM;
@@ -178,15 +179,23 @@ static int init_memcg_params(struct kmem_cache *s,
static void destroy_memcg_params(struct kmem_cache *s)
{
if (is_root_cache(s))
- kfree(rcu_access_pointer(s->memcg_params.memcg_caches));
+ kvfree(rcu_access_pointer(s->memcg_params.memcg_caches));
+}
+
+static void free_memcg_params(struct rcu_head *rcu)
+{
+ struct memcg_cache_array *old;
+
+ old = container_of(rcu, struct memcg_cache_array, rcu);
+ kvfree(old);
}
static int update_memcg_params(struct kmem_cache *s, int new_array_size)
{
struct memcg_cache_array *old, *new;
- new = kzalloc(sizeof(struct memcg_cache_array) +
- new_array_size * sizeof(void *), GFP_KERNEL);
+ new = kvzalloc(sizeof(struct memcg_cache_array) +
+ new_array_size * sizeof(void *), GFP_KERNEL);
if (!new)
return -ENOMEM;
@@ -198,7 +207,7 @@ static int update_memcg_params(struct kmem_cache *s, int new_array_size)
rcu_assign_pointer(s->memcg_params.memcg_caches, new);
if (old)
- kfree_rcu(old, rcu);
+ call_rcu(&old->rcu, free_memcg_params);
return 0;
}
diff --git a/mm/slob.c b/mm/slob.c
index a8bd6fa11a66..10249160b693 100644
--- a/mm/slob.c
+++ b/mm/slob.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SLOB Allocator: Simple List Of Blocks
*
diff --git a/mm/slub.c b/mm/slub.c
index 163352c537ab..1efbb8123037 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* SLUB: A slab allocator that limits cache line use instead of queuing
* objects in per cpu and per node lists.
diff --git a/mm/sparse-vmemmap.c b/mm/sparse-vmemmap.c
index d1a39b8051e0..478ce6d4a2c4 100644
--- a/mm/sparse-vmemmap.c
+++ b/mm/sparse-vmemmap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Virtual Memory Map support
*
diff --git a/mm/sparse.c b/mm/sparse.c
index 83b3bf6461af..60805abf98af 100644
--- a/mm/sparse.c
+++ b/mm/sparse.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* sparse memory mappings.
*/
@@ -22,8 +23,7 @@
* 1) mem_section - memory sections, mem_map's for valid memory
*/
#ifdef CONFIG_SPARSEMEM_EXTREME
-struct mem_section *mem_section[NR_SECTION_ROOTS]
- ____cacheline_internodealigned_in_smp;
+struct mem_section **mem_section;
#else
struct mem_section mem_section[NR_SECTION_ROOTS][SECTIONS_PER_ROOT]
____cacheline_internodealigned_in_smp;
@@ -100,7 +100,7 @@ static inline int sparse_index_init(unsigned long section_nr, int nid)
int __section_nr(struct mem_section* ms)
{
unsigned long root_nr;
- struct mem_section* root;
+ struct mem_section *root = NULL;
for (root_nr = 0; root_nr < NR_SECTION_ROOTS; root_nr++) {
root = __nr_to_section(root_nr * SECTIONS_PER_ROOT);
@@ -111,7 +111,7 @@ int __section_nr(struct mem_section* ms)
break;
}
- VM_BUG_ON(root_nr == NR_SECTION_ROOTS);
+ VM_BUG_ON(!root);
return (root_nr * SECTIONS_PER_ROOT) + (ms - root);
}
@@ -207,6 +207,16 @@ void __init memory_present(int nid, unsigned long start, unsigned long end)
{
unsigned long pfn;
+#ifdef CONFIG_SPARSEMEM_EXTREME
+ if (unlikely(!mem_section)) {
+ unsigned long size, align;
+
+ size = sizeof(struct mem_section) * NR_SECTION_ROOTS;
+ align = 1 << (INTERNODE_CACHE_SHIFT);
+ mem_section = memblock_virt_alloc(size, align);
+ }
+#endif
+
start &= PAGE_SECTION_MASK;
mminit_validate_memmodel_limits(&start, &end);
for (pfn = start; pfn < end; pfn += PAGES_PER_SECTION) {
@@ -329,11 +339,17 @@ again:
static void __init check_usemap_section_nr(int nid, unsigned long *usemap)
{
unsigned long usemap_snr, pgdat_snr;
- static unsigned long old_usemap_snr = NR_MEM_SECTIONS;
- static unsigned long old_pgdat_snr = NR_MEM_SECTIONS;
+ static unsigned long old_usemap_snr;
+ static unsigned long old_pgdat_snr;
struct pglist_data *pgdat = NODE_DATA(nid);
int usemap_nid;
+ /* First call */
+ if (!old_usemap_snr) {
+ old_usemap_snr = NR_MEM_SECTIONS;
+ old_pgdat_snr = NR_MEM_SECTIONS;
+ }
+
usemap_snr = pfn_to_section_nr(__pa(usemap) >> PAGE_SHIFT);
pgdat_snr = pfn_to_section_nr(__pa(pgdat) >> PAGE_SHIFT);
if (usemap_snr == pgdat_snr)
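
With CONFIG_SPARSEMEM_EXTREME, mem_section is now a pointer that memory_present() allocates on first use instead of a large statically sized array. A rough userspace analogue of that allocate-on-first-use root table (NR_ROOTS and the helper below are hypothetical stand-ins):

#include <stdlib.h>

#define NR_ROOTS 2048			/* stand-in for NR_SECTION_ROOTS */

struct section { unsigned long state; };

/* Formerly a big static array; now just a pointer that stays NULL
 * until the first caller actually needs it. */
static struct section **roots;

struct section **get_roots(void)
{
	if (!roots)
		roots = calloc(NR_ROOTS, sizeof(*roots));	/* memblock_virt_alloc() in the kernel */
	return roots;
}
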
diff --git a/mm/swap.c b/mm/swap.c
index 9295ae960d66..a77d68f2c1b6 100644
--- a/mm/swap.c
+++ b/mm/swap.c
@@ -575,7 +575,7 @@ static void lru_lazyfree_fn(struct page *page, struct lruvec *lruvec,
void *arg)
{
if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
- !PageUnevictable(page)) {
+ !PageSwapCache(page) && !PageUnevictable(page)) {
bool active = PageActive(page);
del_page_from_lru_list(page, lruvec,
@@ -665,7 +665,7 @@ void deactivate_file_page(struct page *page)
void mark_page_lazyfree(struct page *page)
{
if (PageLRU(page) && PageAnon(page) && PageSwapBacked(page) &&
- !PageUnevictable(page)) {
+ !PageSwapCache(page) && !PageUnevictable(page)) {
struct pagevec *pvec = &get_cpu_var(lru_lazyfree_pvecs);
get_page(page);
diff --git a/mm/swap_cgroup.c b/mm/swap_cgroup.c
index fcd2740f4ed7..45affaef3bc6 100644
--- a/mm/swap_cgroup.c
+++ b/mm/swap_cgroup.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
#include <linux/swap_cgroup.h>
#include <linux/vmalloc.h>
#include <linux/mm.h>
diff --git a/mm/swap_slots.c b/mm/swap_slots.c
index 13a174006b91..d81cfc5a43d5 100644
--- a/mm/swap_slots.c
+++ b/mm/swap_slots.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Manage cache of swap slots to be used for and returned from
* swap.
diff --git a/mm/swap_state.c b/mm/swap_state.c
index 71ce2d1ccbf7..326439428daf 100644
--- a/mm/swap_state.c
+++ b/mm/swap_state.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/swap_state.c
*
@@ -39,10 +40,6 @@ struct address_space *swapper_spaces[MAX_SWAPFILES];
static unsigned int nr_swapper_spaces[MAX_SWAPFILES];
bool swap_vma_readahead = true;
-#define SWAP_RA_MAX_ORDER_DEFAULT 3
-
-static int swap_ra_max_order = SWAP_RA_MAX_ORDER_DEFAULT;
-
#define SWAP_RA_WIN_SHIFT (PAGE_SHIFT / 2)
#define SWAP_RA_HITS_MASK ((1UL << SWAP_RA_WIN_SHIFT) - 1)
#define SWAP_RA_HITS_MAX SWAP_RA_HITS_MASK
@@ -242,6 +239,17 @@ int add_to_swap(struct page *page)
* clear SWAP_HAS_CACHE flag.
*/
goto fail;
+ /*
+ * Normally the page will be dirtied in unmap because its pte should be
+	 * dirty. A special case is a MADV_FREE page. The page's pte could
+	 * have the dirty bit cleared while the page's SwapBacked bit is
+	 * still set, because clearing the dirty bit and the SwapBacked bit
+	 * is not protected by any lock. For such a page, unmap will not set
+	 * the dirty bit, so page reclaim will not write the page out. This
+	 * can cause data corruption when the page is swapped in later.
+	 * Always setting the dirty bit for the page solves the problem.
+ */
+ set_page_dirty(page);
return 1;
@@ -653,6 +661,13 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
pte_t *tpte;
#endif
+ max_win = 1 << min_t(unsigned int, READ_ONCE(page_cluster),
+ SWAP_RA_ORDER_CEILING);
+ if (max_win == 1) {
+ swap_ra->win = 1;
+ return NULL;
+ }
+
faddr = vmf->address;
entry = pte_to_swp_entry(vmf->orig_pte);
if ((unlikely(non_swap_entry(entry))))
@@ -661,12 +676,6 @@ struct page *swap_readahead_detect(struct vm_fault *vmf,
if (page)
return page;
- max_win = 1 << READ_ONCE(swap_ra_max_order);
- if (max_win == 1) {
- swap_ra->win = 1;
- return NULL;
- }
-
fpfn = PFN_DOWN(faddr);
swap_ra_info = GET_SWAP_RA_VAL(vma);
pfn = PFN_DOWN(SWAP_RA_ADDR(swap_ra_info));
@@ -775,32 +784,8 @@ static struct kobj_attribute vma_ra_enabled_attr =
__ATTR(vma_ra_enabled, 0644, vma_ra_enabled_show,
vma_ra_enabled_store);
-static ssize_t vma_ra_max_order_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf)
-{
- return sprintf(buf, "%d\n", swap_ra_max_order);
-}
-static ssize_t vma_ra_max_order_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count)
-{
- int err, v;
-
- err = kstrtoint(buf, 10, &v);
- if (err || v > SWAP_RA_ORDER_CEILING || v <= 0)
- return -EINVAL;
-
- swap_ra_max_order = v;
-
- return count;
-}
-static struct kobj_attribute vma_ra_max_order_attr =
- __ATTR(vma_ra_max_order, 0644, vma_ra_max_order_show,
- vma_ra_max_order_store);
-
static struct attribute *swap_attrs[] = {
&vma_ra_enabled_attr.attr,
- &vma_ra_max_order_attr.attr,
NULL,
};
diff --git a/mm/swapfile.c b/mm/swapfile.c
index bf91dc9e7a79..e47a21e64764 100644
--- a/mm/swapfile.c
+++ b/mm/swapfile.c
@@ -2869,6 +2869,7 @@ static struct swap_info_struct *alloc_swap_info(void)
p->flags = SWP_USED;
spin_unlock(&swap_lock);
spin_lock_init(&p->lock);
+ spin_lock_init(&p->cont_lock);
return p;
}
@@ -3545,6 +3546,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
head = vmalloc_to_page(si->swap_map + offset);
offset &= ~PAGE_MASK;
+ spin_lock(&si->cont_lock);
/*
* Page allocation does not initialize the page's lru field,
* but it does always reset its private field.
@@ -3564,7 +3566,7 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
* a continuation page, free our allocation and use this one.
*/
if (!(count & COUNT_CONTINUED))
- goto out;
+ goto out_unlock_cont;
map = kmap_atomic(list_page) + offset;
count = *map;
@@ -3575,11 +3577,13 @@ int add_swap_count_continuation(swp_entry_t entry, gfp_t gfp_mask)
* free our allocation and use this one.
*/
if ((count & ~COUNT_CONTINUED) != SWAP_CONT_MAX)
- goto out;
+ goto out_unlock_cont;
}
list_add_tail(&page->lru, &head->lru);
page = NULL; /* now it's attached, don't free it */
+out_unlock_cont:
+ spin_unlock(&si->cont_lock);
out:
unlock_cluster(ci);
spin_unlock(&si->lock);
@@ -3604,6 +3608,7 @@ static bool swap_count_continued(struct swap_info_struct *si,
struct page *head;
struct page *page;
unsigned char *map;
+ bool ret;
head = vmalloc_to_page(si->swap_map + offset);
if (page_private(head) != SWP_CONTINUED) {
@@ -3611,6 +3616,7 @@ static bool swap_count_continued(struct swap_info_struct *si,
return false; /* need to add count continuation */
}
+ spin_lock(&si->cont_lock);
offset &= ~PAGE_MASK;
page = list_entry(head->lru.next, struct page, lru);
map = kmap_atomic(page) + offset;
@@ -3631,8 +3637,10 @@ static bool swap_count_continued(struct swap_info_struct *si,
if (*map == SWAP_CONT_MAX) {
kunmap_atomic(map);
page = list_entry(page->lru.next, struct page, lru);
- if (page == head)
- return false; /* add count continuation */
+ if (page == head) {
+ ret = false; /* add count continuation */
+ goto out;
+ }
map = kmap_atomic(page) + offset;
init_map: *map = 0; /* we didn't zero the page */
}
@@ -3645,7 +3653,7 @@ init_map: *map = 0; /* we didn't zero the page */
kunmap_atomic(map);
page = list_entry(page->lru.prev, struct page, lru);
}
- return true; /* incremented */
+ ret = true; /* incremented */
} else { /* decrementing */
/*
@@ -3671,8 +3679,11 @@ init_map: *map = 0; /* we didn't zero the page */
kunmap_atomic(map);
page = list_entry(page->lru.prev, struct page, lru);
}
- return count == COUNT_CONTINUED;
+ ret = count == COUNT_CONTINUED;
}
+out:
+ spin_unlock(&si->cont_lock);
+ return ret;
}
/*
diff --git a/mm/vmacache.c b/mm/vmacache.c
index 7ffa0ee341b5..db7596eb6132 100644
--- a/mm/vmacache.c
+++ b/mm/vmacache.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Copyright (C) 2014 Davidlohr Bueso.
*/
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 8a43db6284eb..673942094328 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -1695,11 +1695,6 @@ static void *__vmalloc_area_node(struct vm_struct *area, gfp_t gfp_mask,
for (i = 0; i < area->nr_pages; i++) {
struct page *page;
- if (fatal_signal_pending(current)) {
- area->nr_pages = i;
- goto fail_no_warn;
- }
-
if (node == NUMA_NO_NODE)
page = alloc_page(alloc_mask|highmem_mask);
else
@@ -1723,7 +1718,6 @@ fail:
warn_alloc(gfp_mask, NULL,
"vmalloc: allocation failure, allocated %ld of %ld bytes",
(area->nr_pages*PAGE_SIZE), area->size);
-fail_no_warn:
vfree(area->addr);
return NULL;
}
diff --git a/mm/vmscan.c b/mm/vmscan.c
index 13d711dd8776..15b483ef6440 100644
--- a/mm/vmscan.c
+++ b/mm/vmscan.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* linux/mm/vmscan.c
*
@@ -1867,7 +1868,7 @@ shrink_inactive_list(unsigned long nr_to_scan, struct lruvec *lruvec,
* also allow kswapd to start writing pages during reclaim.
*/
if (stat.nr_unqueued_dirty == nr_taken) {
- wakeup_flusher_threads(0, WB_REASON_VMSCAN);
+ wakeup_flusher_threads(WB_REASON_VMSCAN);
set_bit(PGDAT_DIRTY, &pgdat->flags);
}
diff --git a/mm/workingset.c b/mm/workingset.c
index 7119cd745ace..b997c9de28f6 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
/*
* Workingset detection
*
diff --git a/mm/z3fold.c b/mm/z3fold.c
index 486550df32be..b2ba2ba585f3 100644
--- a/mm/z3fold.c
+++ b/mm/z3fold.c
@@ -250,6 +250,7 @@ static void __release_z3fold_page(struct z3fold_header *zhdr, bool locked)
WARN_ON(!list_empty(&zhdr->buddy));
set_bit(PAGE_STALE, &page->private);
+ clear_bit(NEEDS_COMPACTING, &page->private);
spin_lock(&pool->lock);
if (!list_empty(&page->lru))
list_del(&page->lru);
@@ -303,7 +304,6 @@ static void free_pages_work(struct work_struct *w)
list_del(&zhdr->buddy);
if (WARN_ON(!test_bit(PAGE_STALE, &page->private)))
continue;
- clear_bit(NEEDS_COMPACTING, &page->private);
spin_unlock(&pool->stale_lock);
cancel_work_sync(&zhdr->work);
free_z3fold_page(page);
@@ -624,10 +624,8 @@ lookup:
* stale pages list. cancel_work_sync() can sleep so we must make
* sure it won't be called in case we're in atomic context.
*/
- if (zhdr && (can_sleep || !work_pending(&zhdr->work) ||
- !unlikely(work_busy(&zhdr->work)))) {
+ if (zhdr && (can_sleep || !work_pending(&zhdr->work))) {
list_del(&zhdr->buddy);
- clear_bit(NEEDS_COMPACTING, &page->private);
spin_unlock(&pool->stale_lock);
if (can_sleep)
cancel_work_sync(&zhdr->work);
@@ -875,16 +873,18 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries)
goto next;
}
next:
+ spin_lock(&pool->lock);
if (test_bit(PAGE_HEADLESS, &page->private)) {
if (ret == 0) {
+ spin_unlock(&pool->lock);
free_z3fold_page(page);
return 0;
}
} else if (kref_put(&zhdr->refcount, release_z3fold_page)) {
atomic64_dec(&pool->pages_nr);
+ spin_unlock(&pool->lock);
return 0;
}
- spin_lock(&pool->lock);
/*
* Add to the beginning of LRU.