summaryrefslogtreecommitdiff
path: root/mm
diff options
context:
space:
mode:
Diffstat (limited to 'mm')
-rw-r--r--mm/compaction.c11
-rw-r--r--mm/debug_vm_pgtable.c31
-rw-r--r--mm/huge_memory.c28
-rw-r--r--mm/internal.h6
-rw-r--r--mm/kasan/common.c2
-rw-r--r--mm/memblock.c28
-rw-r--r--mm/memcontrol.c3
-rw-r--r--mm/memory.c23
-rw-r--r--mm/migrate.c13
-rw-r--r--mm/mm_init.c43
-rw-r--r--mm/page_alloc.c11
-rw-r--r--mm/page_table_check.c11
-rw-r--r--mm/shmem.c2
-rw-r--r--mm/slub.c7
-rw-r--r--mm/util.c4
-rw-r--r--mm/vmalloc.c21
16 files changed, 111 insertions, 133 deletions
diff --git a/mm/compaction.c b/mm/compaction.c
index e731d45befc7..739b1bf3d637 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -79,6 +79,13 @@ static inline bool is_via_compact_memory(int order) { return false; }
#define COMPACTION_HPAGE_ORDER (PMD_SHIFT - PAGE_SHIFT)
#endif
+static struct page *mark_allocated_noprof(struct page *page, unsigned int order, gfp_t gfp_flags)
+{
+ post_alloc_hook(page, order, __GFP_MOVABLE);
+ return page;
+}
+#define mark_allocated(...) alloc_hooks(mark_allocated_noprof(__VA_ARGS__))
+
static void split_map_pages(struct list_head *freepages)
{
unsigned int i, order;
@@ -93,7 +100,7 @@ static void split_map_pages(struct list_head *freepages)
nr_pages = 1 << order;
- post_alloc_hook(page, order, __GFP_MOVABLE);
+ mark_allocated(page, order, __GFP_MOVABLE);
if (order)
split_page(page, order);
@@ -122,7 +129,7 @@ static unsigned long release_free_list(struct list_head *freepages)
* Convert free pages into post allocation pages, so
* that we can free them via __free_page.
*/
- post_alloc_hook(page, order, __GFP_MOVABLE);
+ mark_allocated(page, order, __GFP_MOVABLE);
__free_pages(page, order);
if (pfn > high_pfn)
high_pfn = pfn;
diff --git a/mm/debug_vm_pgtable.c b/mm/debug_vm_pgtable.c
index b104a353b532..e4969fb54da3 100644
--- a/mm/debug_vm_pgtable.c
+++ b/mm/debug_vm_pgtable.c
@@ -40,22 +40,7 @@
* Please refer Documentation/mm/arch_pgtable_helpers.rst for the semantics
* expectations that are being validated here. All future changes in here
* or the documentation need to be in sync.
- *
- * On s390 platform, the lower 4 bits are used to identify given page table
- * entry type. But these bits might affect the ability to clear entries with
- * pxx_clear() because of how dynamic page table folding works on s390. So
- * while loading up the entries do not change the lower 4 bits. It does not
- * have affect any other platform. Also avoid the 62nd bit on ppc64 that is
- * used to mark a pte entry.
*/
-#define S390_SKIP_MASK GENMASK(3, 0)
-#if __BITS_PER_LONG == 64
-#define PPC64_SKIP_MASK GENMASK(62, 62)
-#else
-#define PPC64_SKIP_MASK 0x0
-#endif
-#define ARCH_SKIP_MASK (S390_SKIP_MASK | PPC64_SKIP_MASK)
-#define RANDOM_ORVALUE (GENMASK(BITS_PER_LONG - 1, 0) & ~ARCH_SKIP_MASK)
#define RANDOM_NZVALUE GENMASK(7, 0)
struct pgtable_debug_args {
@@ -511,8 +496,7 @@ static void __init pud_clear_tests(struct pgtable_debug_args *args)
return;
pr_debug("Validating PUD clear\n");
- pud = __pud(pud_val(pud) | RANDOM_ORVALUE);
- WRITE_ONCE(*args->pudp, pud);
+ WARN_ON(pud_none(pud));
pud_clear(args->pudp);
pud = READ_ONCE(*args->pudp);
WARN_ON(!pud_none(pud));
@@ -548,8 +532,7 @@ static void __init p4d_clear_tests(struct pgtable_debug_args *args)
return;
pr_debug("Validating P4D clear\n");
- p4d = __p4d(p4d_val(p4d) | RANDOM_ORVALUE);
- WRITE_ONCE(*args->p4dp, p4d);
+ WARN_ON(p4d_none(p4d));
p4d_clear(args->p4dp);
p4d = READ_ONCE(*args->p4dp);
WARN_ON(!p4d_none(p4d));
@@ -582,8 +565,7 @@ static void __init pgd_clear_tests(struct pgtable_debug_args *args)
return;
pr_debug("Validating PGD clear\n");
- pgd = __pgd(pgd_val(pgd) | RANDOM_ORVALUE);
- WRITE_ONCE(*args->pgdp, pgd);
+ WARN_ON(pgd_none(pgd));
pgd_clear(args->pgdp);
pgd = READ_ONCE(*args->pgdp);
WARN_ON(!pgd_none(pgd));
@@ -634,10 +616,8 @@ static void __init pte_clear_tests(struct pgtable_debug_args *args)
if (WARN_ON(!args->ptep))
return;
-#ifndef CONFIG_RISCV
- pte = __pte(pte_val(pte) | RANDOM_ORVALUE);
-#endif
set_pte_at(args->mm, args->vaddr, args->ptep, pte);
+ WARN_ON(pte_none(pte));
flush_dcache_page(page);
barrier();
ptep_clear(args->mm, args->vaddr, args->ptep);
@@ -650,8 +630,7 @@ static void __init pmd_clear_tests(struct pgtable_debug_args *args)
pmd_t pmd = READ_ONCE(*args->pmdp);
pr_debug("Validating PMD clear\n");
- pmd = __pmd(pmd_val(pmd) | RANDOM_ORVALUE);
- WRITE_ONCE(*args->pmdp, pmd);
+ WARN_ON(pmd_none(pmd));
pmd_clear(args->pmdp);
pmd = READ_ONCE(*args->pmdp);
WARN_ON(!pmd_none(pmd));
diff --git a/mm/huge_memory.c b/mm/huge_memory.c
index 89932fd0f62e..db7946a0a28c 100644
--- a/mm/huge_memory.c
+++ b/mm/huge_memory.c
@@ -3009,30 +3009,36 @@ int split_huge_page_to_list_to_order(struct page *page, struct list_head *list,
if (new_order >= folio_order(folio))
return -EINVAL;
- /* Cannot split anonymous THP to order-1 */
- if (new_order == 1 && folio_test_anon(folio)) {
- VM_WARN_ONCE(1, "Cannot split to order-1 folio");
- return -EINVAL;
- }
-
- if (new_order) {
- /* Only swapping a whole PMD-mapped folio is supported */
- if (folio_test_swapcache(folio))
+ if (folio_test_anon(folio)) {
+ /* order-1 is not supported for anonymous THP. */
+ if (new_order == 1) {
+ VM_WARN_ONCE(1, "Cannot split to order-1 folio");
return -EINVAL;
+ }
+ } else if (new_order) {
/* Split shmem folio to non-zero order not supported */
if (shmem_mapping(folio->mapping)) {
VM_WARN_ONCE(1,
"Cannot split shmem folio to non-0 order");
return -EINVAL;
}
- /* No split if the file system does not support large folio */
- if (!mapping_large_folio_support(folio->mapping)) {
+ /*
+ * No split if the file system does not support large folio.
+ * Note that we might still have THPs in such mappings due to
+ * CONFIG_READ_ONLY_THP_FOR_FS. But in that case, the mapping
+ * does not actually support large folios properly.
+ */
+ if (IS_ENABLED(CONFIG_READ_ONLY_THP_FOR_FS) &&
+ !mapping_large_folio_support(folio->mapping)) {
VM_WARN_ONCE(1,
"Cannot split file folio to non-0 order");
return -EINVAL;
}
}
+ /* Only swapping a whole PMD-mapped folio is supported */
+ if (folio_test_swapcache(folio) && new_order)
+ return -EINVAL;
is_hzp = is_huge_zero_folio(folio);
if (is_hzp) {
diff --git a/mm/internal.h b/mm/internal.h
index b2c75b12014e..6902b7dd8509 100644
--- a/mm/internal.h
+++ b/mm/internal.h
@@ -588,7 +588,6 @@ extern void __putback_isolated_page(struct page *page, unsigned int order,
extern void memblock_free_pages(struct page *page, unsigned long pfn,
unsigned int order);
extern void __free_pages_core(struct page *page, unsigned int order);
-extern void kernel_init_pages(struct page *page, int numpages);
/*
* This will have no effect, other than possibly generating a warning, if the
@@ -1436,11 +1435,6 @@ unsigned long shrink_slab(gfp_t gfp_mask, int nid, struct mem_cgroup *memcg,
int priority);
#ifdef CONFIG_64BIT
-/* VM is sealed, in vm_flags */
-#define VM_SEALED _BITUL(63)
-#endif
-
-#ifdef CONFIG_64BIT
static inline int can_do_mseal(unsigned long flags)
{
if (flags)
diff --git a/mm/kasan/common.c b/mm/kasan/common.c
index e7c9a4dc89f8..85e7c6b4575c 100644
--- a/mm/kasan/common.c
+++ b/mm/kasan/common.c
@@ -532,7 +532,7 @@ void __kasan_mempool_unpoison_object(void *ptr, size_t size, unsigned long ip)
return;
/* Unpoison the object and save alloc info for non-kmalloc() allocations. */
- unpoison_slab_object(slab->slab_cache, ptr, size, flags);
+ unpoison_slab_object(slab->slab_cache, ptr, flags, false);
/* Poison the redzone and save alloc info for kmalloc() allocations. */
if (is_kmalloc_cache(slab->slab_cache))
diff --git a/mm/memblock.c b/mm/memblock.c
index 08e9806b1cf9..e81fb68f7f88 100644
--- a/mm/memblock.c
+++ b/mm/memblock.c
@@ -754,7 +754,7 @@ bool __init_memblock memblock_validate_numa_coverage(unsigned long threshold_byt
/* calculate lose page */
for_each_mem_pfn_range(i, MAX_NUMNODES, &start_pfn, &end_pfn, &nid) {
- if (nid == NUMA_NO_NODE)
+ if (!numa_valid_node(nid))
nr_pages += end_pfn - start_pfn;
}
@@ -1061,7 +1061,7 @@ static bool should_skip_region(struct memblock_type *type,
return false;
/* only memory regions are associated with nodes, check it */
- if (nid != NUMA_NO_NODE && nid != m_nid)
+ if (numa_valid_node(nid) && nid != m_nid)
return true;
/* skip hotpluggable memory regions if needed */
@@ -1118,10 +1118,6 @@ void __next_mem_range(u64 *idx, int nid, enum memblock_flags flags,
int idx_a = *idx & 0xffffffff;
int idx_b = *idx >> 32;
- if (WARN_ONCE(nid == MAX_NUMNODES,
- "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
- nid = NUMA_NO_NODE;
-
for (; idx_a < type_a->cnt; idx_a++) {
struct memblock_region *m = &type_a->regions[idx_a];
@@ -1215,9 +1211,6 @@ void __init_memblock __next_mem_range_rev(u64 *idx, int nid,
int idx_a = *idx & 0xffffffff;
int idx_b = *idx >> 32;
- if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
- nid = NUMA_NO_NODE;
-
if (*idx == (u64)ULLONG_MAX) {
idx_a = type_a->cnt - 1;
if (type_b != NULL)
@@ -1303,7 +1296,7 @@ void __init_memblock __next_mem_pfn_range(int *idx, int nid,
if (PFN_UP(r->base) >= PFN_DOWN(r->base + r->size))
continue;
- if (nid == MAX_NUMNODES || nid == r_nid)
+ if (!numa_valid_node(nid) || nid == r_nid)
break;
}
if (*idx >= type->cnt) {
@@ -1339,10 +1332,6 @@ int __init_memblock memblock_set_node(phys_addr_t base, phys_addr_t size,
int start_rgn, end_rgn;
int i, ret;
- if (WARN_ONCE(nid == MAX_NUMNODES,
- "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
- nid = NUMA_NO_NODE;
-
ret = memblock_isolate_range(type, base, size, &start_rgn, &end_rgn);
if (ret)
return ret;
@@ -1452,9 +1441,6 @@ phys_addr_t __init memblock_alloc_range_nid(phys_addr_t size,
enum memblock_flags flags = choose_memblock_flags();
phys_addr_t found;
- if (WARN_ONCE(nid == MAX_NUMNODES, "Usage of MAX_NUMNODES is deprecated. Use NUMA_NO_NODE instead\n"))
- nid = NUMA_NO_NODE;
-
if (!align) {
/* Can't use WARNs this early in boot on powerpc */
dump_stack();
@@ -1467,7 +1453,7 @@ again:
if (found && !memblock_reserve(found, size))
goto done;
- if (nid != NUMA_NO_NODE && !exact_nid) {
+ if (numa_valid_node(nid) && !exact_nid) {
found = memblock_find_in_range_node(size, align, start,
end, NUMA_NO_NODE,
flags);
@@ -1987,7 +1973,7 @@ static void __init_memblock memblock_dump(struct memblock_type *type)
end = base + size - 1;
flags = rgn->flags;
#ifdef CONFIG_NUMA
- if (memblock_get_region_node(rgn) != MAX_NUMNODES)
+ if (numa_valid_node(memblock_get_region_node(rgn)))
snprintf(nid_buf, sizeof(nid_buf), " on node %d",
memblock_get_region_node(rgn));
#endif
@@ -2181,7 +2167,7 @@ static void __init memmap_init_reserved_pages(void)
start = region->base;
end = start + region->size;
- if (nid == NUMA_NO_NODE || nid >= MAX_NUMNODES)
+ if (!numa_valid_node(nid))
nid = early_pfn_to_nid(PFN_DOWN(start));
reserve_bootmem_region(start, end, nid);
@@ -2272,7 +2258,7 @@ static int memblock_debug_show(struct seq_file *m, void *private)
seq_printf(m, "%4d: ", i);
seq_printf(m, "%pa..%pa ", &reg->base, &end);
- if (nid != MAX_NUMNODES)
+ if (numa_valid_node(nid))
seq_printf(m, "%4d ", nid);
else
seq_printf(m, "%4c ", 'x');
diff --git a/mm/memcontrol.c b/mm/memcontrol.c
index 36793e509f47..71fe2a95b8bd 100644
--- a/mm/memcontrol.c
+++ b/mm/memcontrol.c
@@ -7745,8 +7745,7 @@ void __mem_cgroup_uncharge_folios(struct folio_batch *folios)
* @new: Replacement folio.
*
* Charge @new as a replacement folio for @old. @old will
- * be uncharged upon free. This is only used by the page cache
- * (in replace_page_cache_folio()).
+ * be uncharged upon free.
*
* Both folios must be locked, @new->mapping must be set up.
*/
diff --git a/mm/memory.c b/mm/memory.c
index 0f47a533014e..d10e616d7389 100644
--- a/mm/memory.c
+++ b/mm/memory.c
@@ -1507,12 +1507,6 @@ static __always_inline void zap_present_folio_ptes(struct mmu_gather *tlb,
if (unlikely(folio_mapcount(folio) < 0))
print_bad_pte(vma, addr, ptent, page);
}
-
- if (want_init_mlocked_on_free() && folio_test_mlocked(folio) &&
- !delay_rmap && folio_test_anon(folio)) {
- kernel_init_pages(page, folio_nr_pages(folio));
- }
-
if (unlikely(__tlb_remove_folio_pages(tlb, page, nr, delay_rmap))) {
*force_flush = true;
*force_break = true;
@@ -4614,8 +4608,9 @@ vm_fault_t do_set_pmd(struct vm_fault *vmf, struct page *page)
if (!thp_vma_suitable_order(vma, haddr, PMD_ORDER))
return ret;
- if (page != &folio->page || folio_order(folio) != HPAGE_PMD_ORDER)
+ if (folio_order(folio) != HPAGE_PMD_ORDER)
return ret;
+ page = &folio->page;
/*
* Just backoff if any subpage of a THP is corrupted otherwise
@@ -5106,10 +5101,16 @@ static void numa_rebuild_large_mapping(struct vm_fault *vmf, struct vm_area_stru
bool ignore_writable, bool pte_write_upgrade)
{
int nr = pte_pfn(fault_pte) - folio_pfn(folio);
- unsigned long start = max(vmf->address - nr * PAGE_SIZE, vma->vm_start);
- unsigned long end = min(vmf->address + (folio_nr_pages(folio) - nr) * PAGE_SIZE, vma->vm_end);
- pte_t *start_ptep = vmf->pte - (vmf->address - start) / PAGE_SIZE;
- unsigned long addr;
+ unsigned long start, end, addr = vmf->address;
+ unsigned long addr_start = addr - (nr << PAGE_SHIFT);
+ unsigned long pt_start = ALIGN_DOWN(addr, PMD_SIZE);
+ pte_t *start_ptep;
+
+ /* Stay within the VMA and within the page table. */
+ start = max3(addr_start, pt_start, vma->vm_start);
+ end = min3(addr_start + folio_size(folio), pt_start + PMD_SIZE,
+ vma->vm_end);
+ start_ptep = vmf->pte - ((addr - start) >> PAGE_SHIFT);
/* Restore all PTEs' mapping of the large folio */
for (addr = start; addr != end; start_ptep++, addr += PAGE_SIZE) {
diff --git a/mm/migrate.c b/mm/migrate.c
index dd04f578c19c..20cb9f5f7446 100644
--- a/mm/migrate.c
+++ b/mm/migrate.c
@@ -1654,7 +1654,16 @@ static int migrate_pages_batch(struct list_head *from,
/*
* The rare folio on the deferred split list should
- * be split now. It should not count as a failure.
+ * be split now. It should not count as a failure:
+ * but increment nr_failed because, without doing so,
+ * migrate_pages() may report success with (split but
+ * unmigrated) pages still on its fromlist; whereas it
+ * always reports success when its fromlist is empty.
+ * stats->nr_thp_failed should be increased too,
+ * otherwise stats inconsistency will happen when
+ * migrate_pages_batch is called via migrate_pages()
+ * with MIGRATE_SYNC and MIGRATE_ASYNC.
+ *
* Only check it without removing it from the list.
* Since the folio can be on deferred_split_scan()
* local list and removing it can cause the local list
@@ -1669,6 +1678,8 @@ static int migrate_pages_batch(struct list_head *from,
if (nr_pages > 2 &&
!list_empty(&folio->_deferred_list)) {
if (try_split_folio(folio, split_folios) == 0) {
+ nr_failed++;
+ stats->nr_thp_failed += is_thp;
stats->nr_thp_split += is_thp;
stats->nr_split++;
continue;
diff --git a/mm/mm_init.c b/mm/mm_init.c
index f72b852bd5b8..3ec04933f7fd 100644
--- a/mm/mm_init.c
+++ b/mm/mm_init.c
@@ -2523,9 +2523,6 @@ EXPORT_SYMBOL(init_on_alloc);
DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_ON_FREE_DEFAULT_ON, init_on_free);
EXPORT_SYMBOL(init_on_free);
-DEFINE_STATIC_KEY_MAYBE(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON, init_mlocked_on_free);
-EXPORT_SYMBOL(init_mlocked_on_free);
-
static bool _init_on_alloc_enabled_early __read_mostly
= IS_ENABLED(CONFIG_INIT_ON_ALLOC_DEFAULT_ON);
static int __init early_init_on_alloc(char *buf)
@@ -2543,14 +2540,6 @@ static int __init early_init_on_free(char *buf)
}
early_param("init_on_free", early_init_on_free);
-static bool _init_mlocked_on_free_enabled_early __read_mostly
- = IS_ENABLED(CONFIG_INIT_MLOCKED_ON_FREE_DEFAULT_ON);
-static int __init early_init_mlocked_on_free(char *buf)
-{
- return kstrtobool(buf, &_init_mlocked_on_free_enabled_early);
-}
-early_param("init_mlocked_on_free", early_init_mlocked_on_free);
-
DEFINE_STATIC_KEY_MAYBE(CONFIG_DEBUG_VM, check_pages_enabled);
/*
@@ -2578,21 +2567,12 @@ static void __init mem_debugging_and_hardening_init(void)
}
#endif
- if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early ||
- _init_mlocked_on_free_enabled_early) &&
+ if ((_init_on_alloc_enabled_early || _init_on_free_enabled_early) &&
page_poisoning_requested) {
pr_info("mem auto-init: CONFIG_PAGE_POISONING is on, "
- "will take precedence over init_on_alloc, init_on_free "
- "and init_mlocked_on_free\n");
+ "will take precedence over init_on_alloc and init_on_free\n");
_init_on_alloc_enabled_early = false;
_init_on_free_enabled_early = false;
- _init_mlocked_on_free_enabled_early = false;
- }
-
- if (_init_mlocked_on_free_enabled_early && _init_on_free_enabled_early) {
- pr_info("mem auto-init: init_on_free is on, "
- "will take precedence over init_mlocked_on_free\n");
- _init_mlocked_on_free_enabled_early = false;
}
if (_init_on_alloc_enabled_early) {
@@ -2609,17 +2589,9 @@ static void __init mem_debugging_and_hardening_init(void)
static_branch_disable(&init_on_free);
}
- if (_init_mlocked_on_free_enabled_early) {
- want_check_pages = true;
- static_branch_enable(&init_mlocked_on_free);
- } else {
- static_branch_disable(&init_mlocked_on_free);
- }
-
- if (IS_ENABLED(CONFIG_KMSAN) && (_init_on_alloc_enabled_early ||
- _init_on_free_enabled_early || _init_mlocked_on_free_enabled_early))
- pr_info("mem auto-init: please make sure init_on_alloc, init_on_free and "
- "init_mlocked_on_free are disabled when running KMSAN\n");
+ if (IS_ENABLED(CONFIG_KMSAN) &&
+ (_init_on_alloc_enabled_early || _init_on_free_enabled_early))
+ pr_info("mem auto-init: please make sure init_on_alloc and init_on_free are disabled when running KMSAN\n");
#ifdef CONFIG_DEBUG_PAGEALLOC
if (debug_pagealloc_enabled()) {
@@ -2658,10 +2630,9 @@ static void __init report_meminit(void)
else
stack = "off";
- pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s, mlocked free:%s\n",
+ pr_info("mem auto-init: stack:%s, heap alloc:%s, heap free:%s\n",
stack, want_init_on_alloc(GFP_KERNEL) ? "on" : "off",
- want_init_on_free() ? "on" : "off",
- want_init_mlocked_on_free() ? "on" : "off");
+ want_init_on_free() ? "on" : "off");
if (want_init_on_free())
pr_info("mem auto-init: clearing system memory may take some time...\n");
}
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 222299b5c0e6..9ecf99190ea2 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -504,10 +504,15 @@ out:
static inline unsigned int order_to_pindex(int migratetype, int order)
{
+ bool __maybe_unused movable;
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
if (order > PAGE_ALLOC_COSTLY_ORDER) {
VM_BUG_ON(order != HPAGE_PMD_ORDER);
- return NR_LOWORDER_PCP_LISTS;
+
+ movable = migratetype == MIGRATE_MOVABLE;
+
+ return NR_LOWORDER_PCP_LISTS + movable;
}
#else
VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
@@ -521,7 +526,7 @@ static inline int pindex_to_order(unsigned int pindex)
int order = pindex / MIGRATE_PCPTYPES;
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
- if (pindex == NR_LOWORDER_PCP_LISTS)
+ if (pindex >= NR_LOWORDER_PCP_LISTS)
order = HPAGE_PMD_ORDER;
#else
VM_BUG_ON(order > PAGE_ALLOC_COSTLY_ORDER);
@@ -1016,7 +1021,7 @@ static inline bool should_skip_kasan_poison(struct page *page)
return page_kasan_tag(page) == KASAN_TAG_KERNEL;
}
-void kernel_init_pages(struct page *page, int numpages)
+static void kernel_init_pages(struct page *page, int numpages)
{
int i;
diff --git a/mm/page_table_check.c b/mm/page_table_check.c
index 4169576bed72..509c6ef8de40 100644
--- a/mm/page_table_check.c
+++ b/mm/page_table_check.c
@@ -73,6 +73,9 @@ static void page_table_check_clear(unsigned long pfn, unsigned long pgcnt)
page = pfn_to_page(pfn);
page_ext = page_ext_get(page);
+ if (!page_ext)
+ return;
+
BUG_ON(PageSlab(page));
anon = PageAnon(page);
@@ -110,6 +113,9 @@ static void page_table_check_set(unsigned long pfn, unsigned long pgcnt,
page = pfn_to_page(pfn);
page_ext = page_ext_get(page);
+ if (!page_ext)
+ return;
+
BUG_ON(PageSlab(page));
anon = PageAnon(page);
@@ -140,7 +146,10 @@ void __page_table_check_zero(struct page *page, unsigned int order)
BUG_ON(PageSlab(page));
page_ext = page_ext_get(page);
- BUG_ON(!page_ext);
+
+ if (!page_ext)
+ return;
+
for (i = 0; i < (1ul << order); i++) {
struct page_table_check *ptc = get_page_table_check(page_ext);
diff --git a/mm/shmem.c b/mm/shmem.c
index f5d60436b604..a8b181a63402 100644
--- a/mm/shmem.c
+++ b/mm/shmem.c
@@ -1786,7 +1786,7 @@ static int shmem_replace_folio(struct folio **foliop, gfp_t gfp,
xa_lock_irq(&swap_mapping->i_pages);
error = shmem_replace_entry(swap_mapping, swap_index, old, new);
if (!error) {
- mem_cgroup_migrate(old, new);
+ mem_cgroup_replace_folio(old, new);
__lruvec_stat_mod_folio(new, NR_FILE_PAGES, 1);
__lruvec_stat_mod_folio(new, NR_SHMEM, 1);
__lruvec_stat_mod_folio(old, NR_FILE_PAGES, -1);
diff --git a/mm/slub.c b/mm/slub.c
index 1373ac365a46..4927edec6a8c 100644
--- a/mm/slub.c
+++ b/mm/slub.c
@@ -3902,7 +3902,6 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
unsigned int orig_size)
{
unsigned int zero_size = s->object_size;
- struct slabobj_ext *obj_exts;
bool kasan_init = init;
size_t i;
gfp_t init_flags = flags & gfp_allowed_mask;
@@ -3945,9 +3944,11 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
kmemleak_alloc_recursive(p[i], s->object_size, 1,
s->flags, init_flags);
kmsan_slab_alloc(s, p[i], init_flags);
+#ifdef CONFIG_MEM_ALLOC_PROFILING
if (need_slab_obj_ext()) {
+ struct slabobj_ext *obj_exts;
+
obj_exts = prepare_slab_obj_exts_hook(s, flags, p[i]);
-#ifdef CONFIG_MEM_ALLOC_PROFILING
/*
* Currently obj_exts is used only for allocation profiling.
* If other users appear then mem_alloc_profiling_enabled()
@@ -3955,8 +3956,8 @@ bool slab_post_alloc_hook(struct kmem_cache *s, struct list_lru *lru,
*/
if (likely(obj_exts))
alloc_tag_add(&obj_exts->ref, current->alloc_tag, s->size);
-#endif
}
+#endif
}
return memcg_slab_post_alloc_hook(s, lru, flags, size, p);
diff --git a/mm/util.c b/mm/util.c
index 6c3e6710e4de..fe723241b66f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -139,14 +139,14 @@ EXPORT_SYMBOL(kmemdup_noprof);
* kmemdup_array - duplicate a given array.
*
* @src: array to duplicate.
- * @element_size: size of each element of array.
* @count: number of elements to duplicate from array.
+ * @element_size: size of each element of array.
* @gfp: GFP mask to use.
*
* Return: duplicated array of @src or %NULL in case of error,
* result is physically contiguous. Use kfree() to free.
*/
-void *kmemdup_array(const void *src, size_t element_size, size_t count, gfp_t gfp)
+void *kmemdup_array(const void *src, size_t count, size_t element_size, gfp_t gfp)
{
return kmemdup(src, size_mul(element_size, count), gfp);
}
diff --git a/mm/vmalloc.c b/mm/vmalloc.c
index 45e1506d58c3..d0cbdd7c1e5b 100644
--- a/mm/vmalloc.c
+++ b/mm/vmalloc.c
@@ -2498,6 +2498,7 @@ struct vmap_block {
struct list_head free_list;
struct rcu_head rcu_head;
struct list_head purge;
+ unsigned int cpu;
};
/* Queue of free and dirty vmap blocks, for allocation and flushing purposes */
@@ -2625,8 +2626,15 @@ static void *new_vmap_block(unsigned int order, gfp_t gfp_mask)
free_vmap_area(va);
return ERR_PTR(err);
}
-
- vbq = raw_cpu_ptr(&vmap_block_queue);
+ /*
+ * list_add_tail_rcu could happened in another core
+ * rather than vb->cpu due to task migration, which
+ * is safe as list_add_tail_rcu will ensure the list's
+ * integrity together with list_for_each_rcu from read
+ * side.
+ */
+ vb->cpu = raw_smp_processor_id();
+ vbq = per_cpu_ptr(&vmap_block_queue, vb->cpu);
spin_lock(&vbq->lock);
list_add_tail_rcu(&vb->free_list, &vbq->free);
spin_unlock(&vbq->lock);
@@ -2654,9 +2662,10 @@ static void free_vmap_block(struct vmap_block *vb)
}
static bool purge_fragmented_block(struct vmap_block *vb,
- struct vmap_block_queue *vbq, struct list_head *purge_list,
- bool force_purge)
+ struct list_head *purge_list, bool force_purge)
{
+ struct vmap_block_queue *vbq = &per_cpu(vmap_block_queue, vb->cpu);
+
if (vb->free + vb->dirty != VMAP_BBMAP_BITS ||
vb->dirty == VMAP_BBMAP_BITS)
return false;
@@ -2704,7 +2713,7 @@ static void purge_fragmented_blocks(int cpu)
continue;
spin_lock(&vb->lock);
- purge_fragmented_block(vb, vbq, &purge, true);
+ purge_fragmented_block(vb, &purge, true);
spin_unlock(&vb->lock);
}
rcu_read_unlock();
@@ -2841,7 +2850,7 @@ static void _vm_unmap_aliases(unsigned long start, unsigned long end, int flush)
* not purgeable, check whether there is dirty
* space to be flushed.
*/
- if (!purge_fragmented_block(vb, vbq, &purge_list, false) &&
+ if (!purge_fragmented_block(vb, &purge_list, false) &&
vb->dirty_max && vb->dirty != VMAP_BBMAP_BITS) {
unsigned long va_start = vb->va->va_start;
unsigned long s, e;