author     Linus Torvalds <torvalds@linux-foundation.org>  2020-10-14 19:57:24 +0300
committer  Linus Torvalds <torvalds@linux-foundation.org>  2020-10-14 19:57:24 +0300
commit     d5660df4a555a98154da850fb61f118269d0a283 (patch)
tree       b2c5f3a15c300499df930321c32fd7d288467d6b /include
parent     b5fc7a89e58bcc059a3d5e4db79c481fb437de59 (diff)
parent     f1f4f3ab54e9a52c7610c998ff8255f019742e67 (diff)
download   linux-d5660df4a555a98154da850fb61f118269d0a283.tar.xz
Merge branch 'akpm' (patches from Andrew)
Merge misc updates from Andrew Morton:
"181 patches.
Subsystems affected by this patch series: kbuild, scripts, ntfs,
ocfs2, vfs, mm (slab, slub, kmemleak, dax, debug, pagecache, fadvise,
gup, swap, memremap, memcg, selftests, pagemap, mincore, hmm, dma,
memory-failure, vmalloc and migration)"
* emailed patches from Andrew Morton <akpm@linux-foundation.org>: (181 commits)
mm/migrate: remove obsolete comment about device public
mm/migrate: remove cpages-- in migrate_vma_finalize()
mm, oom_adj: don't loop through tasks in __set_oom_adj when not necessary
memblock: use separate iterators for memory and reserved regions
memblock: implement for_each_reserved_mem_region() using __next_mem_region()
memblock: remove unused memblock_mem_size()
x86/setup: simplify reserve_crashkernel()
x86/setup: simplify initrd relocation and reservation
arch, drivers: replace for_each_membock() with for_each_mem_range()
arch, mm: replace for_each_memblock() with for_each_mem_pfn_range()
memblock: reduce number of parameters in for_each_mem_range()
memblock: make memblock_debug and related functionality private
memblock: make for_each_memblock_type() iterator private
microblaze: drop unneeded NUMA and sparsemem initializations
riscv: drop unneeded node initialization
h8300, nds32, openrisc: simplify detection of memory extents
arm64: numa: simplify dummy_numa_init()
arm, xtensa: simplify initialization of high memory pages
dma-contiguous: simplify cma_early_percent_memory()
KVM: PPC: Book3S HV: simplify kvm_cma_reserve()
...
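
Several of the entries above rework the memblock iteration API; in particular, "memblock: reduce number of parameters in for_each_mem_range()" drops the memblock_type, nid and flags arguments from the public iterators (the corresponding memblock.h hunks appear in the diff below). What follows is a minimal, hedged sketch of a caller after this series; the function name and the pr_info() output are illustrative only, not taken from the patches.

	#include <linux/memblock.h>

	static int __init dump_memblock_ranges(void)
	{
		phys_addr_t start, end;
		u64 i;

		/* New form: no memblock_type, nid or flags arguments; the macro
		 * walks memblock.memory with MEMBLOCK_NONE under the hood. */
		for_each_mem_range(i, &start, &end)
			pr_info("memory  : %pa - %pa\n", &start, &end);

		/* Reserved regions now get a matching dedicated iterator. */
		for_each_reserved_mem_range(i, &start, &end)
			pr_info("reserved: %pa - %pa\n", &start, &end);

		return 0;
	}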
Diffstat (limited to 'include')
30 files changed, 269 insertions, 131 deletions
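
Before the per-file hunks, one more hedged sketch, this time of the pagecache additions visible in linux/pagemap.h below (FGP_HEAD, find_lock_head() and thp_contains()). The wrapper function is hypothetical; unlocking and releasing the returned head page remains the caller's responsibility.

	#include <linux/pagemap.h>

	/* Hypothetical helper: return the locked, pinned head page covering
	 * @index, or NULL if nothing is cached at that index. */
	static struct page *lock_head_for_index(struct address_space *mapping,
						pgoff_t index)
	{
		struct page *head = find_lock_head(mapping, index);

		if (!head)
			return NULL;

		/* thp_contains() checks @index against the compound page's span
		 * (and against head->index directly for hugetlbfs pages). */
		VM_BUG_ON_PAGE(!thp_contains(head, index), head);

		/* find_subpage(head, index) would yield the exact subpage. */
		return head;
	}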
diff --git a/include/acpi/acpi_numa.h b/include/acpi/acpi_numa.h index fdebcfc6c8df..0e9302285f14 100644 --- a/include/acpi/acpi_numa.h +++ b/include/acpi/acpi_numa.h @@ -17,10 +17,22 @@ extern int pxm_to_node(int); extern int node_to_pxm(int); extern int acpi_map_pxm_to_node(int); extern unsigned char acpi_srat_revision; -extern int acpi_numa __initdata; +extern void disable_srat(void); extern void bad_srat(void); extern int srat_disabled(void); +#else /* CONFIG_ACPI_NUMA */ +static inline void disable_srat(void) +{ +} #endif /* CONFIG_ACPI_NUMA */ + +#ifdef CONFIG_ACPI_HMAT +extern void disable_hmat(void); +#else /* CONFIG_ACPI_HMAT */ +static inline void disable_hmat(void) +{ +} +#endif /* CONFIG_ACPI_HMAT */ #endif /* __ACP_NUMA_H */ diff --git a/include/kunit/test.h b/include/kunit/test.h index 59f3144f009a..3391f38389f8 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -224,6 +224,11 @@ struct kunit { struct list_head resources; /* Protected by lock. */ }; +static inline void kunit_set_failure(struct kunit *test) +{ + WRITE_ONCE(test->success, false); +} + void kunit_init_test(struct kunit *test, const char *name, char *log); int kunit_run_tests(struct kunit_suite *suite); diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 64ae25c59d55..cfa8c0015863 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -709,6 +709,8 @@ static inline u64 acpi_arch_get_root_pointer(void) #define ACPI_HANDLE_FWNODE(fwnode) (NULL) #define ACPI_DEVICE_CLASS(_cls, _msk) .cls = (0), .cls_msk = (0), +#include <acpi/acpi_numa.h> + struct fwnode_handle; static inline bool acpi_dev_found(const char *hid) diff --git a/include/linux/compaction.h b/include/linux/compaction.h index 25a521d299c1..1de5a1151ee7 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -29,9 +29,6 @@ enum compact_result { /* compaction didn't start as it was deferred due to past failures */ COMPACT_DEFERRED, - /* compaction not active last round */ - COMPACT_INACTIVE = COMPACT_DEFERRED, - /* For more detailed tracepoint output - internal to compaction */ COMPACT_NO_SUITABLE_PAGE, /* compaction should continue to another pageblock */ diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index cee0c728d39a..230604e7f057 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -3,6 +3,14 @@ #error "Please don't include <linux/compiler-clang.h> directly, include <linux/compiler.h> instead." #endif +#define CLANG_VERSION (__clang_major__ * 10000 \ + + __clang_minor__ * 100 \ + + __clang_patchlevel__) + +#if CLANG_VERSION < 100001 +# error Sorry, your version of Clang is too old - please use 10.0.1 or newer. +#endif + /* Compiler specific definitions for Clang compiler */ /* same as gcc, this was present in clang-2.6 so we can assume it works diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 7a3769040d7d..d1e3c6896b71 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -12,7 +12,7 @@ /* https://gcc.gnu.org/bugzilla/show_bug.cgi?id=58145 */ #if GCC_VERSION < 40900 -# error Sorry, your compiler is too old - please upgrade it. +# error Sorry, your version of GCC is too old - please use 4.9 or newer. 
#endif /* Optimization barrier */ diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 92ef163a7479..ac45f6d40d39 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -155,7 +155,7 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, extern typeof(sym) sym; \ static const unsigned long __kentry_##sym \ __used \ - __section("___kentry" "+" #sym ) \ + __attribute__((__section__("___kentry+" #sym))) \ = (unsigned long)&sym; #endif diff --git a/include/linux/dax.h b/include/linux/dax.h index 43b39ab9de1a..4ec0bbf86205 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -238,4 +238,12 @@ static inline bool dax_mapping(struct address_space *mapping) return mapping->host && IS_DAX(mapping->host); } +#ifdef CONFIG_DEV_DAX_HMEM_DEVICES +void hmem_register_device(int target_nid, struct resource *r); +#else +static inline void hmem_register_device(int target_nid, struct resource *r) +{ +} +#endif + #endif diff --git a/include/linux/export.h b/include/linux/export.h index fceb5e855717..8933ff6ad23a 100644 --- a/include/linux/export.h +++ b/include/linux/export.h @@ -130,7 +130,7 @@ struct kernel_symbol { * discarded in the final link stage. */ #define __ksym_marker(sym) \ - static int __ksym_marker_##sym[0] __section(".discard.ksym") __used + static int __ksym_marker_##sym[0] __section(.discard.ksym) __used #define __EXPORT_SYMBOL(sym, sec, ns) \ __ksym_marker(sym); \ diff --git a/include/linux/fs.h b/include/linux/fs.h index 2e621d28cd65..5815f7d4dbf4 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2581,6 +2581,10 @@ extern bool is_bad_inode(struct inode *); unsigned long invalidate_mapping_pages(struct address_space *mapping, pgoff_t start, pgoff_t end); +void invalidate_mapping_pagevec(struct address_space *mapping, + pgoff_t start, pgoff_t end, + unsigned long *nr_pagevec); + static inline void invalidate_remote_inode(struct inode *inode) { if (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 67a0774e080b..07e481993ef5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -238,7 +238,9 @@ struct vm_area_struct; * %__GFP_FOO flags as necessary. * * %GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower - * watermark is applied to allow access to "atomic reserves" + * watermark is applied to allow access to "atomic reserves". + * The current implementation doesn't support NMI and few other strict + * non-preemptive contexts (e.g. raw_spin_lock). The same applies to %GFP_NOWAIT. * * %GFP_KERNEL is typical for kernel-internal allocations. The caller requires * %ZONE_NORMAL or a lower zone for direct access but can direct reclaim. 
@@ -560,8 +562,6 @@ extern struct page *alloc_pages_vma(gfp_t gfp_mask, int order, #define alloc_page(gfp_mask) alloc_pages(gfp_mask, 0) #define alloc_page_vma(gfp_mask, vma, addr) \ alloc_pages_vma(gfp_mask, 0, vma, addr, numa_node_id(), false) -#define alloc_page_vma_node(gfp_mask, vma, addr, node) \ - alloc_pages_vma(gfp_mask, 0, vma, addr, node, false) extern unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order); extern unsigned long get_zeroed_page(gfp_t gfp_mask); diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 8a8bc46a2432..0365aa97f8e7 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -38,9 +38,6 @@ extern int zap_huge_pmd(struct mmu_gather *tlb, extern int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud, unsigned long addr); -extern int mincore_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, - unsigned long addr, unsigned long end, - unsigned char *vec); extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr, unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd); diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 087fba34b209..30d343b4a40a 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -14,6 +14,12 @@ struct task_struct; #include <linux/pgtable.h> #include <asm/kasan.h> +/* kasan_data struct is used in KUnit tests for KASAN expected failures */ +struct kunit_kasan_expectation { + bool report_expected; + bool report_found; +}; + extern unsigned char kasan_early_shadow_page[PAGE_SIZE]; extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE]; extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD]; diff --git a/include/linux/memblock.h b/include/linux/memblock.h index 9d925db0d355..ef131255cedc 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -86,7 +86,6 @@ struct memblock { }; extern struct memblock memblock; -extern int memblock_debug; #ifndef CONFIG_ARCH_KEEP_MEMBLOCK #define __init_memblock __meminit @@ -98,9 +97,6 @@ void memblock_discard(void); static inline void memblock_discard(void) {} #endif -#define memblock_dbg(fmt, ...) \ - if (memblock_debug) printk(KERN_INFO pr_fmt(fmt), ##__VA_ARGS__) - phys_addr_t memblock_find_in_range(phys_addr_t start, phys_addr_t end, phys_addr_t size, phys_addr_t align); void memblock_allow_resize(void); @@ -136,9 +132,6 @@ void __next_mem_range_rev(u64 *idx, int nid, enum memblock_flags flags, struct memblock_type *type_b, phys_addr_t *out_start, phys_addr_t *out_end, int *out_nid); -void __next_reserved_mem_region(u64 *idx, phys_addr_t *out_start, - phys_addr_t *out_end); - void __memblock_free_late(phys_addr_t base, phys_addr_t size); #ifdef CONFIG_HAVE_MEMBLOCK_PHYS_MAP @@ -166,7 +159,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, #endif /* CONFIG_HAVE_MEMBLOCK_PHYS_MAP */ /** - * for_each_mem_range - iterate through memblock areas from type_a and not + * __for_each_mem_range - iterate through memblock areas from type_a and not * included in type_b. Or just type_a if type_b is NULL. 
* @i: u64 used as loop variable * @type_a: ptr to memblock_type to iterate @@ -177,7 +170,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ -#define for_each_mem_range(i, type_a, type_b, nid, flags, \ +#define __for_each_mem_range(i, type_a, type_b, nid, flags, \ p_start, p_end, p_nid) \ for (i = 0, __next_mem_range(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid); \ @@ -186,7 +179,7 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, p_start, p_end, p_nid)) /** - * for_each_mem_range_rev - reverse iterate through memblock areas from + * __for_each_mem_range_rev - reverse iterate through memblock areas from * type_a and not included in type_b. Or just type_a if type_b is NULL. * @i: u64 used as loop variable * @type_a: ptr to memblock_type to iterate @@ -197,17 +190,38 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL * @p_nid: ptr to int for nid of the range, can be %NULL */ -#define for_each_mem_range_rev(i, type_a, type_b, nid, flags, \ - p_start, p_end, p_nid) \ +#define __for_each_mem_range_rev(i, type_a, type_b, nid, flags, \ + p_start, p_end, p_nid) \ for (i = (u64)ULLONG_MAX, \ - __next_mem_range_rev(&i, nid, flags, type_a, type_b,\ + __next_mem_range_rev(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid); \ i != (u64)ULLONG_MAX; \ __next_mem_range_rev(&i, nid, flags, type_a, type_b, \ p_start, p_end, p_nid)) /** - * for_each_reserved_mem_region - iterate over all reserved memblock areas + * for_each_mem_range - iterate through memory areas. + * @i: u64 used as loop variable + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + */ +#define for_each_mem_range(i, p_start, p_end) \ + __for_each_mem_range(i, &memblock.memory, NULL, NUMA_NO_NODE, \ + MEMBLOCK_NONE, p_start, p_end, NULL) + +/** + * for_each_mem_range_rev - reverse iterate through memblock areas from + * type_a and not included in type_b. Or just type_a if type_b is NULL. + * @i: u64 used as loop variable + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + */ +#define for_each_mem_range_rev(i, p_start, p_end) \ + __for_each_mem_range_rev(i, &memblock.memory, NULL, NUMA_NO_NODE, \ + MEMBLOCK_NONE, p_start, p_end, NULL) + +/** + * for_each_reserved_mem_range - iterate over all reserved memblock areas * @i: u64 used as loop variable * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL @@ -215,10 +229,9 @@ static inline void __next_physmem_range(u64 *idx, struct memblock_type *type, * Walks over reserved areas of memblock. Available as soon as memblock * is initialized. 
*/ -#define for_each_reserved_mem_region(i, p_start, p_end) \ - for (i = 0UL, __next_reserved_mem_region(&i, p_start, p_end); \ - i != (u64)ULLONG_MAX; \ - __next_reserved_mem_region(&i, p_start, p_end)) +#define for_each_reserved_mem_range(i, p_start, p_end) \ + __for_each_mem_range(i, &memblock.reserved, NULL, NUMA_NO_NODE, \ + MEMBLOCK_NONE, p_start, p_end, NULL) static inline bool memblock_is_hotpluggable(struct memblock_region *m) { @@ -311,8 +324,8 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask); * soon as memblock is initialized. */ #define for_each_free_mem_range(i, nid, flags, p_start, p_end, p_nid) \ - for_each_mem_range(i, &memblock.memory, &memblock.reserved, \ - nid, flags, p_start, p_end, p_nid) + __for_each_mem_range(i, &memblock.memory, &memblock.reserved, \ + nid, flags, p_start, p_end, p_nid) /** * for_each_free_mem_range_reverse - rev-iterate through free memblock areas @@ -328,8 +341,8 @@ int __init deferred_page_init_max_threads(const struct cpumask *node_cpumask); */ #define for_each_free_mem_range_reverse(i, nid, flags, p_start, p_end, \ p_nid) \ - for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ - nid, flags, p_start, p_end, p_nid) + __for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ + nid, flags, p_start, p_end, p_nid) int memblock_set_node(phys_addr_t base, phys_addr_t size, struct memblock_type *type, int nid); @@ -464,7 +477,6 @@ static inline bool memblock_bottom_up(void) phys_addr_t memblock_phys_mem_size(void); phys_addr_t memblock_reserved_size(void); -phys_addr_t memblock_mem_size(unsigned long limit_pfn); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); @@ -476,13 +488,7 @@ bool memblock_is_region_memory(phys_addr_t base, phys_addr_t size); bool memblock_is_reserved(phys_addr_t addr); bool memblock_is_region_reserved(phys_addr_t base, phys_addr_t size); -extern void __memblock_dump_all(void); - -static inline void memblock_dump_all(void) -{ - if (memblock_debug) - __memblock_dump_all(); -} +void memblock_dump_all(void); /** * memblock_set_current_limit - Set the current allocation limit to allow @@ -547,15 +553,23 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo return PFN_UP(reg->base + reg->size); } -#define for_each_memblock(memblock_type, region) \ - for (region = memblock.memblock_type.regions; \ - region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ +/** + * for_each_mem_region - itereate over memory regions + * @region: loop variable + */ +#define for_each_mem_region(region) \ + for (region = memblock.memory.regions; \ + region < (memblock.memory.regions + memblock.memory.cnt); \ region++) -#define for_each_memblock_type(i, memblock_type, rgn) \ - for (i = 0, rgn = &memblock_type->regions[0]; \ - i < memblock_type->cnt; \ - i++, rgn = &memblock_type->regions[i]) +/** + * for_each_reserved_mem_region - itereate over reserved memory regions + * @region: loop variable + */ +#define for_each_reserved_mem_region(region) \ + for (region = memblock.reserved.regions; \ + region < (memblock.reserved.regions + memblock.reserved.cnt); \ + region++) extern void *alloc_large_system_hash(const char *tablename, unsigned long bucketsize, diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d0b036123c6a..6ef4a552e09d 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -215,13 +215,16 @@ struct mem_cgroup { 
struct mem_cgroup_id id; /* Accounted resources */ - struct page_counter memory; - struct page_counter swap; + struct page_counter memory; /* Both v1 & v2 */ + + union { + struct page_counter swap; /* v2 only */ + struct page_counter memsw; /* v1 only */ + }; /* Legacy consumer-oriented counters */ - struct page_counter memsw; - struct page_counter kmem; - struct page_counter tcpmem; + struct page_counter kmem; /* v1 only */ + struct page_counter tcpmem; /* v1 only */ /* Range enforcement for interrupt charges */ struct work_struct high_work; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 375515803cd8..c0faa7a30c46 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -149,15 +149,6 @@ int add_pages(int nid, unsigned long start_pfn, unsigned long nr_pages, struct mhp_params *params); #endif /* ARCH_HAS_ADD_PAGES */ -#ifdef CONFIG_NUMA -extern int memory_add_physaddr_to_nid(u64 start); -#else -static inline int memory_add_physaddr_to_nid(u64 start) -{ - return 0; -} -#endif - #ifdef CONFIG_HAVE_ARCH_NODEDATA_EXTENSION /* * For supporting node-hotadd, we have to allocate a new pgdat. @@ -284,6 +275,20 @@ static inline bool movable_node_is_enabled(void) } #endif /* ! CONFIG_MEMORY_HOTPLUG */ +#ifdef CONFIG_NUMA +extern int memory_add_physaddr_to_nid(u64 start); +extern int phys_to_target_node(u64 start); +#else +static inline int memory_add_physaddr_to_nid(u64 start) +{ + return 0; +} +static inline int phys_to_target_node(u64 start) +{ + return 0; +} +#endif + #if defined(CONFIG_MEMORY_HOTPLUG) || defined(CONFIG_DEFERRED_STRUCT_PAGE_INIT) /* * pgdat resizing functions diff --git a/include/linux/memremap.h b/include/linux/memremap.h index e5862746751b..79c49e7f5c30 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -1,6 +1,7 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_MEMREMAP_H_ #define _LINUX_MEMREMAP_H_ +#include <linux/range.h> #include <linux/ioport.h> #include <linux/percpu-refcount.h> @@ -93,7 +94,6 @@ struct dev_pagemap_ops { /** * struct dev_pagemap - metadata for ZONE_DEVICE mappings * @altmap: pre-allocated/reserved memory for vmemmap allocations - * @res: physical address range covered by @ref * @ref: reference count that pins the devm_memremap_pages() mapping * @internal_ref: internal reference if @ref is not provided by the caller * @done: completion for @internal_ref @@ -103,10 +103,12 @@ struct dev_pagemap_ops { * @owner: an opaque pointer identifying the entity that manages this * instance. Used by various helpers to make sure that no * foreign ZONE_DEVICE memory is accessed. 
+ * @nr_range: number of ranges to be mapped + * @range: range to be mapped when nr_range == 1 + * @ranges: array of ranges to be mapped when nr_range > 1 */ struct dev_pagemap { struct vmem_altmap altmap; - struct resource res; struct percpu_ref *ref; struct percpu_ref internal_ref; struct completion done; @@ -114,6 +116,11 @@ struct dev_pagemap { unsigned int flags; const struct dev_pagemap_ops *ops; void *owner; + int nr_range; + union { + struct range range; + struct range ranges[0]; + }; }; static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap) diff --git a/include/linux/mm.h b/include/linux/mm.h index 13dc9b9ccf8e..620961e4f32b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -791,7 +791,7 @@ static inline void *kvcalloc(size_t n, size_t size, gfp_t flags) extern void kvfree(const void *addr); extern void kvfree_sensitive(const void *addr, size_t len); -static inline int head_mapcount(struct page *head) +static inline int head_compound_mapcount(struct page *head) { return atomic_read(compound_mapcount_ptr(head)) + 1; } @@ -805,7 +805,7 @@ static inline int compound_mapcount(struct page *page) { VM_BUG_ON_PAGE(!PageCompound(page), page); page = compound_head(page); - return head_mapcount(page); + return head_compound_mapcount(page); } /* @@ -918,7 +918,7 @@ static inline bool hpage_pincount_available(struct page *page) return PageCompound(page) && compound_order(page) > 1; } -static inline int head_pincount(struct page *head) +static inline int head_compound_pincount(struct page *head) { return atomic_read(compound_pincount_ptr(head)); } @@ -927,7 +927,7 @@ static inline int compound_pincount(struct page *page) { VM_BUG_ON_PAGE(!hpage_pincount_available(page), page); page = compound_head(page); - return head_pincount(page); + return head_compound_pincount(page); } static inline void set_compound_order(struct page *page, unsigned int order) @@ -1653,8 +1653,8 @@ struct mmu_notifier_range; void free_pgd_range(struct mmu_gather *tlb, unsigned long addr, unsigned long end, unsigned long floor, unsigned long ceiling); -int copy_page_range(struct mm_struct *dst, struct mm_struct *src, - struct vm_area_struct *vma, struct vm_area_struct *new); +int +copy_page_range(struct vm_area_struct *dst_vma, struct vm_area_struct *src_vma); int follow_pte_pmd(struct mm_struct *mm, unsigned long address, struct mmu_notifier_range *range, pte_t **ptepp, pmd_t **pmdpp, spinlock_t **ptlp); @@ -2254,7 +2254,7 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return ptlock_ptr(pmd_to_page(pmd)); } -static inline bool pgtable_pmd_page_ctor(struct page *page) +static inline bool pmd_ptlock_init(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE page->pmd_huge_pte = NULL; @@ -2262,7 +2262,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page) return ptlock_init(page); } -static inline void pgtable_pmd_page_dtor(struct page *page) +static inline void pmd_ptlock_free(struct page *page) { #ifdef CONFIG_TRANSPARENT_HUGEPAGE VM_BUG_ON_PAGE(page->pmd_huge_pte, page); @@ -2279,8 +2279,8 @@ static inline spinlock_t *pmd_lockptr(struct mm_struct *mm, pmd_t *pmd) return &mm->page_table_lock; } -static inline bool pgtable_pmd_page_ctor(struct page *page) { return true; } -static inline void pgtable_pmd_page_dtor(struct page *page) {} +static inline bool pmd_ptlock_init(struct page *page) { return true; } +static inline void pmd_ptlock_free(struct page *page) {} #define pmd_huge_pte(mm, pmd) ((mm)->pmd_huge_pte) @@ -2293,6 +2293,22 @@ static inline 
spinlock_t *pmd_lock(struct mm_struct *mm, pmd_t *pmd) return ptl; } +static inline bool pgtable_pmd_page_ctor(struct page *page) +{ + if (!pmd_ptlock_init(page)) + return false; + __SetPageTable(page); + inc_zone_page_state(page, NR_PAGETABLE); + return true; +} + +static inline void pgtable_pmd_page_dtor(struct page *page) +{ + pmd_ptlock_free(page); + __ClearPageTable(page); + dec_zone_page_state(page, NR_PAGETABLE); +} + /* * No scalability reason to split PUD locks yet, but follow the same pattern * as the PMD locks to make it easier if we decide to. The VM should not be diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h index 0707671851a8..18e7eae9b5ba 100644 --- a/include/linux/mmap_lock.h +++ b/include/linux/mmap_lock.h @@ -87,4 +87,9 @@ static inline void mmap_assert_write_locked(struct mm_struct *mm) VM_BUG_ON_MM(!rwsem_is_locked(&mm->mmap_lock), mm); } +static inline int mmap_lock_is_contended(struct mm_struct *mm) +{ + return rwsem_is_contended(&mm->mmap_lock); +} + #endif /* _LINUX_MMAP_LOCK_H */ diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 0f7a4ff4b059..c27fb1faffe5 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -396,6 +396,41 @@ enum zone_type { */ ZONE_HIGHMEM, #endif + /* + * ZONE_MOVABLE is similar to ZONE_NORMAL, except that it contains + * movable pages with few exceptional cases described below. Main use + * cases for ZONE_MOVABLE are to make memory offlining/unplug more + * likely to succeed, and to locally limit unmovable allocations - e.g., + * to increase the number of THP/huge pages. Notable special cases are: + * + * 1. Pinned pages: (long-term) pinning of movable pages might + * essentially turn such pages unmovable. Memory offlining might + * retry a long time. + * 2. memblock allocations: kernelcore/movablecore setups might create + * situations where ZONE_MOVABLE contains unmovable allocations + * after boot. Memory offlining and allocations fail early. + * 3. Memory holes: kernelcore/movablecore setups might create very rare + * situations where ZONE_MOVABLE contains memory holes after boot, + * for example, if we have sections that are only partially + * populated. Memory offlining and allocations fail early. + * 4. PG_hwpoison pages: while poisoned pages can be skipped during + * memory offlining, such pages cannot be allocated. + * 5. Unmovable PG_offline pages: in paravirtualized environments, + * hotplugged memory blocks might only partially be managed by the + * buddy (e.g., via XEN-balloon, Hyper-V balloon, virtio-mem). The + * parts not manged by the buddy are unmovable PG_offline pages. In + * some cases (virtio-mem), such pages can be skipped during + * memory offlining, however, cannot be moved/allocated. These + * techniques might use alloc_contig_range() to hide previously + * exposed pages from the buddy again (e.g., to implement some sort + * of memory unplug in virtio-mem). + * + * In general, no unmovable allocations that degrade memory offlining + * should end up in ZONE_MOVABLE. Allocators (like alloc_contig_range()) + * have to expect that migrating pages in ZONE_MOVABLE can fail (even + * if has_unmovable_pages() states that there are no unmovable pages, + * there can be false negatives). 
+ */ ZONE_MOVABLE, #ifdef CONFIG_ZONE_DEVICE ZONE_DEVICE, @@ -1081,7 +1116,7 @@ static inline struct zoneref *first_zones_zonelist(struct zonelist *zonelist, z = next_zones_zonelist(++z, highidx, nodemask), \ zone = zonelist_zone(z)) -#define for_next_zone_zonelist_nodemask(zone, z, zlist, highidx, nodemask) \ +#define for_next_zone_zonelist_nodemask(zone, z, highidx, nodemask) \ for (zone = z->zone; \ zone; \ z = next_zones_zonelist(++z, highidx, nodemask), \ diff --git a/include/linux/numa.h b/include/linux/numa.h index a42df804679e..8cb33ccfb671 100644 --- a/include/linux/numa.h +++ b/include/linux/numa.h @@ -23,22 +23,11 @@ #ifdef CONFIG_NUMA /* Generic implementation available */ int numa_map_to_online_node(int node); - -/* - * Optional architecture specific implementation, users need a "depends - * on $ARCH" - */ -int phys_to_target_node(phys_addr_t addr); #else static inline int numa_map_to_online_node(int node) { return NUMA_NO_NODE; } - -static inline int phys_to_target_node(phys_addr_t addr) -{ - return NUMA_NO_NODE; -} #endif #endif /* _LINUX_NUMA_H */ diff --git a/include/linux/oom.h b/include/linux/oom.h index f022f581ac29..2db9a1432511 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -55,6 +55,7 @@ struct oom_control { }; extern struct mutex oom_lock; +extern struct mutex oom_adj_mutex; static inline void set_current_oom_origin(void) { diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 276140c94f4a..38ded408bd4c 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -167,7 +167,7 @@ enum pageflags { PG_slob_free = PG_private, /* Compound pages. Stored in first tail page's flags */ - PG_double_map = PG_private_2, + PG_double_map = PG_workingset, /* non-lru isolated movable page */ PG_isolated = PG_reclaim, @@ -235,6 +235,9 @@ static inline void page_init_poison(struct page *page, size_t size) * * PF_NO_COMPOUND: * the page flag is not relevant for compound pages. + * + * PF_SECOND: + * the page flag is stored in the first tail page. */ #define PF_POISONED_CHECK(page) ({ \ VM_BUG_ON_PGFLAGS(PagePoisoned(page), page); \ @@ -250,6 +253,9 @@ static inline void page_init_poison(struct page *page, size_t size) #define PF_NO_COMPOUND(page, enforce) ({ \ VM_BUG_ON_PGFLAGS(enforce && PageCompound(page), page); \ PF_POISONED_CHECK(page); }) +#define PF_SECOND(page, enforce) ({ \ + VM_BUG_ON_PGFLAGS(!PageHead(page), page); \ + PF_POISONED_CHECK(&page[1]); }) /* * Macros to create function definitions for page flags @@ -688,42 +694,15 @@ static inline int PageTransTail(struct page *page) * * See also __split_huge_pmd_locked() and page_remove_anon_compound_rmap(). 
*/ -static inline int PageDoubleMap(struct page *page) -{ - return PageHead(page) && test_bit(PG_double_map, &page[1].flags); -} - -static inline void SetPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - set_bit(PG_double_map, &page[1].flags); -} - -static inline void ClearPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - clear_bit(PG_double_map, &page[1].flags); -} -static inline int TestSetPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - return test_and_set_bit(PG_double_map, &page[1].flags); -} - -static inline int TestClearPageDoubleMap(struct page *page) -{ - VM_BUG_ON_PAGE(!PageHead(page), page); - return test_and_clear_bit(PG_double_map, &page[1].flags); -} - +PAGEFLAG(DoubleMap, double_map, PF_SECOND) + TESTSCFLAG(DoubleMap, double_map, PF_SECOND) #else TESTPAGEFLAG_FALSE(TransHuge) TESTPAGEFLAG_FALSE(TransCompound) TESTPAGEFLAG_FALSE(TransCompoundMap) TESTPAGEFLAG_FALSE(TransTail) PAGEFLAG_FALSE(DoubleMap) - TESTSETFLAG_FALSE(DoubleMap) - TESTCLEARFLAG_FALSE(DoubleMap) + TESTSCFLAG_FALSE(DoubleMap) #endif /* @@ -888,6 +867,7 @@ static inline int page_has_private(struct page *page) #undef PF_ONLY_HEAD #undef PF_NO_TAIL #undef PF_NO_COMPOUND +#undef PF_SECOND #endif /* !__GENERATING_BOUNDS_H */ #endif /* PAGE_FLAGS_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 434c9c34aeb6..1a3554f5d992 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -279,6 +279,7 @@ pgoff_t page_cache_prev_miss(struct address_space *mapping, #define FGP_NOFS 0x00000010 #define FGP_NOWAIT 0x00000020 #define FGP_FOR_MMAP 0x00000040 +#define FGP_HEAD 0x00000080 struct page *pagecache_get_page(struct address_space *mapping, pgoff_t offset, int fgp_flags, gfp_t cache_gfp_mask); @@ -310,18 +311,37 @@ static inline struct page *find_get_page_flags(struct address_space *mapping, * @mapping: the address_space to search * @offset: the page index * - * Looks up the page cache slot at @mapping & @offset. If there is a + * Looks up the page cache entry at @mapping & @offset. If there is a * page cache page, it is returned locked and with an increased * refcount. * - * Otherwise, %NULL is returned. - * - * find_lock_page() may sleep. + * Context: May sleep. + * Return: A struct page or %NULL if there is no page in the cache for this + * index. */ static inline struct page *find_lock_page(struct address_space *mapping, - pgoff_t offset) + pgoff_t index) +{ + return pagecache_get_page(mapping, index, FGP_LOCK, 0); +} + +/** + * find_lock_head - Locate, pin and lock a pagecache page. + * @mapping: The address_space to search. + * @offset: The page index. + * + * Looks up the page cache entry at @mapping & @offset. If there is a + * page cache page, its head page is returned locked and with an increased + * refcount. + * + * Context: May sleep. + * Return: A struct page which is !PageTail, or %NULL if there is no page + * in the cache for this index. + */ +static inline struct page *find_lock_head(struct address_space *mapping, + pgoff_t index) { - return pagecache_get_page(mapping, offset, FGP_LOCK, 0); + return pagecache_get_page(mapping, index, FGP_LOCK | FGP_HEAD, 0); } /** @@ -372,6 +392,15 @@ static inline struct page *grab_cache_page_nowait(struct address_space *mapping, mapping_gfp_mask(mapping)); } +/* Does this page contain this index? 
*/ +static inline bool thp_contains(struct page *head, pgoff_t index) +{ + /* HugeTLBfs indexes the page cache in units of hpage_size */ + if (PageHuge(head)) + return head->index == index; + return page_index(head) == (index & ~(thp_nr_pages(head) - 1UL)); +} + /* * Given the page we found in the page cache, return the page corresponding * to this index in the file @@ -385,8 +414,6 @@ static inline struct page *find_subpage(struct page *head, pgoff_t index) return head + (index & (thp_nr_pages(head) - 1)); } -struct page *find_get_entry(struct address_space *mapping, pgoff_t offset); -struct page *find_lock_entry(struct address_space *mapping, pgoff_t offset); unsigned find_get_entries(struct address_space *mapping, pgoff_t start, unsigned int nr_entries, struct page **entries, pgoff_t *indices); diff --git a/include/linux/range.h b/include/linux/range.h index d1fbeb664012..274681cc3154 100644 --- a/include/linux/range.h +++ b/include/linux/range.h @@ -1,12 +1,18 @@ /* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_RANGE_H #define _LINUX_RANGE_H +#include <linux/types.h> struct range { u64 start; u64 end; }; +static inline u64 range_len(const struct range *range) +{ + return range->end - range->start + 1; +} + int add_range(struct range *range, int az, int nr_range, u64 start, u64 end); diff --git a/include/linux/sched.h b/include/linux/sched.h index 829b0697d19c..9030f3abd969 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1208,6 +1208,10 @@ struct task_struct { #endif #endif +#if IS_ENABLED(CONFIG_KUNIT) + struct kunit *kunit_test; +#endif + #ifdef CONFIG_FUNCTION_GRAPH_TRACER /* Index of current stored address in ret_stack: */ int curr_ret_stack; diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index ecdc6542070f..dfd82eab2902 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -72,6 +72,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ #define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_OOM_REAP_QUEUED 26 /* mm was queued for oom_reaper */ +#define MMF_MULTIPROCESS 27 /* mm is shared between processes */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ diff --git a/include/linux/slab.h b/include/linux/slab.h index 24df2393ec03..9e155cc83b8a 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -279,7 +279,7 @@ static inline void __check_heap_object(const void *ptr, unsigned long n, #define KMALLOC_MAX_SIZE (1UL << KMALLOC_SHIFT_MAX) /* Maximum size for which we actually use a slab cache */ #define KMALLOC_MAX_CACHE_SIZE (1UL << KMALLOC_SHIFT_HIGH) -/* Maximum order allocatable via the slab allocagtor */ +/* Maximum order allocatable via the slab allocator */ #define KMALLOC_MAX_ORDER (KMALLOC_SHIFT_MAX - PAGE_SHIFT) /* diff --git a/include/linux/swap.h b/include/linux/swap.h index 4340a7b6e7a1..667935c0dbd4 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -170,7 +170,7 @@ enum { SWP_CONTINUED = (1 << 5), /* swap_map has count continuation */ SWP_BLKDEV = (1 << 6), /* its a block device */ SWP_ACTIVATED = (1 << 7), /* set after swap_activate success */ - SWP_FS = (1 << 8), /* swap file goes through fs */ + SWP_FS_OPS = (1 << 8), /* swapfile operations go through fs */ SWP_AREA_DISCARD = (1 << 9), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 10), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 11), /* 
no overwrite PG_writeback pages */ @@ -340,7 +340,6 @@ extern void lru_note_cost_page(struct page *); extern void lru_cache_add(struct page *); extern void lru_add_page_tail(struct page *page, struct page *page_tail, struct lruvec *lruvec, struct list_head *head); -extern void activate_page(struct page *); extern void mark_page_accessed(struct page *); extern void lru_add_drain(void); extern void lru_add_drain_cpu(int cpu); @@ -427,6 +426,7 @@ extern void free_pages_and_swap_cache(struct page **, int); extern struct page *lookup_swap_cache(swp_entry_t entry, struct vm_area_struct *vma, unsigned long addr); +struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index); extern struct page *read_swap_cache_async(swp_entry_t, gfp_t, struct vm_area_struct *vma, unsigned long addr, bool do_poll); @@ -570,6 +570,12 @@ static inline struct page *lookup_swap_cache(swp_entry_t swp, return NULL; } +static inline +struct page *find_get_incore_page(struct address_space *mapping, pgoff_t index) +{ + return find_get_page(mapping, index); +} + static inline int add_to_swap(struct page *page) { return 0; diff --git a/include/linux/swap_slots.h b/include/linux/swap_slots.h index e36b200c2a77..347f1a304190 100644 --- a/include/linux/swap_slots.h +++ b/include/linux/swap_slots.h @@ -23,7 +23,7 @@ struct swap_slots_cache { void disable_swap_slots_cache_lock(void); void reenable_swap_slots_cache_unlock(void); -int enable_swap_slots_cache(void); +void enable_swap_slots_cache(void); int free_swap_slot(swp_entry_t entry); extern bool swap_slot_cache_enabled; |
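
Finally, a hedged sketch of the memremap rework shown in linux/memremap.h above, where struct dev_pagemap drops its struct resource in favour of one or more struct range entries plus nr_range. The allocation, the resource argument and the function name are illustrative assumptions, not part of this commit.

	#include <linux/memremap.h>
	#include <linux/range.h>
	#include <linux/slab.h>

	static struct dev_pagemap *init_single_range_pagemap(struct resource *res)
	{
		struct dev_pagemap *pgmap;

		pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
		if (!pgmap)
			return NULL;

		pgmap->nr_range = 1;              /* new field: number of ranges */
		pgmap->range.start = res->start;  /* struct range replaces ->res */
		pgmap->range.end = res->end;

		/* range_len() (added to linux/range.h) treats 'end' as inclusive. */
		pr_debug("pagemap covers %llu bytes\n", range_len(&pgmap->range));

		return pgmap;
	}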