summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2021-09-08 22:55:35 +0300
committerLinus Torvalds <torvalds@linux-foundation.org>2021-09-08 22:55:35 +0300
commit2d338201d5311bcd79d42f66df4cecbcbc5f4f2c (patch)
tree75d87f65c31f4721ba6a5356d2a487af9e2961c3 /include/linux
parentcc09ee80c3b18ae1a897a30a17fe710b2b2f620a (diff)
parentb285437d1d929785a5bef3603da78d2cd5341893 (diff)
downloadlinux-2d338201d5311bcd79d42f66df4cecbcbc5f4f2c.tar.xz
Merge branch 'akpm' (patches from Andrew)
Merge more updates from Andrew Morton: "147 patches, based on 7d2a07b769330c34b4deabeed939325c77a7ec2f. Subsystems affected by this patch series: mm (memory-hotplug, rmap, ioremap, highmem, cleanups, secretmem, kfence, damon, and vmscan), alpha, percpu, procfs, misc, core-kernel, MAINTAINERS, lib, checkpatch, epoll, init, nilfs2, coredump, fork, pids, criu, kconfig, selftests, ipc, and scripts" * emailed patches from Andrew Morton <akpm@linux-foundation.org>: (94 commits) scripts: check_extable: fix typo in user error message mm/workingset: correct kernel-doc notations ipc: replace costly bailout check in sysvipc_find_ipc() selftests/memfd: remove unused variable Kconfig.debug: drop selecting non-existing HARDLOCKUP_DETECTOR_ARCH configs: remove the obsolete CONFIG_INPUT_POLLDEV prctl: allow to setup brk for et_dyn executables pid: cleanup the stale comment mentioning pidmap_init(). kernel/fork.c: unexport get_{mm,task}_exe_file coredump: fix memleak in dump_vma_snapshot() fs/coredump.c: log if a core dump is aborted due to changed file permissions nilfs2: use refcount_dec_and_lock() to fix potential UAF nilfs2: fix memory leak in nilfs_sysfs_delete_snapshot_group nilfs2: fix memory leak in nilfs_sysfs_create_snapshot_group nilfs2: fix memory leak in nilfs_sysfs_delete_##name##_group nilfs2: fix memory leak in nilfs_sysfs_create_##name##_group nilfs2: fix NULL pointer in nilfs_##name##_attr_release nilfs2: fix memory leak in nilfs_sysfs_create_device_group trap: cleanup trap_init() init: move usermodehelper_enable() to populate_rootfs() ...
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/damon.h268
-rw-r--r--include/linux/highmem-internal.h27
-rw-r--r--include/linux/memory.h55
-rw-r--r--include/linux/memory_hotplug.h34
-rw-r--r--include/linux/mmzone.h19
-rw-r--r--include/linux/once.h2
-rw-r--r--include/linux/page-flags.h8
-rw-r--r--include/linux/page_ext.h2
-rw-r--r--include/linux/page_idle.h6
-rw-r--r--include/linux/pagemap.h7
-rw-r--r--include/linux/sched/user.h3
-rw-r--r--include/linux/threads.h2
-rw-r--r--include/linux/units.h10
-rw-r--r--include/linux/vmalloc.h3
14 files changed, 396 insertions, 50 deletions
diff --git a/include/linux/damon.h b/include/linux/damon.h
new file mode 100644
index 000000000000..d68b67b8d458
--- /dev/null
+++ b/include/linux/damon.h
@@ -0,0 +1,268 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * DAMON api
+ *
+ * Author: SeongJae Park <sjpark@amazon.de>
+ */
+
+#ifndef _DAMON_H_
+#define _DAMON_H_
+
+#include <linux/mutex.h>
+#include <linux/time64.h>
+#include <linux/types.h>
+
+/* Minimal region size. Every damon_region is aligned by this. */
+#define DAMON_MIN_REGION PAGE_SIZE
+
+/**
+ * struct damon_addr_range - Represents an address region of [@start, @end).
+ * @start: Start address of the region (inclusive).
+ * @end: End address of the region (exclusive).
+ */
+struct damon_addr_range {
+ unsigned long start;
+ unsigned long end;
+};
+
+/**
+ * struct damon_region - Represents a monitoring target region.
+ * @ar: The address range of the region.
+ * @sampling_addr: Address of the sample for the next access check.
+ * @nr_accesses: Access frequency of this region.
+ * @list: List head for siblings.
+ */
+struct damon_region {
+ struct damon_addr_range ar;
+ unsigned long sampling_addr;
+ unsigned int nr_accesses;
+ struct list_head list;
+};
+
+/**
+ * struct damon_target - Represents a monitoring target.
+ * @id: Unique identifier for this target.
+ * @nr_regions: Number of monitoring target regions of this target.
+ * @regions_list: Head of the monitoring target regions of this target.
+ * @list: List head for siblings.
+ *
+ * Each monitoring context could have multiple targets. For example, a context
+ * for virtual memory address spaces could have multiple target processes. The
+ * @id of each target should be unique among the targets of the context. For
+ * example, in the virtual address monitoring context, it could be a pidfd or
+ * an address of an mm_struct.
+ */
+struct damon_target {
+ unsigned long id;
+ unsigned int nr_regions;
+ struct list_head regions_list;
+ struct list_head list;
+};
+
+struct damon_ctx;
+
+/**
+ * struct damon_primitive Monitoring primitives for given use cases.
+ *
+ * @init: Initialize primitive-internal data structures.
+ * @update: Update primitive-internal data structures.
+ * @prepare_access_checks: Prepare next access check of target regions.
+ * @check_accesses: Check the accesses to target regions.
+ * @reset_aggregated: Reset aggregated accesses monitoring results.
+ * @target_valid: Determine if the target is valid.
+ * @cleanup: Clean up the context.
+ *
+ * DAMON can be extended for various address spaces and usages. For this,
+ * users should register the low level primitives for their target address
+ * space and usecase via the &damon_ctx.primitive. Then, the monitoring thread
+ * (&damon_ctx.kdamond) calls @init and @prepare_access_checks before starting
+ * the monitoring, @update after each &damon_ctx.primitive_update_interval, and
+ * @check_accesses, @target_valid and @prepare_access_checks after each
+ * &damon_ctx.sample_interval. Finally, @reset_aggregated is called after each
+ * &damon_ctx.aggr_interval.
+ *
+ * @init should initialize primitive-internal data structures. For example,
+ * this could be used to construct proper monitoring target regions and link
+ * those to @damon_ctx.adaptive_targets.
+ * @update should update the primitive-internal data structures. For example,
+ * this could be used to update monitoring target regions for current status.
+ * @prepare_access_checks should manipulate the monitoring regions to be
+ * prepared for the next access check.
+ * @check_accesses should check the accesses to each region that made after the
+ * last preparation and update the number of observed accesses of each region.
+ * It should also return max number of observed accesses that made as a result
+ * of its update. The value will be used for regions adjustment threshold.
+ * @reset_aggregated should reset the access monitoring results that aggregated
+ * by @check_accesses.
+ * @target_valid should check whether the target is still valid for the
+ * monitoring.
+ * @cleanup is called from @kdamond just before its termination.
+ */
+struct damon_primitive {
+ void (*init)(struct damon_ctx *context);
+ void (*update)(struct damon_ctx *context);
+ void (*prepare_access_checks)(struct damon_ctx *context);
+ unsigned int (*check_accesses)(struct damon_ctx *context);
+ void (*reset_aggregated)(struct damon_ctx *context);
+ bool (*target_valid)(void *target);
+ void (*cleanup)(struct damon_ctx *context);
+};
+
+/*
+ * struct damon_callback Monitoring events notification callbacks.
+ *
+ * @before_start: Called before starting the monitoring.
+ * @after_sampling: Called after each sampling.
+ * @after_aggregation: Called after each aggregation.
+ * @before_terminate: Called before terminating the monitoring.
+ * @private: User private data.
+ *
+ * The monitoring thread (&damon_ctx.kdamond) calls @before_start and
+ * @before_terminate just before starting and finishing the monitoring,
+ * respectively. Therefore, those are good places for installing and cleaning
+ * @private.
+ *
+ * The monitoring thread calls @after_sampling and @after_aggregation for each
+ * of the sampling intervals and aggregation intervals, respectively.
+ * Therefore, users can safely access the monitoring results without additional
+ * protection. For the reason, users are recommended to use these callback for
+ * the accesses to the results.
+ *
+ * If any callback returns non-zero, monitoring stops.
+ */
+struct damon_callback {
+ void *private;
+
+ int (*before_start)(struct damon_ctx *context);
+ int (*after_sampling)(struct damon_ctx *context);
+ int (*after_aggregation)(struct damon_ctx *context);
+ int (*before_terminate)(struct damon_ctx *context);
+};
+
+/**
+ * struct damon_ctx - Represents a context for each monitoring. This is the
+ * main interface that allows users to set the attributes and get the results
+ * of the monitoring.
+ *
+ * @sample_interval: The time between access samplings.
+ * @aggr_interval: The time between monitor results aggregations.
+ * @primitive_update_interval: The time between monitoring primitive updates.
+ *
+ * For each @sample_interval, DAMON checks whether each region is accessed or
+ * not. It aggregates and keeps the access information (number of accesses to
+ * each region) for @aggr_interval time. DAMON also checks whether the target
+ * memory regions need update (e.g., by ``mmap()`` calls from the application,
+ * in case of virtual memory monitoring) and applies the changes for each
+ * @primitive_update_interval. All time intervals are in micro-seconds.
+ * Please refer to &struct damon_primitive and &struct damon_callback for more
+ * detail.
+ *
+ * @kdamond: Kernel thread who does the monitoring.
+ * @kdamond_stop: Notifies whether kdamond should stop.
+ * @kdamond_lock: Mutex for the synchronizations with @kdamond.
+ *
+ * For each monitoring context, one kernel thread for the monitoring is
+ * created. The pointer to the thread is stored in @kdamond.
+ *
+ * Once started, the monitoring thread runs until explicitly required to be
+ * terminated or every monitoring target is invalid. The validity of the
+ * targets is checked via the &damon_primitive.target_valid of @primitive. The
+ * termination can also be explicitly requested by writing non-zero to
+ * @kdamond_stop. The thread sets @kdamond to NULL when it terminates.
+ * Therefore, users can know whether the monitoring is ongoing or terminated by
+ * reading @kdamond. Reads and writes to @kdamond and @kdamond_stop from
+ * outside of the monitoring thread must be protected by @kdamond_lock.
+ *
+ * Note that the monitoring thread protects only @kdamond and @kdamond_stop via
+ * @kdamond_lock. Accesses to other fields must be protected by themselves.
+ *
+ * @primitive: Set of monitoring primitives for given use cases.
+ * @callback: Set of callbacks for monitoring events notifications.
+ *
+ * @min_nr_regions: The minimum number of adaptive monitoring regions.
+ * @max_nr_regions: The maximum number of adaptive monitoring regions.
+ * @adaptive_targets: Head of monitoring targets (&damon_target) list.
+ */
+struct damon_ctx {
+ unsigned long sample_interval;
+ unsigned long aggr_interval;
+ unsigned long primitive_update_interval;
+
+/* private: internal use only */
+ struct timespec64 last_aggregation;
+ struct timespec64 last_primitive_update;
+
+/* public: */
+ struct task_struct *kdamond;
+ bool kdamond_stop;
+ struct mutex kdamond_lock;
+
+ struct damon_primitive primitive;
+ struct damon_callback callback;
+
+ unsigned long min_nr_regions;
+ unsigned long max_nr_regions;
+ struct list_head adaptive_targets;
+};
+
+#define damon_next_region(r) \
+ (container_of(r->list.next, struct damon_region, list))
+
+#define damon_prev_region(r) \
+ (container_of(r->list.prev, struct damon_region, list))
+
+#define damon_for_each_region(r, t) \
+ list_for_each_entry(r, &t->regions_list, list)
+
+#define damon_for_each_region_safe(r, next, t) \
+ list_for_each_entry_safe(r, next, &t->regions_list, list)
+
+#define damon_for_each_target(t, ctx) \
+ list_for_each_entry(t, &(ctx)->adaptive_targets, list)
+
+#define damon_for_each_target_safe(t, next, ctx) \
+ list_for_each_entry_safe(t, next, &(ctx)->adaptive_targets, list)
+
+#ifdef CONFIG_DAMON
+
+struct damon_region *damon_new_region(unsigned long start, unsigned long end);
+inline void damon_insert_region(struct damon_region *r,
+ struct damon_region *prev, struct damon_region *next,
+ struct damon_target *t);
+void damon_add_region(struct damon_region *r, struct damon_target *t);
+void damon_destroy_region(struct damon_region *r, struct damon_target *t);
+
+struct damon_target *damon_new_target(unsigned long id);
+void damon_add_target(struct damon_ctx *ctx, struct damon_target *t);
+void damon_free_target(struct damon_target *t);
+void damon_destroy_target(struct damon_target *t);
+unsigned int damon_nr_regions(struct damon_target *t);
+
+struct damon_ctx *damon_new_ctx(void);
+void damon_destroy_ctx(struct damon_ctx *ctx);
+int damon_set_targets(struct damon_ctx *ctx,
+ unsigned long *ids, ssize_t nr_ids);
+int damon_set_attrs(struct damon_ctx *ctx, unsigned long sample_int,
+ unsigned long aggr_int, unsigned long primitive_upd_int,
+ unsigned long min_nr_reg, unsigned long max_nr_reg);
+int damon_nr_running_ctxs(void);
+
+int damon_start(struct damon_ctx **ctxs, int nr_ctxs);
+int damon_stop(struct damon_ctx **ctxs, int nr_ctxs);
+
+#endif /* CONFIG_DAMON */
+
+#ifdef CONFIG_DAMON_VADDR
+
+/* Monitoring primitives for virtual memory address spaces */
+void damon_va_init(struct damon_ctx *ctx);
+void damon_va_update(struct damon_ctx *ctx);
+void damon_va_prepare_access_checks(struct damon_ctx *ctx);
+unsigned int damon_va_check_accesses(struct damon_ctx *ctx);
+bool damon_va_target_valid(void *t);
+void damon_va_cleanup(struct damon_ctx *ctx);
+void damon_va_set_primitives(struct damon_ctx *ctx);
+
+#endif /* CONFIG_DAMON_VADDR */
+
+#endif /* _DAMON_H */
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
index 7902c7d8b55f..4aa1031d3e4c 100644
--- a/include/linux/highmem-internal.h
+++ b/include/linux/highmem-internal.h
@@ -90,7 +90,11 @@ static inline void __kunmap_local(void *vaddr)
static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
{
- preempt_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_disable();
+ else
+ preempt_disable();
+
pagefault_disable();
return __kmap_local_page_prot(page, prot);
}
@@ -102,7 +106,11 @@ static inline void *kmap_atomic(struct page *page)
static inline void *kmap_atomic_pfn(unsigned long pfn)
{
- preempt_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_disable();
+ else
+ preempt_disable();
+
pagefault_disable();
return __kmap_local_pfn_prot(pfn, kmap_prot);
}
@@ -111,7 +119,10 @@ static inline void __kunmap_atomic(void *addr)
{
kunmap_local_indexed(addr);
pagefault_enable();
- preempt_enable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_enable();
+ else
+ preempt_enable();
}
unsigned int __nr_free_highpages(void);
@@ -179,7 +190,10 @@ static inline void __kunmap_local(void *addr)
static inline void *kmap_atomic(struct page *page)
{
- preempt_disable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_disable();
+ else
+ preempt_disable();
pagefault_disable();
return page_address(page);
}
@@ -200,7 +214,10 @@ static inline void __kunmap_atomic(void *addr)
kunmap_flush_on_unmap(addr);
#endif
pagefault_enable();
- preempt_enable();
+ if (IS_ENABLED(CONFIG_PREEMPT_RT))
+ migrate_enable();
+ else
+ preempt_enable();
}
static inline unsigned int nr_free_highpages(void) { return 0; }
diff --git a/include/linux/memory.h b/include/linux/memory.h
index d9a0b61cd432..7efc0a7c14c9 100644
--- a/include/linux/memory.h
+++ b/include/linux/memory.h
@@ -23,6 +23,48 @@
#define MIN_MEMORY_BLOCK_SIZE (1UL << SECTION_SIZE_BITS)
+/**
+ * struct memory_group - a logical group of memory blocks
+ * @nid: The node id for all memory blocks inside the memory group.
+ * @blocks: List of all memory blocks belonging to this memory group.
+ * @present_kernel_pages: Present (online) memory outside ZONE_MOVABLE of this
+ * memory group.
+ * @present_movable_pages: Present (online) memory in ZONE_MOVABLE of this
+ * memory group.
+ * @is_dynamic: The memory group type: static vs. dynamic
+ * @s.max_pages: Valid with &memory_group.is_dynamic == false. The maximum
+ * number of pages we'll have in this static memory group.
+ * @d.unit_pages: Valid with &memory_group.is_dynamic == true. Unit in pages
+ * in which memory is added/removed in this dynamic memory group.
+ * This granularity defines the alignment of a unit in physical
+ * address space; it has to be at least as big as a single
+ * memory block.
+ *
+ * A memory group logically groups memory blocks; each memory block
+ * belongs to at most one memory group. A memory group corresponds to
+ * a memory device, such as a DIMM or a NUMA node, which spans multiple
+ * memory blocks and might even span multiple non-contiguous physical memory
+ * ranges.
+ *
+ * Modification of members after registration is serialized by memory
+ * hot(un)plug code.
+ */
+struct memory_group {
+ int nid;
+ struct list_head memory_blocks;
+ unsigned long present_kernel_pages;
+ unsigned long present_movable_pages;
+ bool is_dynamic;
+ union {
+ struct {
+ unsigned long max_pages;
+ } s;
+ struct {
+ unsigned long unit_pages;
+ } d;
+ };
+};
+
struct memory_block {
unsigned long start_section_nr;
unsigned long state; /* serialized by the dev->lock */
@@ -34,6 +76,8 @@ struct memory_block {
* lay at the beginning of the memory block.
*/
unsigned long nr_vmemmap_pages;
+ struct memory_group *group; /* group (if any) for this block */
+ struct list_head group_next; /* next block inside memory group */
};
int arch_get_memory_phys_device(unsigned long start_pfn);
@@ -86,7 +130,8 @@ static inline int memory_notify(unsigned long val, void *v)
extern int register_memory_notifier(struct notifier_block *nb);
extern void unregister_memory_notifier(struct notifier_block *nb);
int create_memory_block_devices(unsigned long start, unsigned long size,
- unsigned long vmemmap_pages);
+ unsigned long vmemmap_pages,
+ struct memory_group *group);
void remove_memory_block_devices(unsigned long start, unsigned long size);
extern void memory_dev_init(void);
extern int memory_notify(unsigned long val, void *v);
@@ -96,6 +141,14 @@ extern int walk_memory_blocks(unsigned long start, unsigned long size,
void *arg, walk_memory_blocks_func_t func);
extern int for_each_memory_block(void *arg, walk_memory_blocks_func_t func);
#define CONFIG_MEM_BLOCK_SIZE (PAGES_PER_SECTION<<PAGE_SHIFT)
+
+extern int memory_group_register_static(int nid, unsigned long max_pages);
+extern int memory_group_register_dynamic(int nid, unsigned long unit_pages);
+extern int memory_group_unregister(int mgid);
+struct memory_group *memory_group_find_by_id(int mgid);
+typedef int (*walk_memory_groups_func_t)(struct memory_group *, void *);
+int walk_dynamic_memory_groups(int nid, walk_memory_groups_func_t func,
+ struct memory_group *excluded, void *arg);
#endif /* CONFIG_MEMORY_HOTPLUG_SPARSE */
#ifdef CONFIG_MEMORY_HOTPLUG
diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h
index a7fd2c3ccb77..e5a867c950b2 100644
--- a/include/linux/memory_hotplug.h
+++ b/include/linux/memory_hotplug.h
@@ -12,6 +12,7 @@ struct zone;
struct pglist_data;
struct mem_section;
struct memory_block;
+struct memory_group;
struct resource;
struct vmem_altmap;
@@ -50,6 +51,11 @@ typedef int __bitwise mhp_t;
* Only selected architectures support it with SPARSE_VMEMMAP.
*/
#define MHP_MEMMAP_ON_MEMORY ((__force mhp_t)BIT(1))
+/*
+ * The nid field specifies a memory group id (mgid) instead. The memory group
+ * implies the node id (nid).
+ */
+#define MHP_NID_IS_MGID ((__force mhp_t)BIT(2))
/*
* Extended parameters for memory hotplug:
@@ -95,13 +101,15 @@ static inline void zone_seqlock_init(struct zone *zone)
extern int zone_grow_free_lists(struct zone *zone, unsigned long new_nr_pages);
extern int zone_grow_waitqueues(struct zone *zone, unsigned long nr_pages);
extern int add_one_highpage(struct page *page, int pfn, int bad_ppro);
-extern void adjust_present_page_count(struct zone *zone, long nr_pages);
+extern void adjust_present_page_count(struct page *page,
+ struct memory_group *group,
+ long nr_pages);
/* VM interface that may be used by firmware interface */
extern int mhp_init_memmap_on_memory(unsigned long pfn, unsigned long nr_pages,
struct zone *zone);
extern void mhp_deinit_memmap_on_memory(unsigned long pfn, unsigned long nr_pages);
extern int online_pages(unsigned long pfn, unsigned long nr_pages,
- struct zone *zone);
+ struct zone *zone, struct memory_group *group);
extern struct zone *test_pages_in_a_zone(unsigned long start_pfn,
unsigned long end_pfn);
extern void __offline_isolated_pages(unsigned long start_pfn,
@@ -130,8 +138,7 @@ static inline bool movable_node_is_enabled(void)
return movable_node_enabled;
}
-extern void arch_remove_memory(int nid, u64 start, u64 size,
- struct vmem_altmap *altmap);
+extern void arch_remove_memory(u64 start, u64 size, struct vmem_altmap *altmap);
extern void __remove_pages(unsigned long start_pfn, unsigned long nr_pages,
struct vmem_altmap *altmap);
@@ -292,25 +299,27 @@ static inline void pgdat_resize_init(struct pglist_data *pgdat) {}
#ifdef CONFIG_MEMORY_HOTREMOVE
extern void try_offline_node(int nid);
-extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages);
-extern int remove_memory(int nid, u64 start, u64 size);
-extern void __remove_memory(int nid, u64 start, u64 size);
-extern int offline_and_remove_memory(int nid, u64 start, u64 size);
+extern int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+ struct memory_group *group);
+extern int remove_memory(u64 start, u64 size);
+extern void __remove_memory(u64 start, u64 size);
+extern int offline_and_remove_memory(u64 start, u64 size);
#else
static inline void try_offline_node(int nid) {}
-static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages)
+static inline int offline_pages(unsigned long start_pfn, unsigned long nr_pages,
+ struct memory_group *group)
{
return -EINVAL;
}
-static inline int remove_memory(int nid, u64 start, u64 size)
+static inline int remove_memory(u64 start, u64 size)
{
return -EBUSY;
}
-static inline void __remove_memory(int nid, u64 start, u64 size) {}
+static inline void __remove_memory(u64 start, u64 size) {}
#endif /* CONFIG_MEMORY_HOTREMOVE */
extern void set_zone_contiguous(struct zone *zone);
@@ -339,7 +348,8 @@ extern void sparse_remove_section(struct mem_section *ms,
unsigned long map_offset, struct vmem_altmap *altmap);
extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map,
unsigned long pnum);
-extern struct zone *zone_for_pfn_range(int online_type, int nid, unsigned start_pfn,
+extern struct zone *zone_for_pfn_range(int online_type, int nid,
+ struct memory_group *group, unsigned long start_pfn,
unsigned long nr_pages);
extern int arch_create_linear_mapping(int nid, u64 start, u64 size,
struct mhp_params *params);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1bd5f5955f9a..6a1d79d84675 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -540,6 +540,10 @@ struct zone {
* is calculated as:
* present_pages = spanned_pages - absent_pages(pages in holes);
*
+ * present_early_pages is present pages existing within the zone
+ * located on memory available since early boot, excluding hotplugged
+ * memory.
+ *
* managed_pages is present pages managed by the buddy system, which
* is calculated as (reserved_pages includes pages allocated by the
* bootmem allocator):
@@ -572,6 +576,9 @@ struct zone {
atomic_long_t managed_pages;
unsigned long spanned_pages;
unsigned long present_pages;
+#if defined(CONFIG_MEMORY_HOTPLUG)
+ unsigned long present_early_pages;
+#endif
#ifdef CONFIG_CMA
unsigned long cma_pages;
#endif
@@ -1525,18 +1532,6 @@ void sparse_init(void);
#define subsection_map_init(_pfn, _nr_pages) do {} while (0)
#endif /* CONFIG_SPARSEMEM */
-/*
- * If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
- * need to check pfn validity within that MAX_ORDER_NR_PAGES block.
- * pfn_valid_within() should be used in this case; we optimise this away
- * when we have no holes within a MAX_ORDER_NR_PAGES block.
- */
-#ifdef CONFIG_HOLES_IN_ZONE
-#define pfn_valid_within(pfn) pfn_valid(pfn)
-#else
-#define pfn_valid_within(pfn) (1)
-#endif
-
#endif /* !__GENERATING_BOUNDS.H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */
diff --git a/include/linux/once.h b/include/linux/once.h
index ae6f4eb41cbe..d361fb14ac3a 100644
--- a/include/linux/once.h
+++ b/include/linux/once.h
@@ -16,7 +16,7 @@ void __do_once_done(bool *done, struct static_key_true *once_key,
* out the condition into a nop. DO_ONCE() guarantees type safety of
* arguments!
*
- * Not that the following is not equivalent ...
+ * Note that the following is not equivalent ...
*
* DO_ONCE(func, arg);
* DO_ONCE(func, arg);
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index 4ca160053b1f..a558d67ee86f 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -131,7 +131,7 @@ enum pageflags {
#ifdef CONFIG_MEMORY_FAILURE
PG_hwpoison, /* hardware poisoned page. Don't touch */
#endif
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
PG_young,
PG_idle,
#endif
@@ -178,6 +178,8 @@ enum pageflags {
PG_reported = PG_uptodate,
};
+#define PAGEFLAGS_MASK ((1UL << NR_PAGEFLAGS) - 1)
+
#ifndef __GENERATING_BOUNDS_H
static inline unsigned long _compound_head(const struct page *page)
@@ -439,7 +441,7 @@ PAGEFLAG_FALSE(HWPoison)
#define __PG_HWPOISON 0
#endif
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && defined(CONFIG_64BIT)
TESTPAGEFLAG(Young, young, PF_ANY)
SETPAGEFLAG(Young, young, PF_ANY)
TESTCLEARFLAG(Young, young, PF_ANY)
@@ -831,7 +833,7 @@ static inline void ClearPageSlabPfmemalloc(struct page *page)
* alloc-free cycle to prevent from reusing the page.
*/
#define PAGE_FLAGS_CHECK_AT_PREP \
- (((1UL << NR_PAGEFLAGS) - 1) & ~__PG_HWPOISON)
+ (PAGEFLAGS_MASK & ~__PG_HWPOISON)
#define PAGE_FLAGS_PRIVATE \
(1UL << PG_private | 1UL << PG_private_2)
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index aff81ba31bd8..fabb2e1e087f 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -19,7 +19,7 @@ struct page_ext_operations {
enum page_ext_flags {
PAGE_EXT_OWNER,
PAGE_EXT_OWNER_ALLOCATED,
-#if defined(CONFIG_IDLE_PAGE_TRACKING) && !defined(CONFIG_64BIT)
+#if defined(CONFIG_PAGE_IDLE_FLAG) && !defined(CONFIG_64BIT)
PAGE_EXT_YOUNG,
PAGE_EXT_IDLE,
#endif
diff --git a/include/linux/page_idle.h b/include/linux/page_idle.h
index 1e894d34bdce..d8a6aecf99cb 100644
--- a/include/linux/page_idle.h
+++ b/include/linux/page_idle.h
@@ -6,7 +6,7 @@
#include <linux/page-flags.h>
#include <linux/page_ext.h>
-#ifdef CONFIG_IDLE_PAGE_TRACKING
+#ifdef CONFIG_PAGE_IDLE_FLAG
#ifdef CONFIG_64BIT
static inline bool page_is_young(struct page *page)
@@ -106,7 +106,7 @@ static inline void clear_page_idle(struct page *page)
}
#endif /* CONFIG_64BIT */
-#else /* !CONFIG_IDLE_PAGE_TRACKING */
+#else /* !CONFIG_PAGE_IDLE_FLAG */
static inline bool page_is_young(struct page *page)
{
@@ -135,6 +135,6 @@ static inline void clear_page_idle(struct page *page)
{
}
-#endif /* CONFIG_IDLE_PAGE_TRACKING */
+#endif /* CONFIG_PAGE_IDLE_FLAG */
#endif /* _LINUX_MM_PAGE_IDLE_H */
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 5dcf446f42e5..62db6b0176b9 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -521,18 +521,17 @@ static inline struct page *read_mapping_page(struct address_space *mapping,
*/
static inline pgoff_t page_to_index(struct page *page)
{
- pgoff_t pgoff;
+ struct page *head;
if (likely(!PageTransTail(page)))
return page->index;
+ head = compound_head(page);
/*
* We don't initialize ->index for tail pages: calculate based on
* head page
*/
- pgoff = compound_head(page)->index;
- pgoff += page - compound_head(page);
- return pgoff;
+ return head->index + page - head;
}
extern pgoff_t hugetlb_basepage_index(struct page *page);
diff --git a/include/linux/sched/user.h b/include/linux/sched/user.h
index 2462f7d07695..00ed419dd464 100644
--- a/include/linux/sched/user.h
+++ b/include/linux/sched/user.h
@@ -4,6 +4,7 @@
#include <linux/uidgid.h>
#include <linux/atomic.h>
+#include <linux/percpu_counter.h>
#include <linux/refcount.h>
#include <linux/ratelimit.h>
@@ -13,7 +14,7 @@
struct user_struct {
refcount_t __count; /* reference count */
#ifdef CONFIG_EPOLL
- atomic_long_t epoll_watches; /* The number of file descriptors currently watched */
+ struct percpu_counter epoll_watches; /* The number of file descriptors currently watched */
#endif
unsigned long unix_inflight; /* How many files in flight in unix sockets */
atomic_long_t pipe_bufs; /* how many pages are allocated in pipe buffers */
diff --git a/include/linux/threads.h b/include/linux/threads.h
index 18d5a74bcc3d..c34173e6c5f1 100644
--- a/include/linux/threads.h
+++ b/include/linux/threads.h
@@ -38,7 +38,7 @@
* Define a minimum number of pids per cpu. Heuristically based
* on original pid max of 32k for 32 cpus. Also, increase the
* minimum settable value for pid_max on the running system based
- * on similar defaults. See kernel/pid.c:pidmap_init() for details.
+ * on similar defaults. See kernel/pid.c:pid_idr_init() for details.
*/
#define PIDS_PER_CPU_DEFAULT 1024
#define PIDS_PER_CPU_MIN 8
diff --git a/include/linux/units.h b/include/linux/units.h
index 4a25e0cc8fb3..681fc652e3d7 100644
--- a/include/linux/units.h
+++ b/include/linux/units.h
@@ -20,9 +20,13 @@
#define PICO 1000000000000ULL
#define FEMTO 1000000000000000ULL
-#define MILLIWATT_PER_WATT 1000L
-#define MICROWATT_PER_MILLIWATT 1000L
-#define MICROWATT_PER_WATT 1000000L
+#define HZ_PER_KHZ 1000UL
+#define KHZ_PER_MHZ 1000UL
+#define HZ_PER_MHZ 1000000UL
+
+#define MILLIWATT_PER_WATT 1000UL
+#define MICROWATT_PER_MILLIWATT 1000UL
+#define MICROWATT_PER_WATT 1000000UL
#define ABSOLUTE_ZERO_MILLICELSIUS -273150
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 2644425b6dce..671d402c3778 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -225,9 +225,6 @@ static inline bool is_vm_area_hugepages(const void *addr)
}
#ifdef CONFIG_MMU
-int vmap_range(unsigned long addr, unsigned long end,
- phys_addr_t phys_addr, pgprot_t prot,
- unsigned int max_page_shift);
void vunmap_range(unsigned long addr, unsigned long end);
static inline void set_vm_flush_reset_perms(void *addr)
{