summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/acpi_iort.h4
-rw-r--r--include/linux/atomic-arch-fallback.h90
-rw-r--r--include/linux/atomic-fallback.h90
-rw-r--r--include/linux/ccp.h3
-rw-r--r--include/linux/cgroup-defs.h15
-rw-r--r--include/linux/cgroup.h5
-rw-r--r--include/linux/compaction.h12
-rw-r--r--include/linux/completion.h5
-rw-r--r--include/linux/console.h1
-rw-r--r--include/linux/context_tracking.h6
-rw-r--r--include/linux/cpuhotplug.h1
-rw-r--r--include/linux/cpumask.h6
-rw-r--r--include/linux/dma-buf-map.h266
-rw-r--r--include/linux/dma-buf.h18
-rw-r--r--include/linux/edac.h16
-rw-r--r--include/linux/elf.h10
-rw-r--r--include/linux/entry-common.h171
-rw-r--r--include/linux/entry-kvm.h4
-rw-r--r--include/linux/filter.h2
-rw-r--r--include/linux/font.h3
-rw-r--r--include/linux/freelist.h129
-rw-r--r--include/linux/fs.h2
-rw-r--r--include/linux/fscrypt.h112
-rw-r--r--include/linux/gfp.h2
-rw-r--r--include/linux/highmem-internal.h232
-rw-r--r--include/linux/highmem.h313
-rw-r--r--include/linux/hrtimer.h6
-rw-r--r--include/linux/huge_mm.h93
-rw-r--r--include/linux/io-mapping.h38
-rw-r--r--include/linux/io-pgtable.h8
-rw-r--r--include/linux/iommu.h1
-rw-r--r--include/linux/ipc_namespace.h3
-rw-r--r--include/linux/irq_work.h33
-rw-r--r--include/linux/irqflags.h8
-rw-r--r--include/linux/kernel.h22
-rw-r--r--include/linux/kprobes.h25
-rw-r--r--include/linux/list.h2
-rw-r--r--include/linux/llist.h23
-rw-r--r--include/linux/lockdep.h6
-rw-r--r--include/linux/memcontrol.h149
-rw-r--r--include/linux/migrate.h4
-rw-r--r--include/linux/mm.h121
-rw-r--r--include/linux/mm_types.h8
-rw-r--r--include/linux/mmap_lock.h94
-rw-r--r--include/linux/mmzone.h70
-rw-r--r--include/linux/msi.h46
-rw-r--r--include/linux/ns_common.h3
-rw-r--r--include/linux/of.h7
-rw-r--r--include/linux/page-flags.h6
-rw-r--r--include/linux/page_ext.h8
-rw-r--r--include/linux/pagevec.h3
-rw-r--r--include/linux/perf/arm_pmu.h2
-rw-r--r--include/linux/perf_event.h6
-rw-r--r--include/linux/pgtable.h71
-rw-r--r--include/linux/pid_namespace.h4
-rw-r--r--include/linux/platform_data/media/coda.h14
-rw-r--r--include/linux/platform_data/shmob_drm.h2
-rw-r--r--include/linux/poison.h4
-rw-r--r--include/linux/preempt.h83
-rw-r--r--include/linux/purgatory.h2
-rw-r--r--include/linux/rcupdate.h11
-rw-r--r--include/linux/rcupdate_trace.h4
-rw-r--r--include/linux/rcutiny.h2
-rw-r--r--include/linux/rcutree.h1
-rw-r--r--include/linux/refcount.h2
-rw-r--r--include/linux/rmap.h1
-rw-r--r--include/linux/rtc.h69
-rw-r--r--include/linux/rwsem.h3
-rw-r--r--include/linux/scatterlist.h6
-rw-r--r--include/linux/sched.h20
-rw-r--r--include/linux/sched/hotplug.h2
-rw-r--r--include/linux/sched/mm.h21
-rw-r--r--include/linux/sched/signal.h20
-rw-r--r--include/linux/sched/task.h2
-rw-r--r--include/linux/sched/topology.h8
-rw-r--r--include/linux/scs.h16
-rw-r--r--include/linux/seccomp.h2
-rw-r--r--include/linux/seqlock.h121
-rw-r--r--include/linux/set_memory.h5
-rw-r--r--include/linux/shmem_fs.h6
-rw-r--r--include/linux/signal.h14
-rw-r--r--include/linux/signal_types.h12
-rw-r--r--include/linux/slab.h18
-rw-r--r--include/linux/smp.h19
-rw-r--r--include/linux/stop_machine.h5
-rw-r--r--include/linux/syscall_user_dispatch.h40
-rw-r--r--include/linux/thread_info.h50
-rw-r--r--include/linux/time_namespace.h37
-rw-r--r--include/linux/timekeeping.h2
-rw-r--r--include/linux/timer.h1
-rw-r--r--include/linux/timex.h1
-rw-r--r--include/linux/tracehook.h44
-rw-r--r--include/linux/user_namespace.h5
-rw-r--r--include/linux/utsname.h9
-rw-r--r--include/linux/vmalloc.h8
-rw-r--r--include/linux/vmstat.h104
96 files changed, 2264 insertions, 920 deletions
diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h
index 20a32120bb88..1a12baa58e40 100644
--- a/include/linux/acpi_iort.h
+++ b/include/linux/acpi_iort.h
@@ -38,6 +38,7 @@ void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size);
const struct iommu_ops *iort_iommu_configure_id(struct device *dev,
const u32 *id_in);
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head);
+phys_addr_t acpi_iort_dma_get_max_cpu_address(void);
#else
static inline void acpi_iort_init(void) { }
static inline u32 iort_msi_map_id(struct device *dev, u32 id)
@@ -55,6 +56,9 @@ static inline const struct iommu_ops *iort_iommu_configure_id(
static inline
int iort_iommu_msi_get_resv_regions(struct device *dev, struct list_head *head)
{ return 0; }
+
+static inline phys_addr_t acpi_iort_dma_get_max_cpu_address(void)
+{ return PHYS_ADDR_MAX; }
#endif
#endif /* __ACPI_IORT_H__ */
diff --git a/include/linux/atomic-arch-fallback.h b/include/linux/atomic-arch-fallback.h
index bcb6aa27cfa6..a3dba31df01e 100644
--- a/include/linux/atomic-arch-fallback.h
+++ b/include/linux/atomic-arch-fallback.h
@@ -9,9 +9,9 @@
#include <linux/compiler.h>
#ifndef arch_xchg_relaxed
-#define arch_xchg_relaxed arch_xchg
-#define arch_xchg_acquire arch_xchg
-#define arch_xchg_release arch_xchg
+#define arch_xchg_acquire arch_xchg
+#define arch_xchg_release arch_xchg
+#define arch_xchg_relaxed arch_xchg
#else /* arch_xchg_relaxed */
#ifndef arch_xchg_acquire
@@ -32,9 +32,9 @@
#endif /* arch_xchg_relaxed */
#ifndef arch_cmpxchg_relaxed
-#define arch_cmpxchg_relaxed arch_cmpxchg
-#define arch_cmpxchg_acquire arch_cmpxchg
-#define arch_cmpxchg_release arch_cmpxchg
+#define arch_cmpxchg_acquire arch_cmpxchg
+#define arch_cmpxchg_release arch_cmpxchg
+#define arch_cmpxchg_relaxed arch_cmpxchg
#else /* arch_cmpxchg_relaxed */
#ifndef arch_cmpxchg_acquire
@@ -55,9 +55,9 @@
#endif /* arch_cmpxchg_relaxed */
#ifndef arch_cmpxchg64_relaxed
-#define arch_cmpxchg64_relaxed arch_cmpxchg64
-#define arch_cmpxchg64_acquire arch_cmpxchg64
-#define arch_cmpxchg64_release arch_cmpxchg64
+#define arch_cmpxchg64_acquire arch_cmpxchg64
+#define arch_cmpxchg64_release arch_cmpxchg64
+#define arch_cmpxchg64_relaxed arch_cmpxchg64
#else /* arch_cmpxchg64_relaxed */
#ifndef arch_cmpxchg64_acquire
@@ -77,6 +77,76 @@
#endif /* arch_cmpxchg64_relaxed */
+#ifndef arch_try_cmpxchg_relaxed
+#ifdef arch_try_cmpxchg
+#define arch_try_cmpxchg_acquire arch_try_cmpxchg
+#define arch_try_cmpxchg_release arch_try_cmpxchg
+#define arch_try_cmpxchg_relaxed arch_try_cmpxchg
+#endif /* arch_try_cmpxchg */
+
+#ifndef arch_try_cmpxchg
+#define arch_try_cmpxchg(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg */
+
+#ifndef arch_try_cmpxchg_acquire
+#define arch_try_cmpxchg_acquire(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg_acquire((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg_acquire */
+
+#ifndef arch_try_cmpxchg_release
+#define arch_try_cmpxchg_release(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg_release((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg_release */
+
+#ifndef arch_try_cmpxchg_relaxed
+#define arch_try_cmpxchg_relaxed(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = arch_cmpxchg_relaxed((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* arch_try_cmpxchg_relaxed */
+
+#else /* arch_try_cmpxchg_relaxed */
+
+#ifndef arch_try_cmpxchg_acquire
+#define arch_try_cmpxchg_acquire(...) \
+ __atomic_op_acquire(arch_try_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef arch_try_cmpxchg_release
+#define arch_try_cmpxchg_release(...) \
+ __atomic_op_release(arch_try_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef arch_try_cmpxchg
+#define arch_try_cmpxchg(...) \
+ __atomic_op_fence(arch_try_cmpxchg, __VA_ARGS__)
+#endif
+
+#endif /* arch_try_cmpxchg_relaxed */
+
#ifndef arch_atomic_read_acquire
static __always_inline int
arch_atomic_read_acquire(const atomic_t *v)
@@ -2288,4 +2358,4 @@ arch_atomic64_dec_if_positive(atomic64_t *v)
#endif
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 90cd26cfd69d2250303d654955a0cc12620fb91b
+// cca554917d7ea73d5e3e7397dd70c484cad9b2c4
diff --git a/include/linux/atomic-fallback.h b/include/linux/atomic-fallback.h
index fd525c71d676..2a3f55d98be9 100644
--- a/include/linux/atomic-fallback.h
+++ b/include/linux/atomic-fallback.h
@@ -9,9 +9,9 @@
#include <linux/compiler.h>
#ifndef xchg_relaxed
-#define xchg_relaxed xchg
-#define xchg_acquire xchg
-#define xchg_release xchg
+#define xchg_acquire xchg
+#define xchg_release xchg
+#define xchg_relaxed xchg
#else /* xchg_relaxed */
#ifndef xchg_acquire
@@ -32,9 +32,9 @@
#endif /* xchg_relaxed */
#ifndef cmpxchg_relaxed
-#define cmpxchg_relaxed cmpxchg
-#define cmpxchg_acquire cmpxchg
-#define cmpxchg_release cmpxchg
+#define cmpxchg_acquire cmpxchg
+#define cmpxchg_release cmpxchg
+#define cmpxchg_relaxed cmpxchg
#else /* cmpxchg_relaxed */
#ifndef cmpxchg_acquire
@@ -55,9 +55,9 @@
#endif /* cmpxchg_relaxed */
#ifndef cmpxchg64_relaxed
-#define cmpxchg64_relaxed cmpxchg64
-#define cmpxchg64_acquire cmpxchg64
-#define cmpxchg64_release cmpxchg64
+#define cmpxchg64_acquire cmpxchg64
+#define cmpxchg64_release cmpxchg64
+#define cmpxchg64_relaxed cmpxchg64
#else /* cmpxchg64_relaxed */
#ifndef cmpxchg64_acquire
@@ -77,6 +77,76 @@
#endif /* cmpxchg64_relaxed */
+#ifndef try_cmpxchg_relaxed
+#ifdef try_cmpxchg
+#define try_cmpxchg_acquire try_cmpxchg
+#define try_cmpxchg_release try_cmpxchg
+#define try_cmpxchg_relaxed try_cmpxchg
+#endif /* try_cmpxchg */
+
+#ifndef try_cmpxchg
+#define try_cmpxchg(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = cmpxchg((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* try_cmpxchg */
+
+#ifndef try_cmpxchg_acquire
+#define try_cmpxchg_acquire(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = cmpxchg_acquire((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* try_cmpxchg_acquire */
+
+#ifndef try_cmpxchg_release
+#define try_cmpxchg_release(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = cmpxchg_release((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* try_cmpxchg_release */
+
+#ifndef try_cmpxchg_relaxed
+#define try_cmpxchg_relaxed(_ptr, _oldp, _new) \
+({ \
+ typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \
+ ___r = cmpxchg_relaxed((_ptr), ___o, (_new)); \
+ if (unlikely(___r != ___o)) \
+ *___op = ___r; \
+ likely(___r == ___o); \
+})
+#endif /* try_cmpxchg_relaxed */
+
+#else /* try_cmpxchg_relaxed */
+
+#ifndef try_cmpxchg_acquire
+#define try_cmpxchg_acquire(...) \
+ __atomic_op_acquire(try_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef try_cmpxchg_release
+#define try_cmpxchg_release(...) \
+ __atomic_op_release(try_cmpxchg, __VA_ARGS__)
+#endif
+
+#ifndef try_cmpxchg
+#define try_cmpxchg(...) \
+ __atomic_op_fence(try_cmpxchg, __VA_ARGS__)
+#endif
+
+#endif /* try_cmpxchg_relaxed */
+
#define arch_atomic_read atomic_read
#define arch_atomic_read_acquire atomic_read_acquire
@@ -2522,4 +2592,4 @@ atomic64_dec_if_positive(atomic64_t *v)
#endif
#endif /* _LINUX_ATOMIC_FALLBACK_H */
-// 9d95b56f98d82a2a26c7b79ccdd0c47572d50a6f
+// d78e6c293c661c15188f0ec05bce45188c8d5892
diff --git a/include/linux/ccp.h b/include/linux/ccp.h
index a5dfbaf2470d..868924dec5a1 100644
--- a/include/linux/ccp.h
+++ b/include/linux/ccp.h
@@ -15,7 +15,8 @@
#include <linux/workqueue.h>
#include <linux/list.h>
#include <crypto/aes.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
+#include <crypto/sha2.h>
struct ccp_device;
struct ccp_cmd;
diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h
index fee0b5547cd0..559ee05f86b2 100644
--- a/include/linux/cgroup-defs.h
+++ b/include/linux/cgroup-defs.h
@@ -668,21 +668,6 @@ struct cgroup_subsys {
*/
bool threaded:1;
- /*
- * If %false, this subsystem is properly hierarchical -
- * configuration, resource accounting and restriction on a parent
- * cgroup cover those of its children. If %true, hierarchy support
- * is broken in some ways - some subsystems ignore hierarchy
- * completely while others are only implemented half-way.
- *
- * It's now disallowed to create nested cgroups if the subsystem is
- * broken and cgroup core will emit a warning message on such
- * cases. Eventually, all subsystems will be made properly
- * hierarchical and this will go away.
- */
- bool broken_hierarchy:1;
- bool warned_broken_hierarchy:1;
-
/* the following two fields are initialized automtically during boot */
int id;
const char *name;
diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h
index 618838c48313..451c2d26a5db 100644
--- a/include/linux/cgroup.h
+++ b/include/linux/cgroup.h
@@ -854,7 +854,6 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {}
#endif /* CONFIG_CGROUP_DATA */
struct cgroup_namespace {
- refcount_t count;
struct ns_common ns;
struct user_namespace *user_ns;
struct ucounts *ucounts;
@@ -889,12 +888,12 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns,
static inline void get_cgroup_ns(struct cgroup_namespace *ns)
{
if (ns)
- refcount_inc(&ns->count);
+ refcount_inc(&ns->ns.count);
}
static inline void put_cgroup_ns(struct cgroup_namespace *ns)
{
- if (ns && refcount_dec_and_test(&ns->count))
+ if (ns && refcount_dec_and_test(&ns->ns.count))
free_cgroup_ns(ns);
}
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 1de5a1151ee7..ed4070ed41ef 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -98,11 +98,8 @@ extern void reset_isolation_suitable(pg_data_t *pgdat);
extern enum compact_result compaction_suitable(struct zone *zone, int order,
unsigned int alloc_flags, int highest_zoneidx);
-extern void defer_compaction(struct zone *zone, int order);
-extern bool compaction_deferred(struct zone *zone, int order);
extern void compaction_defer_reset(struct zone *zone, int order,
bool alloc_success);
-extern bool compaction_restarting(struct zone *zone, int order);
/* Compaction has made some progress and retrying makes sense */
static inline bool compaction_made_progress(enum compact_result result)
@@ -194,15 +191,6 @@ static inline enum compact_result compaction_suitable(struct zone *zone, int ord
return COMPACT_SKIPPED;
}
-static inline void defer_compaction(struct zone *zone, int order)
-{
-}
-
-static inline bool compaction_deferred(struct zone *zone, int order)
-{
- return true;
-}
-
static inline bool compaction_made_progress(enum compact_result result)
{
return false;
diff --git a/include/linux/completion.h b/include/linux/completion.h
index bf8e77001f18..51d9ab079629 100644
--- a/include/linux/completion.h
+++ b/include/linux/completion.h
@@ -28,8 +28,7 @@ struct completion {
struct swait_queue_head wait;
};
-#define init_completion_map(x, m) __init_completion(x)
-#define init_completion(x) __init_completion(x)
+#define init_completion_map(x, m) init_completion(x)
static inline void complete_acquire(struct completion *x) {}
static inline void complete_release(struct completion *x) {}
@@ -82,7 +81,7 @@ static inline void complete_release(struct completion *x) {}
* This inline function will initialize a dynamically created completion
* structure.
*/
-static inline void __init_completion(struct completion *x)
+static inline void init_completion(struct completion *x)
{
x->done = 0;
init_swait_queue_head(&x->wait);
diff --git a/include/linux/console.h b/include/linux/console.h
index 4b1e26c4cb42..20874db50bc8 100644
--- a/include/linux/console.h
+++ b/include/linux/console.h
@@ -62,7 +62,6 @@ struct consw {
int (*con_font_get)(struct vc_data *vc, struct console_font *font);
int (*con_font_default)(struct vc_data *vc,
struct console_font *font, char *name);
- int (*con_font_copy)(struct vc_data *vc, int con);
int (*con_resize)(struct vc_data *vc, unsigned int width,
unsigned int height, unsigned int user);
void (*con_set_palette)(struct vc_data *vc,
diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h
index d53cd331c4dd..bceb06498521 100644
--- a/include/linux/context_tracking.h
+++ b/include/linux/context_tracking.h
@@ -51,7 +51,8 @@ static inline enum ctx_state exception_enter(void)
{
enum ctx_state prev_ctx;
- if (!context_tracking_enabled())
+ if (IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) ||
+ !context_tracking_enabled())
return 0;
prev_ctx = this_cpu_read(context_tracking.state);
@@ -63,7 +64,8 @@ static inline enum ctx_state exception_enter(void)
static inline void exception_exit(enum ctx_state prev_ctx)
{
- if (context_tracking_enabled()) {
+ if (!IS_ENABLED(CONFIG_HAVE_CONTEXT_TRACKING_OFFSTACK) &&
+ context_tracking_enabled()) {
if (prev_ctx != CONTEXT_KERNEL)
context_tracking_enter(prev_ctx);
}
diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index bc56287a1ed1..0042ef362511 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -152,6 +152,7 @@ enum cpuhp_state {
CPUHP_AP_ONLINE,
CPUHP_TEARDOWN_CPU,
CPUHP_AP_ONLINE_IDLE,
+ CPUHP_AP_SCHED_WAIT_EMPTY,
CPUHP_AP_SMPBOOT_THREADS,
CPUHP_AP_X86_VDSO_VMA_ONLINE,
CPUHP_AP_IRQ_AFFINITY_ONLINE,
diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h
index f0d895d6ac39..383684e30f12 100644
--- a/include/linux/cpumask.h
+++ b/include/linux/cpumask.h
@@ -199,6 +199,11 @@ static inline int cpumask_any_and_distribute(const struct cpumask *src1p,
return cpumask_next_and(-1, src1p, src2p);
}
+static inline int cpumask_any_distribute(const struct cpumask *srcp)
+{
+ return cpumask_first(srcp);
+}
+
#define for_each_cpu(cpu, mask) \
for ((cpu) = 0; (cpu) < 1; (cpu)++, (void)mask)
#define for_each_cpu_not(cpu, mask) \
@@ -252,6 +257,7 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu);
unsigned int cpumask_local_spread(unsigned int i, int node);
int cpumask_any_and_distribute(const struct cpumask *src1p,
const struct cpumask *src2p);
+int cpumask_any_distribute(const struct cpumask *srcp);
/**
* for_each_cpu - iterate over every cpu in a mask
diff --git a/include/linux/dma-buf-map.h b/include/linux/dma-buf-map.h
new file mode 100644
index 000000000000..583a3a1f9447
--- /dev/null
+++ b/include/linux/dma-buf-map.h
@@ -0,0 +1,266 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/*
+ * Pointer to dma-buf-mapped memory, plus helpers.
+ */
+
+#ifndef __DMA_BUF_MAP_H__
+#define __DMA_BUF_MAP_H__
+
+#include <linux/io.h>
+#include <linux/string.h>
+
+/**
+ * DOC: overview
+ *
+ * Calling dma-buf's vmap operation returns a pointer to the buffer's memory.
+ * Depending on the location of the buffer, users may have to access it with
+ * I/O operations or memory load/store operations. For example, copying to
+ * system memory could be done with memcpy(), copying to I/O memory would be
+ * done with memcpy_toio().
+ *
+ * .. code-block:: c
+ *
+ * void *vaddr = ...; // pointer to system memory
+ * memcpy(vaddr, src, len);
+ *
+ * void *vaddr_iomem = ...; // pointer to I/O memory
+ * memcpy_toio(vaddr, _iomem, src, len);
+ *
+ * When using dma-buf's vmap operation, the returned pointer is encoded as
+ * :c:type:`struct dma_buf_map <dma_buf_map>`.
+ * :c:type:`struct dma_buf_map <dma_buf_map>` stores the buffer's address in
+ * system or I/O memory and a flag that signals the required method of
+ * accessing the buffer. Use the returned instance and the helper functions
+ * to access the buffer's memory in the correct way.
+ *
+ * The type :c:type:`struct dma_buf_map <dma_buf_map>` and its helpers are
+ * actually independent from the dma-buf infrastructure. When sharing buffers
+ * among devices, drivers have to know the location of the memory to access
+ * the buffers in a safe way. :c:type:`struct dma_buf_map <dma_buf_map>`
+ * solves this problem for dma-buf and its users. If other drivers or
+ * sub-systems require similar functionality, the type could be generalized
+ * and moved to a more prominent header file.
+ *
+ * Open-coding access to :c:type:`struct dma_buf_map <dma_buf_map>` is
+ * considered bad style. Rather then accessing its fields directly, use one
+ * of the provided helper functions, or implement your own. For example,
+ * instances of :c:type:`struct dma_buf_map <dma_buf_map>` can be initialized
+ * statically with DMA_BUF_MAP_INIT_VADDR(), or at runtime with
+ * dma_buf_map_set_vaddr(). These helpers will set an address in system memory.
+ *
+ * .. code-block:: c
+ *
+ * struct dma_buf_map map = DMA_BUF_MAP_INIT_VADDR(0xdeadbeaf);
+ *
+ * dma_buf_map_set_vaddr(&map. 0xdeadbeaf);
+ *
+ * To set an address in I/O memory, use dma_buf_map_set_vaddr_iomem().
+ *
+ * .. code-block:: c
+ *
+ * dma_buf_map_set_vaddr_iomem(&map. 0xdeadbeaf);
+ *
+ * Instances of struct dma_buf_map do not have to be cleaned up, but
+ * can be cleared to NULL with dma_buf_map_clear(). Cleared mappings
+ * always refer to system memory.
+ *
+ * .. code-block:: c
+ *
+ * dma_buf_map_clear(&map);
+ *
+ * Test if a mapping is valid with either dma_buf_map_is_set() or
+ * dma_buf_map_is_null().
+ *
+ * .. code-block:: c
+ *
+ * if (dma_buf_map_is_set(&map) != dma_buf_map_is_null(&map))
+ * // always true
+ *
+ * Instances of :c:type:`struct dma_buf_map <dma_buf_map>` can be compared
+ * for equality with dma_buf_map_is_equal(). Mappings the point to different
+ * memory spaces, system or I/O, are never equal. That's even true if both
+ * spaces are located in the same address space, both mappings contain the
+ * same address value, or both mappings refer to NULL.
+ *
+ * .. code-block:: c
+ *
+ * struct dma_buf_map sys_map; // refers to system memory
+ * struct dma_buf_map io_map; // refers to I/O memory
+ *
+ * if (dma_buf_map_is_equal(&sys_map, &io_map))
+ * // always false
+ *
+ * A set up instance of struct dma_buf_map can be used to access or manipulate
+ * the buffer memory. Depending on the location of the memory, the provided
+ * helpers will pick the correct operations. Data can be copied into the memory
+ * with dma_buf_map_memcpy_to(). The address can be manipulated with
+ * dma_buf_map_incr().
+ *
+ * .. code-block:: c
+ *
+ * const void *src = ...; // source buffer
+ * size_t len = ...; // length of src
+ *
+ * dma_buf_map_memcpy_to(&map, src, len);
+ * dma_buf_map_incr(&map, len); // go to first byte after the memcpy
+ */
+
+/**
+ * struct dma_buf_map - Pointer to vmap'ed dma-buf memory.
+ * @vaddr_iomem: The buffer's address if in I/O memory
+ * @vaddr: The buffer's address if in system memory
+ * @is_iomem: True if the dma-buf memory is located in I/O
+ * memory, or false otherwise.
+ */
+struct dma_buf_map {
+ union {
+ void __iomem *vaddr_iomem;
+ void *vaddr;
+ };
+ bool is_iomem;
+};
+
+/**
+ * DMA_BUF_MAP_INIT_VADDR - Initializes struct dma_buf_map to an address in system memory
+ * @vaddr: A system-memory address
+ */
+#define DMA_BUF_MAP_INIT_VADDR(vaddr_) \
+ { \
+ .vaddr = (vaddr_), \
+ .is_iomem = false, \
+ }
+
+/**
+ * dma_buf_map_set_vaddr - Sets a dma-buf mapping structure to an address in system memory
+ * @map: The dma-buf mapping structure
+ * @vaddr: A system-memory address
+ *
+ * Sets the address and clears the I/O-memory flag.
+ */
+static inline void dma_buf_map_set_vaddr(struct dma_buf_map *map, void *vaddr)
+{
+ map->vaddr = vaddr;
+ map->is_iomem = false;
+}
+
+/**
+ * dma_buf_map_set_vaddr_iomem - Sets a dma-buf mapping structure to an address in I/O memory
+ * @map: The dma-buf mapping structure
+ * @vaddr_iomem: An I/O-memory address
+ *
+ * Sets the address and the I/O-memory flag.
+ */
+static inline void dma_buf_map_set_vaddr_iomem(struct dma_buf_map *map,
+ void __iomem *vaddr_iomem)
+{
+ map->vaddr_iomem = vaddr_iomem;
+ map->is_iomem = true;
+}
+
+/**
+ * dma_buf_map_is_equal - Compares two dma-buf mapping structures for equality
+ * @lhs: The dma-buf mapping structure
+ * @rhs: A dma-buf mapping structure to compare with
+ *
+ * Two dma-buf mapping structures are equal if they both refer to the same type of memory
+ * and to the same address within that memory.
+ *
+ * Returns:
+ * True is both structures are equal, or false otherwise.
+ */
+static inline bool dma_buf_map_is_equal(const struct dma_buf_map *lhs,
+ const struct dma_buf_map *rhs)
+{
+ if (lhs->is_iomem != rhs->is_iomem)
+ return false;
+ else if (lhs->is_iomem)
+ return lhs->vaddr_iomem == rhs->vaddr_iomem;
+ else
+ return lhs->vaddr == rhs->vaddr;
+}
+
+/**
+ * dma_buf_map_is_null - Tests for a dma-buf mapping to be NULL
+ * @map: The dma-buf mapping structure
+ *
+ * Depending on the state of struct dma_buf_map.is_iomem, tests if the
+ * mapping is NULL.
+ *
+ * Returns:
+ * True if the mapping is NULL, or false otherwise.
+ */
+static inline bool dma_buf_map_is_null(const struct dma_buf_map *map)
+{
+ if (map->is_iomem)
+ return !map->vaddr_iomem;
+ return !map->vaddr;
+}
+
+/**
+ * dma_buf_map_is_set - Tests is the dma-buf mapping has been set
+ * @map: The dma-buf mapping structure
+ *
+ * Depending on the state of struct dma_buf_map.is_iomem, tests if the
+ * mapping has been set.
+ *
+ * Returns:
+ * True if the mapping is been set, or false otherwise.
+ */
+static inline bool dma_buf_map_is_set(const struct dma_buf_map *map)
+{
+ return !dma_buf_map_is_null(map);
+}
+
+/**
+ * dma_buf_map_clear - Clears a dma-buf mapping structure
+ * @map: The dma-buf mapping structure
+ *
+ * Clears all fields to zero; including struct dma_buf_map.is_iomem. So
+ * mapping structures that were set to point to I/O memory are reset for
+ * system memory. Pointers are cleared to NULL. This is the default.
+ */
+static inline void dma_buf_map_clear(struct dma_buf_map *map)
+{
+ if (map->is_iomem) {
+ map->vaddr_iomem = NULL;
+ map->is_iomem = false;
+ } else {
+ map->vaddr = NULL;
+ }
+}
+
+/**
+ * dma_buf_map_memcpy_to - Memcpy into dma-buf mapping
+ * @dst: The dma-buf mapping structure
+ * @src: The source buffer
+ * @len: The number of byte in src
+ *
+ * Copies data into a dma-buf mapping. The source buffer is in system
+ * memory. Depending on the buffer's location, the helper picks the correct
+ * method of accessing the memory.
+ */
+static inline void dma_buf_map_memcpy_to(struct dma_buf_map *dst, const void *src, size_t len)
+{
+ if (dst->is_iomem)
+ memcpy_toio(dst->vaddr_iomem, src, len);
+ else
+ memcpy(dst->vaddr, src, len);
+}
+
+/**
+ * dma_buf_map_incr - Increments the address stored in a dma-buf mapping
+ * @map: The dma-buf mapping structure
+ * @incr: The number of bytes to increment
+ *
+ * Increments the address stored in a dma-buf mapping. Depending on the
+ * buffer's location, the correct value will be updated.
+ */
+static inline void dma_buf_map_incr(struct dma_buf_map *map, size_t incr)
+{
+ if (map->is_iomem)
+ map->vaddr_iomem += incr;
+ else
+ map->vaddr += incr;
+}
+
+#endif /* __DMA_BUF_MAP_H__ */
diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h
index 957b398d30e5..cf72699cb2bc 100644
--- a/include/linux/dma-buf.h
+++ b/include/linux/dma-buf.h
@@ -13,6 +13,7 @@
#ifndef __DMA_BUF_H__
#define __DMA_BUF_H__
+#include <linux/dma-buf-map.h>
#include <linux/file.h>
#include <linux/err.h>
#include <linux/scatterlist.h>
@@ -145,7 +146,8 @@ struct dma_buf_ops {
*
* A &sg_table scatter list of or the backing storage of the DMA buffer,
* already mapped into the device address space of the &device attached
- * with the provided &dma_buf_attachment.
+ * with the provided &dma_buf_attachment. The addresses and lengths in
+ * the scatter list are PAGE_SIZE aligned.
*
* On failure, returns a negative error value wrapped into a pointer.
* May also return -EINTR when a signal was received while being
@@ -265,13 +267,13 @@ struct dma_buf_ops {
*/
int (*mmap)(struct dma_buf *, struct vm_area_struct *vma);
- void *(*vmap)(struct dma_buf *);
- void (*vunmap)(struct dma_buf *, void *vaddr);
+ int (*vmap)(struct dma_buf *dmabuf, struct dma_buf_map *map);
+ void (*vunmap)(struct dma_buf *dmabuf, struct dma_buf_map *map);
};
/**
* struct dma_buf - shared buffer object
- * @size: size of the buffer
+ * @size: size of the buffer; invariant over the lifetime of the buffer.
* @file: file pointer used for sharing buffers across, and for refcounting.
* @attachments: list of dma_buf_attachment that denotes all devices attached,
* protected by dma_resv lock.
@@ -309,7 +311,7 @@ struct dma_buf {
const struct dma_buf_ops *ops;
struct mutex lock;
unsigned vmapping_counter;
- void *vmap_ptr;
+ struct dma_buf_map vmap_ptr;
const char *exp_name;
const char *name;
spinlock_t name_lock;
@@ -403,7 +405,7 @@ struct dma_buf_attachment {
* @exp_name: name of the exporter - useful for debugging.
* @owner: pointer to exporter module - used for refcounting kernel module
* @ops: Attach allocator-defined dma buf ops to the new buffer
- * @size: Size of the buffer
+ * @size: Size of the buffer - invariant over the lifetime of the buffer
* @flags: mode flags for the file
* @resv: reservation-object, NULL to allocate default one
* @priv: Attach private data of allocator to this buffer
@@ -502,6 +504,6 @@ int dma_buf_end_cpu_access(struct dma_buf *dma_buf,
int dma_buf_mmap(struct dma_buf *, struct vm_area_struct *,
unsigned long);
-void *dma_buf_vmap(struct dma_buf *);
-void dma_buf_vunmap(struct dma_buf *, void *vaddr);
+int dma_buf_vmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
+void dma_buf_vunmap(struct dma_buf *dmabuf, struct dma_buf_map *map);
#endif /* __DMA_BUF_H__ */
diff --git a/include/linux/edac.h b/include/linux/edac.h
index 15e8f3d8a895..76d3562d3006 100644
--- a/include/linux/edac.h
+++ b/include/linux/edac.h
@@ -175,11 +175,15 @@ static inline char *mc_event_error_type(const unsigned int err_type)
* @MEM_RDDR3: Registered DDR3 RAM
* This is a variant of the DDR3 memories.
* @MEM_LRDDR3: Load-Reduced DDR3 memory.
+ * @MEM_LPDDR3: Low-Power DDR3 memory.
* @MEM_DDR4: Unbuffered DDR4 RAM
* @MEM_RDDR4: Registered DDR4 RAM
* This is a variant of the DDR4 memories.
* @MEM_LRDDR4: Load-Reduced DDR4 memory.
+ * @MEM_LPDDR4: Low-Power DDR4 memory.
+ * @MEM_DDR5: Unbuffered DDR5 RAM
* @MEM_NVDIMM: Non-volatile RAM
+ * @MEM_WIO2: Wide I/O 2.
*/
enum mem_type {
MEM_EMPTY = 0,
@@ -200,10 +204,14 @@ enum mem_type {
MEM_DDR3,
MEM_RDDR3,
MEM_LRDDR3,
+ MEM_LPDDR3,
MEM_DDR4,
MEM_RDDR4,
MEM_LRDDR4,
+ MEM_LPDDR4,
+ MEM_DDR5,
MEM_NVDIMM,
+ MEM_WIO2,
};
#define MEM_FLAG_EMPTY BIT(MEM_EMPTY)
@@ -223,13 +231,17 @@ enum mem_type {
#define MEM_FLAG_XDR BIT(MEM_XDR)
#define MEM_FLAG_DDR3 BIT(MEM_DDR3)
#define MEM_FLAG_RDDR3 BIT(MEM_RDDR3)
+#define MEM_FLAG_LPDDR3 BIT(MEM_LPDDR3)
#define MEM_FLAG_DDR4 BIT(MEM_DDR4)
#define MEM_FLAG_RDDR4 BIT(MEM_RDDR4)
#define MEM_FLAG_LRDDR4 BIT(MEM_LRDDR4)
+#define MEM_FLAG_LPDDR4 BIT(MEM_LPDDR4)
+#define MEM_FLAG_DDR5 BIT(MEM_DDR5)
#define MEM_FLAG_NVDIMM BIT(MEM_NVDIMM)
+#define MEM_FLAG_WIO2 BIT(MEM_WIO2)
/**
- * enum edac-type - Error Detection and Correction capabilities and mode
+ * enum edac_type - Error Detection and Correction capabilities and mode
* @EDAC_UNKNOWN: Unknown if ECC is available
* @EDAC_NONE: Doesn't support ECC
* @EDAC_RESERVED: Reserved ECC type
@@ -309,7 +321,7 @@ enum scrub_type {
#define OP_OFFLINE 0x300
/**
- * enum edac_mc_layer - memory controller hierarchy layer
+ * enum edac_mc_layer_type - memory controller hierarchy layer
*
* @EDAC_MC_LAYER_BRANCH: memory layer is named "branch"
* @EDAC_MC_LAYER_CHANNEL: memory layer is named "channel"
diff --git a/include/linux/elf.h b/include/linux/elf.h
index 5d5b0321da0b..c9a46c4e183b 100644
--- a/include/linux/elf.h
+++ b/include/linux/elf.h
@@ -22,6 +22,16 @@
SET_PERSONALITY(ex)
#endif
+#ifndef START_THREAD
+#define START_THREAD(elf_ex, regs, elf_entry, start_stack) \
+ start_thread(regs, elf_entry, start_stack)
+#endif
+
+#if defined(ARCH_HAS_SETUP_ADDITIONAL_PAGES) && !defined(ARCH_SETUP_ADDITIONAL_PAGES)
+#define ARCH_SETUP_ADDITIONAL_PAGES(bprm, ex, interpreter) \
+ arch_setup_additional_pages(bprm, interpreter)
+#endif
+
#define ELF32_GNU_PROPERTY_ALIGN 4
#define ELF64_GNU_PROPERTY_ALIGN 8
diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h
index 474f29638d2c..7c581a4c3797 100644
--- a/include/linux/entry-common.h
+++ b/include/linux/entry-common.h
@@ -13,22 +13,6 @@
* Define dummy _TIF work flags if not defined by the architecture or for
* disabled functionality.
*/
-#ifndef _TIF_SYSCALL_EMU
-# define _TIF_SYSCALL_EMU (0)
-#endif
-
-#ifndef _TIF_SYSCALL_TRACEPOINT
-# define _TIF_SYSCALL_TRACEPOINT (0)
-#endif
-
-#ifndef _TIF_SECCOMP
-# define _TIF_SECCOMP (0)
-#endif
-
-#ifndef _TIF_SYSCALL_AUDIT
-# define _TIF_SYSCALL_AUDIT (0)
-#endif
-
#ifndef _TIF_PATCH_PENDING
# define _TIF_PATCH_PENDING (0)
#endif
@@ -37,28 +21,36 @@
# define _TIF_UPROBE (0)
#endif
+#ifndef _TIF_NOTIFY_SIGNAL
+# define _TIF_NOTIFY_SIGNAL (0)
+#endif
+
/*
- * TIF flags handled in syscall_enter_from_user_mode()
+ * SYSCALL_WORK flags handled in syscall_enter_from_user_mode()
*/
-#ifndef ARCH_SYSCALL_ENTER_WORK
-# define ARCH_SYSCALL_ENTER_WORK (0)
+#ifndef ARCH_SYSCALL_WORK_ENTER
+# define ARCH_SYSCALL_WORK_ENTER (0)
#endif
-#define SYSCALL_ENTER_WORK \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | _TIF_SECCOMP | \
- _TIF_SYSCALL_TRACEPOINT | _TIF_SYSCALL_EMU | \
- ARCH_SYSCALL_ENTER_WORK)
-
/*
- * TIF flags handled in syscall_exit_to_user_mode()
+ * SYSCALL_WORK flags handled in syscall_exit_to_user_mode()
*/
-#ifndef ARCH_SYSCALL_EXIT_WORK
-# define ARCH_SYSCALL_EXIT_WORK (0)
+#ifndef ARCH_SYSCALL_WORK_EXIT
+# define ARCH_SYSCALL_WORK_EXIT (0)
#endif
-#define SYSCALL_EXIT_WORK \
- (_TIF_SYSCALL_TRACE | _TIF_SYSCALL_AUDIT | \
- _TIF_SYSCALL_TRACEPOINT | ARCH_SYSCALL_EXIT_WORK)
+#define SYSCALL_WORK_ENTER (SYSCALL_WORK_SECCOMP | \
+ SYSCALL_WORK_SYSCALL_TRACEPOINT | \
+ SYSCALL_WORK_SYSCALL_TRACE | \
+ SYSCALL_WORK_SYSCALL_EMU | \
+ SYSCALL_WORK_SYSCALL_AUDIT | \
+ SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
+ ARCH_SYSCALL_WORK_ENTER)
+#define SYSCALL_WORK_EXIT (SYSCALL_WORK_SYSCALL_TRACEPOINT | \
+ SYSCALL_WORK_SYSCALL_TRACE | \
+ SYSCALL_WORK_SYSCALL_AUDIT | \
+ SYSCALL_WORK_SYSCALL_USER_DISPATCH | \
+ ARCH_SYSCALL_WORK_EXIT)
/*
* TIF flags handled in exit_to_user_mode_loop()
@@ -69,7 +61,7 @@
#define EXIT_TO_USER_MODE_WORK \
(_TIF_SIGPENDING | _TIF_NOTIFY_RESUME | _TIF_UPROBE | \
- _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | \
+ _TIF_NEED_RESCHED | _TIF_PATCH_PENDING | _TIF_NOTIFY_SIGNAL | \
ARCH_EXIT_TO_USER_MODE_WORK)
/**
@@ -110,6 +102,27 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
#endif
/**
+ * enter_from_user_mode - Establish state when coming from user mode
+ *
+ * Syscall/interrupt entry disables interrupts, but user mode is traced as
+ * interrupts enabled. Also with NO_HZ_FULL RCU might be idle.
+ *
+ * 1) Tell lockdep that interrupts are disabled
+ * 2) Invoke context tracking if enabled to reactivate RCU
+ * 3) Trace interrupts off state
+ *
+ * Invoked from architecture specific syscall entry code with interrupts
+ * disabled. The calling code has to be non-instrumentable. When the
+ * function returns all state is correct and interrupts are still
+ * disabled. The subsequent functions can be instrumented.
+ *
+ * This is invoked when there is architecture specific functionality to be
+ * done between establishing state and enabling interrupts. The caller must
+ * enable interrupts before invoking syscall_enter_from_user_mode_work().
+ */
+void enter_from_user_mode(struct pt_regs *regs);
+
+/**
* syscall_enter_from_user_mode_prepare - Establish state and enable interrupts
* @regs: Pointer to currents pt_regs
*
@@ -118,7 +131,8 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs
* function returns all state is correct, interrupts are enabled and the
* subsequent functions can be instrumented.
*
- * This handles lockdep, RCU (context tracking) and tracing state.
+ * This handles lockdep, RCU (context tracking) and tracing state, i.e.
+ * the functionality provided by enter_from_user_mode().
*
* This is invoked when there is extra architecture specific functionality
* to be done between establishing state and handling user mode entry work.
@@ -144,8 +158,8 @@ void syscall_enter_from_user_mode_prepare(struct pt_regs *regs);
*
* It handles the following work items:
*
- * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(),
- * __secure_computing(), trace_sys_enter()
+ * 1) syscall_work flag dependent invocations of
+ * arch_syscall_enter_tracehook(), __secure_computing(), trace_sys_enter()
* 2) Invocation of audit_syscall_entry()
*/
long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall);
@@ -259,12 +273,13 @@ static __always_inline void arch_exit_to_user_mode(void) { }
#endif
/**
- * arch_do_signal - Architecture specific signal delivery function
+ * arch_do_signal_or_restart - Architecture specific signal delivery function
* @regs: Pointer to currents pt_regs
+ * @has_signal: actual signal to handle
*
* Invoked from exit_to_user_mode_loop().
*/
-void arch_do_signal(struct pt_regs *regs);
+void arch_do_signal_or_restart(struct pt_regs *regs, bool has_signal);
/**
* arch_syscall_exit_tracehook - Wrapper around tracehook_report_syscall_exit()
@@ -286,6 +301,41 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
#endif
/**
+ * exit_to_user_mode - Fixup state when exiting to user mode
+ *
+ * Syscall/interrupt exit enables interrupts, but the kernel state is
+ * interrupts disabled when this is invoked. Also tell RCU about it.
+ *
+ * 1) Trace interrupts on state
+ * 2) Invoke context tracking if enabled to adjust RCU state
+ * 3) Invoke architecture specific last minute exit code, e.g. speculation
+ * mitigations, etc.: arch_exit_to_user_mode()
+ * 4) Tell lockdep that interrupts are enabled
+ *
+ * Invoked from architecture specific code when syscall_exit_to_user_mode()
+ * is not suitable as the last step before returning to userspace. Must be
+ * invoked with interrupts disabled and the caller must be
+ * non-instrumentable.
+ * The caller has to invoke syscall_exit_to_user_mode_work() before this.
+ */
+void exit_to_user_mode(void);
+
+/**
+ * syscall_exit_to_user_mode_work - Handle work before returning to user mode
+ * @regs: Pointer to currents pt_regs
+ *
+ * Same as step 1 and 2 of syscall_exit_to_user_mode() but without calling
+ * exit_to_user_mode() to perform the final transition to user mode.
+ *
+ * Calling convention is the same as for syscall_exit_to_user_mode() and it
+ * returns with all work handled and interrupts disabled. The caller must
+ * invoke exit_to_user_mode() before actually switching to user mode to
+ * make the final state transitions. Interrupts must stay disabled between
+ * return from this function and the invocation of exit_to_user_mode().
+ */
+void syscall_exit_to_user_mode_work(struct pt_regs *regs);
+
+/**
* syscall_exit_to_user_mode - Handle work before returning to user mode
* @regs: Pointer to currents pt_regs
*
@@ -307,8 +357,12 @@ static inline void arch_syscall_exit_tracehook(struct pt_regs *regs, bool step)
* - Architecture specific one time work arch_exit_to_user_mode_prepare()
* - Address limit and lockdep checks
*
- * 3) Final transition (lockdep, tracing, context tracking, RCU). Invokes
- * arch_exit_to_user_mode() to handle e.g. speculation mitigations
+ * 3) Final transition (lockdep, tracing, context tracking, RCU), i.e. the
+ * functionality in exit_to_user_mode().
+ *
+ * This is a combination of syscall_exit_to_user_mode_work() (1,2) and
+ * exit_to_user_mode(). This function is preferred unless there is a
+ * compelling architectural reason to use the seperate functions.
*/
void syscall_exit_to_user_mode(struct pt_regs *regs);
@@ -341,8 +395,26 @@ void irqentry_enter_from_user_mode(struct pt_regs *regs);
void irqentry_exit_to_user_mode(struct pt_regs *regs);
#ifndef irqentry_state
+/**
+ * struct irqentry_state - Opaque object for exception state storage
+ * @exit_rcu: Used exclusively in the irqentry_*() calls; signals whether the
+ * exit path has to invoke rcu_irq_exit().
+ * @lockdep: Used exclusively in the irqentry_nmi_*() calls; ensures that
+ * lockdep state is restored correctly on exit from nmi.
+ *
+ * This opaque object is filled in by the irqentry_*_enter() functions and
+ * must be passed back into the corresponding irqentry_*_exit() functions
+ * when the exception is complete.
+ *
+ * Callers of irqentry_*_[enter|exit]() must consider this structure opaque
+ * and all members private. Descriptions of the members are provided to aid in
+ * the maintenance of the irqentry_*() functions.
+ */
typedef struct irqentry_state {
- bool exit_rcu;
+ union {
+ bool exit_rcu;
+ bool lockdep;
+ };
} irqentry_state_t;
#endif
@@ -392,7 +464,7 @@ void irqentry_exit_cond_resched(void);
* @state: Return value from matching call to irqentry_enter()
*
* Depending on the return target (kernel/user) this runs the necessary
- * preemption and work checks if possible and reguired and returns to
+ * preemption and work checks if possible and required and returns to
* the caller with interrupts disabled and no further work pending.
*
* This is the last action before returning to the low level ASM code which
@@ -402,4 +474,23 @@ void irqentry_exit_cond_resched(void);
*/
void noinstr irqentry_exit(struct pt_regs *regs, irqentry_state_t state);
+/**
+ * irqentry_nmi_enter - Handle NMI entry
+ * @regs: Pointer to currents pt_regs
+ *
+ * Similar to irqentry_enter() but taking care of the NMI constraints.
+ */
+irqentry_state_t noinstr irqentry_nmi_enter(struct pt_regs *regs);
+
+/**
+ * irqentry_nmi_exit - Handle return from NMI handling
+ * @regs: Pointer to pt_regs (NMI entry regs)
+ * @irq_state: Return value from matching call to irqentry_nmi_enter()
+ *
+ * Last action before returning to the low level assembly code.
+ *
+ * Counterpart to irqentry_nmi_enter().
+ */
+void noinstr irqentry_nmi_exit(struct pt_regs *regs, irqentry_state_t irq_state);
+
#endif
diff --git a/include/linux/entry-kvm.h b/include/linux/entry-kvm.h
index 0cef17afb41a..9b93f8584ff7 100644
--- a/include/linux/entry-kvm.h
+++ b/include/linux/entry-kvm.h
@@ -11,8 +11,8 @@
# define ARCH_XFER_TO_GUEST_MODE_WORK (0)
#endif
-#define XFER_TO_GUEST_MODE_WORK \
- (_TIF_NEED_RESCHED | _TIF_SIGPENDING | \
+#define XFER_TO_GUEST_MODE_WORK \
+ (_TIF_NEED_RESCHED | _TIF_SIGPENDING | _TIF_NOTIFY_SIGNAL | \
_TIF_NOTIFY_RESUME | ARCH_XFER_TO_GUEST_MODE_WORK)
struct kvm_vcpu;
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 1b62397bd124..29c27656165b 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -21,7 +21,7 @@
#include <linux/if_vlan.h>
#include <linux/vmalloc.h>
#include <linux/sockptr.h>
-#include <crypto/sha.h>
+#include <crypto/sha1.h>
#include <net/sch_generic.h>
diff --git a/include/linux/font.h b/include/linux/font.h
index b5b312c19e46..abf1442ce719 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -16,7 +16,8 @@
struct font_desc {
int idx;
const char *name;
- int width, height;
+ unsigned int width, height;
+ unsigned int charcount;
const void *data;
int pref;
};
diff --git a/include/linux/freelist.h b/include/linux/freelist.h
new file mode 100644
index 000000000000..fc1842b96469
--- /dev/null
+++ b/include/linux/freelist.h
@@ -0,0 +1,129 @@
+/* SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause */
+#ifndef FREELIST_H
+#define FREELIST_H
+
+#include <linux/atomic.h>
+
+/*
+ * Copyright: cameron@moodycamel.com
+ *
+ * A simple CAS-based lock-free free list. Not the fastest thing in the world
+ * under heavy contention, but simple and correct (assuming nodes are never
+ * freed until after the free list is destroyed), and fairly speedy under low
+ * contention.
+ *
+ * Adapted from: https://moodycamel.com/blog/2014/solving-the-aba-problem-for-lock-free-free-lists
+ */
+
+struct freelist_node {
+ atomic_t refs;
+ struct freelist_node *next;
+};
+
+struct freelist_head {
+ struct freelist_node *head;
+};
+
+#define REFS_ON_FREELIST 0x80000000
+#define REFS_MASK 0x7FFFFFFF
+
+static inline void __freelist_add(struct freelist_node *node, struct freelist_head *list)
+{
+ /*
+ * Since the refcount is zero, and nobody can increase it once it's
+ * zero (except us, and we run only one copy of this method per node at
+ * a time, i.e. the single thread case), then we know we can safely
+ * change the next pointer of the node; however, once the refcount is
+ * back above zero, then other threads could increase it (happens under
+ * heavy contention, when the refcount goes to zero in between a load
+ * and a refcount increment of a node in try_get, then back up to
+ * something non-zero, then the refcount increment is done by the other
+ * thread) -- so if the CAS to add the node to the actual list fails,
+ * decrese the refcount and leave the add operation to the next thread
+ * who puts the refcount back to zero (which could be us, hence the
+ * loop).
+ */
+ struct freelist_node *head = READ_ONCE(list->head);
+
+ for (;;) {
+ WRITE_ONCE(node->next, head);
+ atomic_set_release(&node->refs, 1);
+
+ if (!try_cmpxchg_release(&list->head, &head, node)) {
+ /*
+ * Hmm, the add failed, but we can only try again when
+ * the refcount goes back to zero.
+ */
+ if (atomic_fetch_add_release(REFS_ON_FREELIST - 1, &node->refs) == 1)
+ continue;
+ }
+ return;
+ }
+}
+
+static inline void freelist_add(struct freelist_node *node, struct freelist_head *list)
+{
+ /*
+ * We know that the should-be-on-freelist bit is 0 at this point, so
+ * it's safe to set it using a fetch_add.
+ */
+ if (!atomic_fetch_add_release(REFS_ON_FREELIST, &node->refs)) {
+ /*
+ * Oh look! We were the last ones referencing this node, and we
+ * know we want to add it to the free list, so let's do it!
+ */
+ __freelist_add(node, list);
+ }
+}
+
+static inline struct freelist_node *freelist_try_get(struct freelist_head *list)
+{
+ struct freelist_node *prev, *next, *head = smp_load_acquire(&list->head);
+ unsigned int refs;
+
+ while (head) {
+ prev = head;
+ refs = atomic_read(&head->refs);
+ if ((refs & REFS_MASK) == 0 ||
+ !atomic_try_cmpxchg_acquire(&head->refs, &refs, refs+1)) {
+ head = smp_load_acquire(&list->head);
+ continue;
+ }
+
+ /*
+ * Good, reference count has been incremented (it wasn't at
+ * zero), which means we can read the next and not worry about
+ * it changing between now and the time we do the CAS.
+ */
+ next = READ_ONCE(head->next);
+ if (try_cmpxchg_acquire(&list->head, &head, next)) {
+ /*
+ * Yay, got the node. This means it was on the list,
+ * which means should-be-on-freelist must be false no
+ * matter the refcount (because nobody else knows it's
+ * been taken off yet, it can't have been put back on).
+ */
+ WARN_ON_ONCE(atomic_read(&head->refs) & REFS_ON_FREELIST);
+
+ /*
+ * Decrease refcount twice, once for our ref, and once
+ * for the list's ref.
+ */
+ atomic_fetch_add(-2, &head->refs);
+
+ return head;
+ }
+
+ /*
+ * OK, the head must have changed on us, but we still need to decrement
+ * the refcount we increased.
+ */
+ refs = atomic_fetch_add(-1, &prev->refs);
+ if (refs == REFS_ON_FREELIST + 1)
+ __freelist_add(prev, list);
+ }
+
+ return NULL;
+}
+
+#endif /* FREELIST_H */
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 8667d0cdc71e..1fcc2b00582b 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3230,7 +3230,7 @@ static inline bool vma_is_fsdax(struct vm_area_struct *vma)
{
struct inode *inode;
- if (!vma->vm_file)
+ if (!IS_ENABLED(CONFIG_FS_DAX) || !vma->vm_file)
return false;
if (!vma_is_dax(vma))
return false;
diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h
index a8f7a43f031b..d23156d1ac94 100644
--- a/include/linux/fscrypt.h
+++ b/include/linux/fscrypt.h
@@ -75,7 +75,7 @@ struct fscrypt_operations {
static inline struct fscrypt_info *fscrypt_get_info(const struct inode *inode)
{
/*
- * Pairs with the cmpxchg_release() in fscrypt_get_encryption_info().
+ * Pairs with the cmpxchg_release() in fscrypt_setup_encryption_info().
* I.e., another task may publish ->i_crypt_info concurrently, executing
* a RELEASE barrier. We need to use smp_load_acquire() here to safely
* ACQUIRE the memory the other task published.
@@ -111,6 +111,35 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry)
dentry->d_flags &= ~DCACHE_NOKEY_NAME;
}
+/**
+ * fscrypt_is_nokey_name() - test whether a dentry is a no-key name
+ * @dentry: the dentry to check
+ *
+ * This returns true if the dentry is a no-key dentry. A no-key dentry is a
+ * dentry that was created in an encrypted directory that hasn't had its
+ * encryption key added yet. Such dentries may be either positive or negative.
+ *
+ * When a filesystem is asked to create a new filename in an encrypted directory
+ * and the new filename's dentry is a no-key dentry, it must fail the operation
+ * with ENOKEY. This includes ->create(), ->mkdir(), ->mknod(), ->symlink(),
+ * ->rename(), and ->link(). (However, ->rename() and ->link() are already
+ * handled by fscrypt_prepare_rename() and fscrypt_prepare_link().)
+ *
+ * This is necessary because creating a filename requires the directory's
+ * encryption key, but just checking for the key on the directory inode during
+ * the final filesystem operation doesn't guarantee that the key was available
+ * during the preceding dentry lookup. And the key must have already been
+ * available during the dentry lookup in order for it to have been checked
+ * whether the filename already exists in the directory and for the new file's
+ * dentry not to be invalidated due to it incorrectly having the no-key flag.
+ *
+ * Return: %true if the dentry is a no-key name
+ */
+static inline bool fscrypt_is_nokey_name(const struct dentry *dentry)
+{
+ return dentry->d_flags & DCACHE_NOKEY_NAME;
+}
+
/* crypto.c */
void fscrypt_enqueue_decrypt_work(struct work_struct *);
@@ -171,7 +200,6 @@ int fscrypt_ioctl_remove_key_all_users(struct file *filp, void __user *arg);
int fscrypt_ioctl_get_key_status(struct file *filp, void __user *arg);
/* keysetup.c */
-int fscrypt_get_encryption_info(struct inode *inode);
int fscrypt_prepare_new_inode(struct inode *dir, struct inode *inode,
bool *encrypt_ret);
void fscrypt_put_encryption_info(struct inode *inode);
@@ -213,6 +241,8 @@ int __fscrypt_prepare_rename(struct inode *old_dir, struct dentry *old_dentry,
unsigned int flags);
int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry,
struct fscrypt_name *fname);
+int __fscrypt_prepare_readdir(struct inode *dir);
+int __fscrypt_prepare_setattr(struct dentry *dentry, struct iattr *attr);
int fscrypt_prepare_setflags(struct inode *inode,
unsigned int oldflags, unsigned int flags);
int fscrypt_prepare_symlink(struct inode *dir, const char *target,
@@ -244,6 +274,11 @@ static inline void fscrypt_handle_d_move(struct dentry *dentry)
{
}
+static inline bool fscrypt_is_nokey_name(const struct dentry *dentry)
+{
+ return false;
+}
+
/* crypto.c */
static inline void fscrypt_enqueue_decrypt_work(struct work_struct *work)
{
@@ -372,10 +407,6 @@ static inline int fscrypt_ioctl_get_key_status(struct file *filp,
}
/* keysetup.c */
-static inline int fscrypt_get_encryption_info(struct inode *inode)
-{
- return -EOPNOTSUPP;
-}
static inline int fscrypt_prepare_new_inode(struct inode *dir,
struct inode *inode,
@@ -503,6 +534,17 @@ static inline int __fscrypt_prepare_lookup(struct inode *dir,
return -EOPNOTSUPP;
}
+static inline int __fscrypt_prepare_readdir(struct inode *dir)
+{
+ return -EOPNOTSUPP;
+}
+
+static inline int __fscrypt_prepare_setattr(struct dentry *dentry,
+ struct iattr *attr)
+{
+ return -EOPNOTSUPP;
+}
+
static inline int fscrypt_prepare_setflags(struct inode *inode,
unsigned int oldflags,
unsigned int flags)
@@ -642,32 +684,6 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode)
}
/**
- * fscrypt_require_key() - require an inode's encryption key
- * @inode: the inode we need the key for
- *
- * If the inode is encrypted, set up its encryption key if not already done.
- * Then require that the key be present and return -ENOKEY otherwise.
- *
- * No locks are needed, and the key will live as long as the struct inode --- so
- * it won't go away from under you.
- *
- * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code
- * if a problem occurred while setting up the encryption key.
- */
-static inline int fscrypt_require_key(struct inode *inode)
-{
- if (IS_ENCRYPTED(inode)) {
- int err = fscrypt_get_encryption_info(inode);
-
- if (err)
- return err;
- if (!fscrypt_has_encryption_key(inode))
- return -ENOKEY;
- }
- return 0;
-}
-
-/**
* fscrypt_prepare_link() - prepare to link an inode into a possibly-encrypted
* directory
* @old_dentry: an existing dentry for the inode being linked
@@ -676,8 +692,7 @@ static inline int fscrypt_require_key(struct inode *inode)
*
* A new link can only be added to an encrypted directory if the directory's
* encryption key is available --- since otherwise we'd have no way to encrypt
- * the filename. Therefore, we first set up the directory's encryption key (if
- * not already done) and return an error if it's unavailable.
+ * the filename.
*
* We also verify that the link will not violate the constraint that all files
* in an encrypted directory tree use the same encryption policy.
@@ -738,8 +753,9 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir,
*
* Prepare for ->lookup() in a directory which may be encrypted by determining
* the name that will actually be used to search the directory on-disk. If the
- * directory's encryption key is available, then the lookup is assumed to be by
- * plaintext name; otherwise, it is assumed to be by no-key name.
+ * directory's encryption policy is supported by this kernel and its encryption
+ * key is available, then the lookup is assumed to be by plaintext name;
+ * otherwise, it is assumed to be by no-key name.
*
* This also installs a custom ->d_revalidate() method which will invalidate the
* dentry if it was created without the key and the key is later added.
@@ -763,6 +779,26 @@ static inline int fscrypt_prepare_lookup(struct inode *dir,
}
/**
+ * fscrypt_prepare_readdir() - prepare to read a possibly-encrypted directory
+ * @dir: the directory inode
+ *
+ * If the directory is encrypted and it doesn't already have its encryption key
+ * set up, try to set it up so that the filenames will be listed in plaintext
+ * form rather than in no-key form.
+ *
+ * Return: 0 on success; -errno on error. Note that the encryption key being
+ * unavailable is not considered an error. It is also not an error if
+ * the encryption policy is unsupported by this kernel; that is treated
+ * like the key being unavailable, so that files can still be deleted.
+ */
+static inline int fscrypt_prepare_readdir(struct inode *dir)
+{
+ if (IS_ENCRYPTED(dir))
+ return __fscrypt_prepare_readdir(dir);
+ return 0;
+}
+
+/**
* fscrypt_prepare_setattr() - prepare to change a possibly-encrypted inode's
* attributes
* @dentry: dentry through which the inode is being changed
@@ -783,8 +819,8 @@ static inline int fscrypt_prepare_lookup(struct inode *dir,
static inline int fscrypt_prepare_setattr(struct dentry *dentry,
struct iattr *attr)
{
- if (attr->ia_valid & ATTR_SIZE)
- return fscrypt_require_key(d_inode(dentry));
+ if (IS_ENCRYPTED(d_inode(dentry)))
+ return __fscrypt_prepare_setattr(dentry, attr);
return 0;
}
diff --git a/include/linux/gfp.h b/include/linux/gfp.h
index c603237e006c..6e479e9c48ce 100644
--- a/include/linux/gfp.h
+++ b/include/linux/gfp.h
@@ -580,8 +580,6 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask);
extern void __free_pages(struct page *page, unsigned int order);
extern void free_pages(unsigned long addr, unsigned int order);
-extern void free_unref_page(struct page *page);
-extern void free_unref_page_list(struct list_head *list);
struct page_frag_cache;
extern void __page_frag_cache_drain(struct page *page, unsigned int count);
diff --git a/include/linux/highmem-internal.h b/include/linux/highmem-internal.h
new file mode 100644
index 000000000000..1bbe96dc8be6
--- /dev/null
+++ b/include/linux/highmem-internal.h
@@ -0,0 +1,232 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_HIGHMEM_INTERNAL_H
+#define _LINUX_HIGHMEM_INTERNAL_H
+
+/*
+ * Outside of CONFIG_HIGHMEM to support X86 32bit iomap_atomic() cruft.
+ */
+#ifdef CONFIG_KMAP_LOCAL
+void *__kmap_local_pfn_prot(unsigned long pfn, pgprot_t prot);
+void *__kmap_local_page_prot(struct page *page, pgprot_t prot);
+void kunmap_local_indexed(void *vaddr);
+void kmap_local_fork(struct task_struct *tsk);
+void __kmap_local_sched_out(void);
+void __kmap_local_sched_in(void);
+static inline void kmap_assert_nomap(void)
+{
+ DEBUG_LOCKS_WARN_ON(current->kmap_ctrl.idx);
+}
+#else
+static inline void kmap_local_fork(struct task_struct *tsk) { }
+static inline void kmap_assert_nomap(void) { }
+#endif
+
+#ifdef CONFIG_HIGHMEM
+#include <asm/highmem.h>
+
+#ifndef ARCH_HAS_KMAP_FLUSH_TLB
+static inline void kmap_flush_tlb(unsigned long addr) { }
+#endif
+
+#ifndef kmap_prot
+#define kmap_prot PAGE_KERNEL
+#endif
+
+void *kmap_high(struct page *page);
+void kunmap_high(struct page *page);
+void __kmap_flush_unused(void);
+struct page *__kmap_to_page(void *addr);
+
+static inline void *kmap(struct page *page)
+{
+ void *addr;
+
+ might_sleep();
+ if (!PageHighMem(page))
+ addr = page_address(page);
+ else
+ addr = kmap_high(page);
+ kmap_flush_tlb((unsigned long)addr);
+ return addr;
+}
+
+static inline void kunmap(struct page *page)
+{
+ might_sleep();
+ if (!PageHighMem(page))
+ return;
+ kunmap_high(page);
+}
+
+static inline struct page *kmap_to_page(void *addr)
+{
+ return __kmap_to_page(addr);
+}
+
+static inline void kmap_flush_unused(void)
+{
+ __kmap_flush_unused();
+}
+
+static inline void *kmap_local_page(struct page *page)
+{
+ return __kmap_local_page_prot(page, kmap_prot);
+}
+
+static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
+{
+ return __kmap_local_page_prot(page, prot);
+}
+
+static inline void *kmap_local_pfn(unsigned long pfn)
+{
+ return __kmap_local_pfn_prot(pfn, kmap_prot);
+}
+
+static inline void __kunmap_local(void *vaddr)
+{
+ kunmap_local_indexed(vaddr);
+}
+
+static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+{
+ preempt_disable();
+ pagefault_disable();
+ return __kmap_local_page_prot(page, prot);
+}
+
+static inline void *kmap_atomic(struct page *page)
+{
+ return kmap_atomic_prot(page, kmap_prot);
+}
+
+static inline void *kmap_atomic_pfn(unsigned long pfn)
+{
+ preempt_disable();
+ pagefault_disable();
+ return __kmap_local_pfn_prot(pfn, kmap_prot);
+}
+
+static inline void __kunmap_atomic(void *addr)
+{
+ kunmap_local_indexed(addr);
+ pagefault_enable();
+ preempt_enable();
+}
+
+unsigned int __nr_free_highpages(void);
+extern atomic_long_t _totalhigh_pages;
+
+static inline unsigned int nr_free_highpages(void)
+{
+ return __nr_free_highpages();
+}
+
+static inline unsigned long totalhigh_pages(void)
+{
+ return (unsigned long)atomic_long_read(&_totalhigh_pages);
+}
+
+static inline void totalhigh_pages_inc(void)
+{
+ atomic_long_inc(&_totalhigh_pages);
+}
+
+static inline void totalhigh_pages_add(long count)
+{
+ atomic_long_add(count, &_totalhigh_pages);
+}
+
+#else /* CONFIG_HIGHMEM */
+
+static inline struct page *kmap_to_page(void *addr)
+{
+ return virt_to_page(addr);
+}
+
+static inline void *kmap(struct page *page)
+{
+ might_sleep();
+ return page_address(page);
+}
+
+static inline void kunmap_high(struct page *page) { }
+static inline void kmap_flush_unused(void) { }
+
+static inline void kunmap(struct page *page)
+{
+#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
+ kunmap_flush_on_unmap(page_address(page));
+#endif
+}
+
+static inline void *kmap_local_page(struct page *page)
+{
+ return page_address(page);
+}
+
+static inline void *kmap_local_page_prot(struct page *page, pgprot_t prot)
+{
+ return kmap_local_page(page);
+}
+
+static inline void *kmap_local_pfn(unsigned long pfn)
+{
+ return kmap_local_page(pfn_to_page(pfn));
+}
+
+static inline void __kunmap_local(void *addr)
+{
+#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
+ kunmap_flush_on_unmap(addr);
+#endif
+}
+
+static inline void *kmap_atomic(struct page *page)
+{
+ preempt_disable();
+ pagefault_disable();
+ return page_address(page);
+}
+
+static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
+{
+ return kmap_atomic(page);
+}
+
+static inline void *kmap_atomic_pfn(unsigned long pfn)
+{
+ return kmap_atomic(pfn_to_page(pfn));
+}
+
+static inline void __kunmap_atomic(void *addr)
+{
+#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
+ kunmap_flush_on_unmap(addr);
+#endif
+ pagefault_enable();
+ preempt_enable();
+}
+
+static inline unsigned int nr_free_highpages(void) { return 0; }
+static inline unsigned long totalhigh_pages(void) { return 0UL; }
+
+#endif /* CONFIG_HIGHMEM */
+
+/*
+ * Prevent people trying to call kunmap_atomic() as if it were kunmap()
+ * kunmap_atomic() should get the return value of kmap_atomic, not the page.
+ */
+#define kunmap_atomic(__addr) \
+do { \
+ BUILD_BUG_ON(__same_type((__addr), struct page *)); \
+ __kunmap_atomic(__addr); \
+} while (0)
+
+#define kunmap_local(__addr) \
+do { \
+ BUILD_BUG_ON(__same_type((__addr), struct page *)); \
+ __kunmap_local(__addr); \
+} while (0)
+
+#endif
diff --git a/include/linux/highmem.h b/include/linux/highmem.h
index 14e6202ce47f..d2c70d3772a3 100644
--- a/include/linux/highmem.h
+++ b/include/linux/highmem.h
@@ -11,217 +11,137 @@
#include <asm/cacheflush.h>
-#ifndef ARCH_HAS_FLUSH_ANON_PAGE
-static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
-{
-}
-#endif
-
-#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
-static inline void flush_kernel_dcache_page(struct page *page)
-{
-}
-static inline void flush_kernel_vmap_range(void *vaddr, int size)
-{
-}
-static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
-{
-}
-#endif
-
-#include <asm/kmap_types.h>
-
-#ifdef CONFIG_HIGHMEM
-extern void *kmap_atomic_high_prot(struct page *page, pgprot_t prot);
-extern void kunmap_atomic_high(void *kvaddr);
-#include <asm/highmem.h>
-
-#ifndef ARCH_HAS_KMAP_FLUSH_TLB
-static inline void kmap_flush_tlb(unsigned long addr) { }
-#endif
-
-#ifndef kmap_prot
-#define kmap_prot PAGE_KERNEL
-#endif
-
-void *kmap_high(struct page *page);
-static inline void *kmap(struct page *page)
-{
- void *addr;
-
- might_sleep();
- if (!PageHighMem(page))
- addr = page_address(page);
- else
- addr = kmap_high(page);
- kmap_flush_tlb((unsigned long)addr);
- return addr;
-}
-
-void kunmap_high(struct page *page);
-
-static inline void kunmap(struct page *page)
-{
- might_sleep();
- if (!PageHighMem(page))
- return;
- kunmap_high(page);
-}
+#include "highmem-internal.h"
-/*
- * kmap_atomic/kunmap_atomic is significantly faster than kmap/kunmap because
- * no global lock is needed and because the kmap code must perform a global TLB
- * invalidation when the kmap pool wraps.
+/**
+ * kmap - Map a page for long term usage
+ * @page: Pointer to the page to be mapped
+ *
+ * Returns: The virtual address of the mapping
+ *
+ * Can only be invoked from preemptible task context because on 32bit
+ * systems with CONFIG_HIGHMEM enabled this function might sleep.
*
- * However when holding an atomic kmap it is not legal to sleep, so atomic
- * kmaps are appropriate for short, tight code paths only.
+ * For systems with CONFIG_HIGHMEM=n and for pages in the low memory area
+ * this returns the virtual address of the direct kernel mapping.
*
- * The use of kmap_atomic/kunmap_atomic is discouraged - kmap/kunmap
- * gives a more generic (and caching) interface. But kmap_atomic can
- * be used in IRQ contexts, so in some (very limited) cases we need
- * it.
+ * The returned virtual address is globally visible and valid up to the
+ * point where it is unmapped via kunmap(). The pointer can be handed to
+ * other contexts.
+ *
+ * For highmem pages on 32bit systems this can be slow as the mapping space
+ * is limited and protected by a global lock. In case that there is no
+ * mapping slot available the function blocks until a slot is released via
+ * kunmap().
*/
-static inline void *kmap_atomic_prot(struct page *page, pgprot_t prot)
-{
- preempt_disable();
- pagefault_disable();
- if (!PageHighMem(page))
- return page_address(page);
- return kmap_atomic_high_prot(page, prot);
-}
-#define kmap_atomic(page) kmap_atomic_prot(page, kmap_prot)
+static inline void *kmap(struct page *page);
-/* declarations for linux/mm/highmem.c */
-unsigned int nr_free_highpages(void);
-extern atomic_long_t _totalhigh_pages;
-static inline unsigned long totalhigh_pages(void)
-{
- return (unsigned long)atomic_long_read(&_totalhigh_pages);
-}
-
-static inline void totalhigh_pages_inc(void)
-{
- atomic_long_inc(&_totalhigh_pages);
-}
-
-static inline void totalhigh_pages_dec(void)
-{
- atomic_long_dec(&_totalhigh_pages);
-}
-
-static inline void totalhigh_pages_add(long count)
-{
- atomic_long_add(count, &_totalhigh_pages);
-}
-
-static inline void totalhigh_pages_set(long val)
-{
- atomic_long_set(&_totalhigh_pages, val);
-}
-
-void kmap_flush_unused(void);
-
-struct page *kmap_to_page(void *addr);
-
-#else /* CONFIG_HIGHMEM */
+/**
+ * kunmap - Unmap the virtual address mapped by kmap()
+ * @addr: Virtual address to be unmapped
+ *
+ * Counterpart to kmap(). A NOOP for CONFIG_HIGHMEM=n and for mappings of
+ * pages in the low memory area.
+ */
+static inline void kunmap(struct page *page);
-static inline unsigned int nr_free_highpages(void) { return 0; }
+/**
+ * kmap_to_page - Get the page for a kmap'ed address
+ * @addr: The address to look up
+ *
+ * Returns: The page which is mapped to @addr.
+ */
+static inline struct page *kmap_to_page(void *addr);
-static inline struct page *kmap_to_page(void *addr)
-{
- return virt_to_page(addr);
-}
+/**
+ * kmap_flush_unused - Flush all unused kmap mappings in order to
+ * remove stray mappings
+ */
+static inline void kmap_flush_unused(void);
-static inline unsigned long totalhigh_pages(void) { return 0UL; }
+/**
+ * kmap_local_page - Map a page for temporary usage
+ * @page: Pointer to the page to be mapped
+ *
+ * Returns: The virtual address of the mapping
+ *
+ * Can be invoked from any context.
+ *
+ * Requires careful handling when nesting multiple mappings because the map
+ * management is stack based. The unmap has to be in the reverse order of
+ * the map operation:
+ *
+ * addr1 = kmap_local_page(page1);
+ * addr2 = kmap_local_page(page2);
+ * ...
+ * kunmap_local(addr2);
+ * kunmap_local(addr1);
+ *
+ * Unmapping addr1 before addr2 is invalid and causes malfunction.
+ *
+ * Contrary to kmap() mappings the mapping is only valid in the context of
+ * the caller and cannot be handed to other contexts.
+ *
+ * On CONFIG_HIGHMEM=n kernels and for low memory pages this returns the
+ * virtual address of the direct mapping. Only real highmem pages are
+ * temporarily mapped.
+ *
+ * While it is significantly faster than kmap() for the higmem case it
+ * comes with restrictions about the pointer validity. Only use when really
+ * necessary.
+ *
+ * On HIGHMEM enabled systems mapping a highmem page has the side effect of
+ * disabling migration in order to keep the virtual address stable across
+ * preemption. No caller of kmap_local_page() can rely on this side effect.
+ */
+static inline void *kmap_local_page(struct page *page);
-static inline void *kmap(struct page *page)
-{
- might_sleep();
- return page_address(page);
-}
+/**
+ * kmap_atomic - Atomically map a page for temporary usage - Deprecated!
+ * @page: Pointer to the page to be mapped
+ *
+ * Returns: The virtual address of the mapping
+ *
+ * Effectively a wrapper around kmap_local_page() which disables pagefaults
+ * and preemption.
+ *
+ * Do not use in new code. Use kmap_local_page() instead.
+ */
+static inline void *kmap_atomic(struct page *page);
-static inline void kunmap_high(struct page *page)
-{
-}
+/**
+ * kunmap_atomic - Unmap the virtual address mapped by kmap_atomic()
+ * @addr: Virtual address to be unmapped
+ *
+ * Counterpart to kmap_atomic().
+ *
+ * Effectively a wrapper around kunmap_local() which additionally undoes
+ * the side effects of kmap_atomic(), i.e. reenabling pagefaults and
+ * preemption.
+ */
-static inline void kunmap(struct page *page)
-{
-#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
- kunmap_flush_on_unmap(page_address(page));
-#endif
-}
+/* Highmem related interfaces for management code */
+static inline unsigned int nr_free_highpages(void);
+static inline unsigned long totalhigh_pages(void);
-static inline void *kmap_atomic(struct page *page)
+#ifndef ARCH_HAS_FLUSH_ANON_PAGE
+static inline void flush_anon_page(struct vm_area_struct *vma, struct page *page, unsigned long vmaddr)
{
- preempt_disable();
- pagefault_disable();
- return page_address(page);
}
-#define kmap_atomic_prot(page, prot) kmap_atomic(page)
-
-static inline void kunmap_atomic_high(void *addr)
-{
- /*
- * Mostly nothing to do in the CONFIG_HIGHMEM=n case as kunmap_atomic()
- * handles re-enabling faults + preemption
- */
-#ifdef ARCH_HAS_FLUSH_ON_KUNMAP
- kunmap_flush_on_unmap(addr);
#endif
-}
-
-#define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn))
-#define kmap_flush_unused() do {} while(0)
-
-#endif /* CONFIG_HIGHMEM */
-
-#if defined(CONFIG_HIGHMEM) || defined(CONFIG_X86_32)
-
-DECLARE_PER_CPU(int, __kmap_atomic_idx);
-
-static inline int kmap_atomic_idx_push(void)
+#ifndef ARCH_HAS_FLUSH_KERNEL_DCACHE_PAGE
+static inline void flush_kernel_dcache_page(struct page *page)
{
- int idx = __this_cpu_inc_return(__kmap_atomic_idx) - 1;
-
-#ifdef CONFIG_DEBUG_HIGHMEM
- WARN_ON_ONCE(in_irq() && !irqs_disabled());
- BUG_ON(idx >= KM_TYPE_NR);
-#endif
- return idx;
}
-
-static inline int kmap_atomic_idx(void)
+static inline void flush_kernel_vmap_range(void *vaddr, int size)
{
- return __this_cpu_read(__kmap_atomic_idx) - 1;
}
-
-static inline void kmap_atomic_idx_pop(void)
+static inline void invalidate_kernel_vmap_range(void *vaddr, int size)
{
-#ifdef CONFIG_DEBUG_HIGHMEM
- int idx = __this_cpu_dec_return(__kmap_atomic_idx);
-
- BUG_ON(idx < 0);
-#else
- __this_cpu_dec(__kmap_atomic_idx);
-#endif
}
-
#endif
-/*
- * Prevent people trying to call kunmap_atomic() as if it were kunmap()
- * kunmap_atomic() should get the return value of kmap_atomic, not the page.
- */
-#define kunmap_atomic(addr) \
-do { \
- BUILD_BUG_ON(__same_type((addr), struct page *)); \
- kunmap_atomic_high(addr); \
- pagefault_enable(); \
- preempt_enable(); \
-} while (0)
-
-
/* when CONFIG_HIGHMEM is not set these will be plain clear/copy_page */
#ifndef clear_user_highpage
static inline void clear_user_highpage(struct page *page, unsigned long vaddr)
@@ -284,13 +204,22 @@ static inline void clear_highpage(struct page *page)
kunmap_atomic(kaddr);
}
+/*
+ * If we pass in a base or tail page, we can zero up to PAGE_SIZE.
+ * If we pass in a head page, we can zero up to the size of the compound page.
+ */
+#if defined(CONFIG_HIGHMEM) && defined(CONFIG_TRANSPARENT_HUGEPAGE)
+void zero_user_segments(struct page *page, unsigned start1, unsigned end1,
+ unsigned start2, unsigned end2);
+#else /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
static inline void zero_user_segments(struct page *page,
- unsigned start1, unsigned end1,
- unsigned start2, unsigned end2)
+ unsigned start1, unsigned end1,
+ unsigned start2, unsigned end2)
{
void *kaddr = kmap_atomic(page);
+ unsigned int i;
- BUG_ON(end1 > PAGE_SIZE || end2 > PAGE_SIZE);
+ BUG_ON(end1 > page_size(page) || end2 > page_size(page));
if (end1 > start1)
memset(kaddr + start1, 0, end1 - start1);
@@ -299,8 +228,10 @@ static inline void zero_user_segments(struct page *page,
memset(kaddr + start2, 0, end2 - start2);
kunmap_atomic(kaddr);
- flush_dcache_page(page);
+ for (i = 0; i < compound_nr(page); i++)
+ flush_dcache_page(page + i);
}
+#endif /* !HIGHMEM || !TRANSPARENT_HUGEPAGE */
static inline void zero_user_segment(struct page *page,
unsigned start, unsigned end)
diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h
index 107cedd7019a..bb5e7b0a4274 100644
--- a/include/linux/hrtimer.h
+++ b/include/linux/hrtimer.h
@@ -447,6 +447,10 @@ static inline void hrtimer_restart(struct hrtimer *timer)
/* Query timers: */
extern ktime_t __hrtimer_get_remaining(const struct hrtimer *timer, bool adjust);
+/**
+ * hrtimer_get_remaining - get remaining time for the timer
+ * @timer: the timer to read
+ */
static inline ktime_t hrtimer_get_remaining(const struct hrtimer *timer)
{
return __hrtimer_get_remaining(timer, false);
@@ -458,7 +462,7 @@ extern u64 hrtimer_next_event_without(const struct hrtimer *exclude);
extern bool hrtimer_active(const struct hrtimer *timer);
/**
- * hrtimer_is_queued = check, whether the timer is on one of the queues
+ * hrtimer_is_queued - check, whether the timer is on one of the queues
* @timer: Timer to check
*
* Returns: True if the timer is queued, false otherwise
diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h
index 0365aa97f8e7..6a19f35f836b 100644
--- a/include/linux/huge_mm.h
+++ b/include/linux/huge_mm.h
@@ -7,43 +7,37 @@
#include <linux/fs.h> /* only for vma_is_dax() */
-extern vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
-extern int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
- struct vm_area_struct *vma);
-extern void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
-extern int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
- pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
- struct vm_area_struct *vma);
+vm_fault_t do_huge_pmd_anonymous_page(struct vm_fault *vmf);
+int copy_huge_pmd(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pmd_t *dst_pmd, pmd_t *src_pmd, unsigned long addr,
+ struct vm_area_struct *vma);
+void huge_pmd_set_accessed(struct vm_fault *vmf, pmd_t orig_pmd);
+int copy_huge_pud(struct mm_struct *dst_mm, struct mm_struct *src_mm,
+ pud_t *dst_pud, pud_t *src_pud, unsigned long addr,
+ struct vm_area_struct *vma);
#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD
-extern void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
+void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud);
#else
static inline void huge_pud_set_accessed(struct vm_fault *vmf, pud_t orig_pud)
{
}
#endif
-extern vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
-extern struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
- unsigned long addr,
- pmd_t *pmd,
- unsigned int flags);
-extern bool madvise_free_huge_pmd(struct mmu_gather *tlb,
- struct vm_area_struct *vma,
- pmd_t *pmd, unsigned long addr, unsigned long next);
-extern int zap_huge_pmd(struct mmu_gather *tlb,
- struct vm_area_struct *vma,
- pmd_t *pmd, unsigned long addr);
-extern int zap_huge_pud(struct mmu_gather *tlb,
- struct vm_area_struct *vma,
- pud_t *pud, unsigned long addr);
-extern bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
- unsigned long new_addr,
- pmd_t *old_pmd, pmd_t *new_pmd);
-extern int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd,
- unsigned long addr, pgprot_t newprot,
- unsigned long cp_flags);
+vm_fault_t do_huge_pmd_wp_page(struct vm_fault *vmf, pmd_t orig_pmd);
+struct page *follow_trans_huge_pmd(struct vm_area_struct *vma,
+ unsigned long addr, pmd_t *pmd,
+ unsigned int flags);
+bool madvise_free_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma,
+ pmd_t *pmd, unsigned long addr, unsigned long next);
+int zap_huge_pmd(struct mmu_gather *tlb, struct vm_area_struct *vma, pmd_t *pmd,
+ unsigned long addr);
+int zap_huge_pud(struct mmu_gather *tlb, struct vm_area_struct *vma, pud_t *pud,
+ unsigned long addr);
+bool move_huge_pmd(struct vm_area_struct *vma, unsigned long old_addr,
+ unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd);
+int change_huge_pmd(struct vm_area_struct *vma, pmd_t *pmd, unsigned long addr,
+ pgprot_t newprot, unsigned long cp_flags);
vm_fault_t vmf_insert_pfn_pmd_prot(struct vm_fault *vmf, pfn_t pfn,
pgprot_t pgprot, bool write);
@@ -100,13 +94,13 @@ enum transparent_hugepage_flag {
struct kobject;
struct kobj_attribute;
-extern ssize_t single_hugepage_flag_store(struct kobject *kobj,
- struct kobj_attribute *attr,
- const char *buf, size_t count,
- enum transparent_hugepage_flag flag);
-extern ssize_t single_hugepage_flag_show(struct kobject *kobj,
- struct kobj_attribute *attr, char *buf,
- enum transparent_hugepage_flag flag);
+ssize_t single_hugepage_flag_store(struct kobject *kobj,
+ struct kobj_attribute *attr,
+ const char *buf, size_t count,
+ enum transparent_hugepage_flag flag);
+ssize_t single_hugepage_flag_show(struct kobject *kobj,
+ struct kobj_attribute *attr, char *buf,
+ enum transparent_hugepage_flag flag);
extern struct kobj_attribute shmem_enabled_attr;
#define HPAGE_PMD_ORDER (HPAGE_PMD_SHIFT-PAGE_SHIFT)
@@ -179,12 +173,11 @@ static inline bool transhuge_vma_suitable(struct vm_area_struct *vma,
(transparent_hugepage_flags & \
(1<<TRANSPARENT_HUGEPAGE_USE_ZERO_PAGE_FLAG))
-extern unsigned long thp_get_unmapped_area(struct file *filp,
- unsigned long addr, unsigned long len, unsigned long pgoff,
- unsigned long flags);
+unsigned long thp_get_unmapped_area(struct file *filp, unsigned long addr,
+ unsigned long len, unsigned long pgoff, unsigned long flags);
-extern void prep_transhuge_page(struct page *page);
-extern void free_transhuge_page(struct page *page);
+void prep_transhuge_page(struct page *page);
+void free_transhuge_page(struct page *page);
bool is_transparent_hugepage(struct page *page);
bool can_split_huge_page(struct page *page, int *pextra_pins);
@@ -222,16 +215,12 @@ void __split_huge_pud(struct vm_area_struct *vma, pud_t *pud,
__split_huge_pud(__vma, __pud, __address); \
} while (0)
-extern int hugepage_madvise(struct vm_area_struct *vma,
- unsigned long *vm_flags, int advice);
-extern void vma_adjust_trans_huge(struct vm_area_struct *vma,
- unsigned long start,
- unsigned long end,
- long adjust_next);
-extern spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd,
- struct vm_area_struct *vma);
-extern spinlock_t *__pud_trans_huge_lock(pud_t *pud,
- struct vm_area_struct *vma);
+int hugepage_madvise(struct vm_area_struct *vma, unsigned long *vm_flags,
+ int advice);
+void vma_adjust_trans_huge(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, long adjust_next);
+spinlock_t *__pmd_trans_huge_lock(pmd_t *pmd, struct vm_area_struct *vma);
+spinlock_t *__pud_trans_huge_lock(pud_t *pud, struct vm_area_struct *vma);
static inline int is_swap_pmd(pmd_t pmd)
{
@@ -294,7 +283,7 @@ struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr,
struct page *follow_devmap_pud(struct vm_area_struct *vma, unsigned long addr,
pud_t *pud, int flags, struct dev_pagemap **pgmap);
-extern vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
+vm_fault_t do_huge_pmd_numa_page(struct vm_fault *vmf, pmd_t orig_pmd);
extern struct page *huge_zero_page;
diff --git a/include/linux/io-mapping.h b/include/linux/io-mapping.h
index c75e4d3d8833..c093e81310a9 100644
--- a/include/linux/io-mapping.h
+++ b/include/linux/io-mapping.h
@@ -69,13 +69,32 @@ io_mapping_map_atomic_wc(struct io_mapping *mapping,
BUG_ON(offset >= mapping->size);
phys_addr = mapping->base + offset;
- return iomap_atomic_prot_pfn(PHYS_PFN(phys_addr), mapping->prot);
+ preempt_disable();
+ pagefault_disable();
+ return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot);
}
static inline void
io_mapping_unmap_atomic(void __iomem *vaddr)
{
- iounmap_atomic(vaddr);
+ kunmap_local_indexed((void __force *)vaddr);
+ pagefault_enable();
+ preempt_enable();
+}
+
+static inline void __iomem *
+io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset)
+{
+ resource_size_t phys_addr;
+
+ BUG_ON(offset >= mapping->size);
+ phys_addr = mapping->base + offset;
+ return __iomap_local_pfn_prot(PHYS_PFN(phys_addr), mapping->prot);
+}
+
+static inline void io_mapping_unmap_local(void __iomem *vaddr)
+{
+ kunmap_local_indexed((void __force *)vaddr);
}
static inline void __iomem *
@@ -97,7 +116,7 @@ io_mapping_unmap(void __iomem *vaddr)
iounmap(vaddr);
}
-#else
+#else /* HAVE_ATOMIC_IOMAP */
#include <linux/uaccess.h>
@@ -162,7 +181,18 @@ io_mapping_unmap_atomic(void __iomem *vaddr)
preempt_enable();
}
-#endif /* HAVE_ATOMIC_IOMAP */
+static inline void __iomem *
+io_mapping_map_local_wc(struct io_mapping *mapping, unsigned long offset)
+{
+ return io_mapping_map_wc(mapping, offset, PAGE_SIZE);
+}
+
+static inline void io_mapping_unmap_local(void __iomem *vaddr)
+{
+ io_mapping_unmap(vaddr);
+}
+
+#endif /* !HAVE_ATOMIC_IOMAP */
static inline struct io_mapping *
io_mapping_create_wc(resource_size_t base,
diff --git a/include/linux/io-pgtable.h b/include/linux/io-pgtable.h
index 4cde111e425b..fb4d5a763e0c 100644
--- a/include/linux/io-pgtable.h
+++ b/include/linux/io-pgtable.h
@@ -86,6 +86,9 @@ struct io_pgtable_cfg {
*
* IO_PGTABLE_QUIRK_ARM_TTBR1: (ARM LPAE format) Configure the table
* for use in the upper half of a split address space.
+ *
+ * IO_PGTABLE_QUIRK_ARM_OUTER_WBWA: Override the outer-cacheability
+ * attributes set in the TCR for a non-coherent page-table walker.
*/
#define IO_PGTABLE_QUIRK_ARM_NS BIT(0)
#define IO_PGTABLE_QUIRK_NO_PERMS BIT(1)
@@ -93,6 +96,7 @@ struct io_pgtable_cfg {
#define IO_PGTABLE_QUIRK_ARM_MTK_EXT BIT(3)
#define IO_PGTABLE_QUIRK_NON_STRICT BIT(4)
#define IO_PGTABLE_QUIRK_ARM_TTBR1 BIT(5)
+ #define IO_PGTABLE_QUIRK_ARM_OUTER_WBWA BIT(6)
unsigned long quirks;
unsigned long pgsize_bitmap;
unsigned int ias;
@@ -208,6 +212,10 @@ struct io_pgtable {
#define io_pgtable_ops_to_pgtable(x) container_of((x), struct io_pgtable, ops)
+struct io_pgtable_domain_attr {
+ unsigned long quirks;
+};
+
static inline void io_pgtable_tlb_flush_all(struct io_pgtable *iop)
{
iop->cfg.tlb->tlb_flush_all(iop->cookie);
diff --git a/include/linux/iommu.h b/include/linux/iommu.h
index b95a6f8db6ff..ffaa389ea128 100644
--- a/include/linux/iommu.h
+++ b/include/linux/iommu.h
@@ -118,6 +118,7 @@ enum iommu_attr {
DOMAIN_ATTR_FSL_PAMUV1,
DOMAIN_ATTR_NESTING, /* two stages of translation */
DOMAIN_ATTR_DMA_USE_FLUSH_QUEUE,
+ DOMAIN_ATTR_IO_PGTABLE_CFG,
DOMAIN_ATTR_MAX,
};
diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h
index a06a78c67f19..05e22770af51 100644
--- a/include/linux/ipc_namespace.h
+++ b/include/linux/ipc_namespace.h
@@ -27,7 +27,6 @@ struct ipc_ids {
};
struct ipc_namespace {
- refcount_t count;
struct ipc_ids ids[3];
int sem_ctls[4];
@@ -128,7 +127,7 @@ extern struct ipc_namespace *copy_ipcs(unsigned long flags,
static inline struct ipc_namespace *get_ipc_ns(struct ipc_namespace *ns)
{
if (ns)
- refcount_inc(&ns->count);
+ refcount_inc(&ns->ns.count);
return ns;
}
diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h
index 30823780c192..ec2a47a81e42 100644
--- a/include/linux/irq_work.h
+++ b/include/linux/irq_work.h
@@ -14,28 +14,37 @@
*/
struct irq_work {
- union {
- struct __call_single_node node;
- struct {
- struct llist_node llnode;
- atomic_t flags;
- };
- };
+ struct __call_single_node node;
void (*func)(struct irq_work *);
};
+#define __IRQ_WORK_INIT(_func, _flags) (struct irq_work){ \
+ .node = { .u_flags = (_flags), }, \
+ .func = (_func), \
+}
+
+#define IRQ_WORK_INIT(_func) __IRQ_WORK_INIT(_func, 0)
+#define IRQ_WORK_INIT_LAZY(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_LAZY)
+#define IRQ_WORK_INIT_HARD(_func) __IRQ_WORK_INIT(_func, IRQ_WORK_HARD_IRQ)
+
+#define DEFINE_IRQ_WORK(name, _f) \
+ struct irq_work name = IRQ_WORK_INIT(_f)
+
static inline
void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *))
{
- atomic_set(&work->flags, 0);
- work->func = func;
+ *work = IRQ_WORK_INIT(func);
}
-#define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { \
- .flags = ATOMIC_INIT(0), \
- .func = (_f) \
+static inline bool irq_work_is_pending(struct irq_work *work)
+{
+ return atomic_read(&work->node.a_flags) & IRQ_WORK_PENDING;
}
+static inline bool irq_work_is_busy(struct irq_work *work)
+{
+ return atomic_read(&work->node.a_flags) & IRQ_WORK_BUSY;
+}
bool irq_work_queue(struct irq_work *work);
bool irq_work_queue_on(struct irq_work *work, int cpu);
diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h
index 3ed4e8771b64..8de0e1373de7 100644
--- a/include/linux/irqflags.h
+++ b/include/linux/irqflags.h
@@ -107,14 +107,14 @@ do { \
current->irq_config = 0; \
} while (0)
-# define lockdep_irq_work_enter(__work) \
+# define lockdep_irq_work_enter(_flags) \
do { \
- if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\
+ if (!((_flags) & IRQ_WORK_HARD_IRQ)) \
current->irq_config = 1; \
} while (0)
-# define lockdep_irq_work_exit(__work) \
+# define lockdep_irq_work_exit(_flags) \
do { \
- if (!(atomic_read(&__work->flags) & IRQ_WORK_HARD_IRQ))\
+ if (!((_flags) & IRQ_WORK_HARD_IRQ)) \
current->irq_config = 0; \
} while (0)
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index 2f05e9128201..dbf6018fc312 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -204,6 +204,7 @@ extern int _cond_resched(void);
extern void ___might_sleep(const char *file, int line, int preempt_offset);
extern void __might_sleep(const char *file, int line, int preempt_offset);
extern void __cant_sleep(const char *file, int line, int preempt_offset);
+extern void __cant_migrate(const char *file, int line);
/**
* might_sleep - annotation for functions that can sleep
@@ -227,6 +228,18 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
# define cant_sleep() \
do { __cant_sleep(__FILE__, __LINE__, 0); } while (0)
# define sched_annotate_sleep() (current->task_state_change = 0)
+
+/**
+ * cant_migrate - annotation for functions that cannot migrate
+ *
+ * Will print a stack trace if executed in code which is migratable
+ */
+# define cant_migrate() \
+ do { \
+ if (IS_ENABLED(CONFIG_SMP)) \
+ __cant_migrate(__FILE__, __LINE__); \
+ } while (0)
+
/**
* non_block_start - annotate the start of section where sleeping is prohibited
*
@@ -251,6 +264,7 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
int preempt_offset) { }
# define might_sleep() do { might_resched(); } while (0)
# define cant_sleep() do { } while (0)
+# define cant_migrate() do { } while (0)
# define sched_annotate_sleep() do { } while (0)
# define non_block_start() do { } while (0)
# define non_block_end() do { } while (0)
@@ -258,13 +272,6 @@ extern void __cant_sleep(const char *file, int line, int preempt_offset);
#define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0)
-#ifndef CONFIG_PREEMPT_RT
-# define cant_migrate() cant_sleep()
-#else
- /* Placeholder for now */
-# define cant_migrate() do { } while (0)
-#endif
-
/**
* abs - return absolute value of an argument
* @x: the value. If it is unsigned type, it is converted to signed type first.
@@ -536,6 +543,7 @@ extern int panic_on_warn;
extern unsigned long panic_on_taint;
extern bool panic_on_taint_nousertaint;
extern int sysctl_panic_on_rcu_stall;
+extern int sysctl_max_rcu_stall_to_panic;
extern int sysctl_panic_on_stackoverflow;
extern bool crash_kexec_post_notifiers;
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 629abaf25681..a79404433812 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -27,6 +27,8 @@
#include <linux/rcupdate.h>
#include <linux/mutex.h>
#include <linux/ftrace.h>
+#include <linux/refcount.h>
+#include <linux/freelist.h>
#include <asm/kprobes.h>
#ifdef CONFIG_KPROBES
@@ -144,6 +146,11 @@ static inline int kprobe_ftrace(struct kprobe *p)
* ignored, due to maxactive being too low.
*
*/
+struct kretprobe_holder {
+ struct kretprobe *rp;
+ refcount_t ref;
+};
+
struct kretprobe {
struct kprobe kp;
kretprobe_handler_t handler;
@@ -151,18 +158,18 @@ struct kretprobe {
int maxactive;
int nmissed;
size_t data_size;
- struct hlist_head free_instances;
- raw_spinlock_t lock;
+ struct freelist_head freelist;
+ struct kretprobe_holder *rph;
};
struct kretprobe_instance {
union {
- struct hlist_node hlist;
+ struct freelist_node freelist;
struct rcu_head rcu;
};
- struct kretprobe *rp;
+ struct llist_node llist;
+ struct kretprobe_holder *rph;
kprobe_opcode_t *ret_addr;
- struct task_struct *task;
void *fp;
char data[];
};
@@ -221,6 +228,14 @@ unsigned long kretprobe_trampoline_handler(struct pt_regs *regs,
return ret;
}
+static nokprobe_inline struct kretprobe *get_kretprobe(struct kretprobe_instance *ri)
+{
+ RCU_LOCKDEP_WARN(!rcu_read_lock_any_held(),
+ "Kretprobe is accessed from instance under preemptive context");
+
+ return READ_ONCE(ri->rph->rp);
+}
+
#else /* CONFIG_KRETPROBES */
static inline void arch_prepare_kretprobe(struct kretprobe *rp,
struct pt_regs *regs)
diff --git a/include/linux/list.h b/include/linux/list.h
index a18c87b63376..89bdc92e75c3 100644
--- a/include/linux/list.h
+++ b/include/linux/list.h
@@ -9,7 +9,7 @@
#include <linux/kernel.h>
/*
- * Simple doubly linked list implementation.
+ * Circular doubly linked list implementation.
*
* Some of the internal functions ("__xxx") are useful when
* manipulating whole lists rather than single entries, as
diff --git a/include/linux/llist.h b/include/linux/llist.h
index 2e9c7215882b..24f207b0190b 100644
--- a/include/linux/llist.h
+++ b/include/linux/llist.h
@@ -197,6 +197,16 @@ static inline struct llist_node *llist_next(struct llist_node *node)
extern bool llist_add_batch(struct llist_node *new_first,
struct llist_node *new_last,
struct llist_head *head);
+
+static inline bool __llist_add_batch(struct llist_node *new_first,
+ struct llist_node *new_last,
+ struct llist_head *head)
+{
+ new_last->next = head->first;
+ head->first = new_first;
+ return new_last->next == NULL;
+}
+
/**
* llist_add - add a new entry
* @new: new entry to be added
@@ -209,6 +219,11 @@ static inline bool llist_add(struct llist_node *new, struct llist_head *head)
return llist_add_batch(new, new, head);
}
+static inline bool __llist_add(struct llist_node *new, struct llist_head *head)
+{
+ return __llist_add_batch(new, new, head);
+}
+
/**
* llist_del_all - delete all entries from lock-less list
* @head: the head of lock-less list to delete all entries
@@ -222,6 +237,14 @@ static inline struct llist_node *llist_del_all(struct llist_head *head)
return xchg(&head->first, NULL);
}
+static inline struct llist_node *__llist_del_all(struct llist_head *head)
+{
+ struct llist_node *first = head->first;
+
+ head->first = NULL;
+ return first;
+}
+
extern struct llist_node *llist_del_first(struct llist_head *head);
struct llist_node *llist_reverse_order(struct llist_node *head);
diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 92771bc1791f..b9e9adec73e8 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -375,6 +375,12 @@ static inline void lockdep_unregister_key(struct lock_class_key *key)
#define lockdep_depth(tsk) (0)
+/*
+ * Dummy forward declarations, allow users to write less ifdef-y code
+ * and depend on dead code elimination.
+ */
+extern int lock_is_held(const void *);
+extern int lockdep_is_held(const void *);
#define lockdep_is_held_type(l, r) (1)
#define lockdep_assert_held(l) do { (void)(l); } while (0)
diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h
index 320369c841f5..f5b4d710f099 100644
--- a/include/linux/memcontrol.h
+++ b/include/linux/memcontrol.h
@@ -235,11 +235,6 @@ struct mem_cgroup {
struct vmpressure vmpressure;
/*
- * Should the accounting and control be hierarchical, per subtree?
- */
- bool use_hierarchy;
-
- /*
* Should the OOM killer kill all belonging tasks, had it kill one?
*/
bool oom_group;
@@ -296,7 +291,6 @@ struct mem_cgroup {
int tcpmem_pressure;
#ifdef CONFIG_MEMCG_KMEM
- /* Index in the kmem_cache->memcg_params.memcg_caches array */
int kmemcg_id;
enum memcg_kmem_state kmem_state;
struct obj_cgroup __rcu *objcg;
@@ -758,8 +752,6 @@ static inline bool mem_cgroup_is_descendant(struct mem_cgroup *memcg,
{
if (root == memcg)
return true;
- if (!root->use_hierarchy)
- return false;
return cgroup_is_descendant(memcg->css.cgroup, root->css.cgroup);
}
@@ -967,19 +959,15 @@ static inline unsigned long lruvec_page_state_local(struct lruvec *lruvec,
void __mod_memcg_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
int val);
-void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
- int val);
-void __mod_lruvec_slab_state(void *p, enum node_stat_item idx, int val);
-
-void mod_memcg_obj_state(void *p, int idx, int val);
+void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx, int val);
-static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
int val)
{
unsigned long flags;
local_irq_save(flags);
- __mod_lruvec_slab_state(p, idx, val);
+ __mod_lruvec_kmem_state(p, idx, val);
local_irq_restore(flags);
}
@@ -993,44 +981,6 @@ static inline void mod_memcg_lruvec_state(struct lruvec *lruvec,
local_irq_restore(flags);
}
-static inline void mod_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx, int val)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __mod_lruvec_state(lruvec, idx, val);
- local_irq_restore(flags);
-}
-
-static inline void __mod_lruvec_page_state(struct page *page,
- enum node_stat_item idx, int val)
-{
- struct page *head = compound_head(page); /* rmap on tail pages */
- struct mem_cgroup *memcg = page_memcg(head);
- pg_data_t *pgdat = page_pgdat(page);
- struct lruvec *lruvec;
-
- /* Untracked pages have no memcg, no lruvec. Update only the node */
- if (!memcg) {
- __mod_node_page_state(pgdat, idx, val);
- return;
- }
-
- lruvec = mem_cgroup_lruvec(memcg, pgdat);
- __mod_lruvec_state(lruvec, idx, val);
-}
-
-static inline void mod_lruvec_page_state(struct page *page,
- enum node_stat_item idx, int val)
-{
- unsigned long flags;
-
- local_irq_save(flags);
- __mod_lruvec_page_state(page, idx, val);
- local_irq_restore(flags);
-}
-
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
unsigned long *total_scanned);
@@ -1412,31 +1362,7 @@ static inline void __mod_memcg_lruvec_state(struct lruvec *lruvec,
{
}
-static inline void __mod_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx, int val)
-{
- __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-}
-
-static inline void mod_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx, int val)
-{
- mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
-}
-
-static inline void __mod_lruvec_page_state(struct page *page,
- enum node_stat_item idx, int val)
-{
- __mod_node_page_state(page_pgdat(page), idx, val);
-}
-
-static inline void mod_lruvec_page_state(struct page *page,
- enum node_stat_item idx, int val)
-{
- mod_node_page_state(page_pgdat(page), idx, val);
-}
-
-static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+static inline void __mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
int val)
{
struct page *page = virt_to_head_page(p);
@@ -1444,7 +1370,7 @@ static inline void __mod_lruvec_slab_state(void *p, enum node_stat_item idx,
__mod_node_page_state(page_pgdat(page), idx, val);
}
-static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
+static inline void mod_lruvec_kmem_state(void *p, enum node_stat_item idx,
int val)
{
struct page *page = virt_to_head_page(p);
@@ -1452,10 +1378,6 @@ static inline void mod_lruvec_slab_state(void *p, enum node_stat_item idx,
mod_node_page_state(page_pgdat(page), idx, val);
}
-static inline void mod_memcg_obj_state(void *p, int idx, int val)
-{
-}
-
static inline
unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order,
gfp_t gfp_mask,
@@ -1519,38 +1441,14 @@ static inline void __dec_memcg_page_state(struct page *page,
__mod_memcg_page_state(page, idx, -1);
}
-static inline void __inc_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- __mod_lruvec_state(lruvec, idx, 1);
-}
-
-static inline void __dec_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- __mod_lruvec_state(lruvec, idx, -1);
-}
-
-static inline void __inc_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- __mod_lruvec_page_state(page, idx, 1);
-}
-
-static inline void __dec_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- __mod_lruvec_page_state(page, idx, -1);
-}
-
-static inline void __inc_lruvec_slab_state(void *p, enum node_stat_item idx)
+static inline void __inc_lruvec_kmem_state(void *p, enum node_stat_item idx)
{
- __mod_lruvec_slab_state(p, idx, 1);
+ __mod_lruvec_kmem_state(p, idx, 1);
}
-static inline void __dec_lruvec_slab_state(void *p, enum node_stat_item idx)
+static inline void __dec_lruvec_kmem_state(void *p, enum node_stat_item idx)
{
- __mod_lruvec_slab_state(p, idx, -1);
+ __mod_lruvec_kmem_state(p, idx, -1);
}
/* idx can be of type enum memcg_stat_item or node_stat_item */
@@ -1581,30 +1479,6 @@ static inline void dec_memcg_page_state(struct page *page,
mod_memcg_page_state(page, idx, -1);
}
-static inline void inc_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- mod_lruvec_state(lruvec, idx, 1);
-}
-
-static inline void dec_lruvec_state(struct lruvec *lruvec,
- enum node_stat_item idx)
-{
- mod_lruvec_state(lruvec, idx, -1);
-}
-
-static inline void inc_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- mod_lruvec_page_state(page, idx, 1);
-}
-
-static inline void dec_lruvec_page_state(struct page *page,
- enum node_stat_item idx)
-{
- mod_lruvec_page_state(page, idx, -1);
-}
-
static inline struct lruvec *parent_lruvec(struct lruvec *lruvec)
{
struct mem_cgroup *memcg;
@@ -1765,9 +1639,8 @@ static inline void memcg_kmem_uncharge(struct mem_cgroup *memcg,
}
/*
- * helper for accessing a memcg's index. It will be used as an index in the
- * child cache array in kmem_cache, and also to derive its name. This function
- * will return -1 when this is not a kmem-limited memcg.
+ * A helper for accessing memcg's kmem_id, used for getting
+ * corresponding LRU lists.
*/
static inline int memcg_cache_id(struct mem_cgroup *memcg)
{
diff --git a/include/linux/migrate.h b/include/linux/migrate.h
index 0f8d1583fa8e..4594838a0f7c 100644
--- a/include/linux/migrate.h
+++ b/include/linux/migrate.h
@@ -45,8 +45,8 @@ extern struct page *alloc_migration_target(struct page *page, unsigned long priv
extern int isolate_movable_page(struct page *page, isolate_mode_t mode);
extern void putback_movable_page(struct page *page);
-extern int migrate_prep(void);
-extern int migrate_prep_local(void);
+extern void migrate_prep(void);
+extern void migrate_prep_local(void);
extern void migrate_page_states(struct page *newpage, struct page *page);
extern void migrate_page_copy(struct page *newpage, struct page *page);
extern int migrate_huge_page_move_mapping(struct address_space *mapping,
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 6b0c9d2c1d10..abc7b3154298 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -557,8 +557,16 @@ enum page_entry_size {
struct vm_operations_struct {
void (*open)(struct vm_area_struct * area);
void (*close)(struct vm_area_struct * area);
- int (*split)(struct vm_area_struct * area, unsigned long addr);
- int (*mremap)(struct vm_area_struct * area);
+ /* Called any time before splitting to check if it's allowed */
+ int (*may_split)(struct vm_area_struct *area, unsigned long addr);
+ int (*mremap)(struct vm_area_struct *area, unsigned long flags);
+ /*
+ * Called by mprotect() to make driver-specific permission
+ * checks before mprotect() is finalised. The VMA must not
+ * be modified. Returns 0 if eprotect() can proceed.
+ */
+ int (*mprotect)(struct vm_area_struct *vma, unsigned long start,
+ unsigned long end, unsigned long newflags);
vm_fault_t (*fault)(struct vm_fault *vmf);
vm_fault_t (*huge_fault)(struct vm_fault *vmf,
enum page_entry_size pe_size);
@@ -1694,8 +1702,8 @@ extern int access_process_vm(struct task_struct *tsk, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
extern int access_remote_vm(struct mm_struct *mm, unsigned long addr,
void *buf, int len, unsigned int gup_flags);
-extern int __access_remote_vm(struct task_struct *tsk, struct mm_struct *mm,
- unsigned long addr, void *buf, int len, unsigned int gup_flags);
+extern int __access_remote_vm(struct mm_struct *mm, unsigned long addr,
+ void *buf, int len, unsigned int gup_flags);
long get_user_pages_remote(struct mm_struct *mm,
unsigned long start, unsigned long nr_pages,
@@ -2181,7 +2189,7 @@ static inline bool pgtable_pte_page_ctor(struct page *page)
if (!ptlock_init(page))
return false;
__SetPageTable(page);
- inc_zone_page_state(page, NR_PAGETABLE);
+ inc_lruvec_page_state(page, NR_PAGETABLE);
return true;
}
@@ -2189,7 +2197,7 @@ static inline void pgtable_pte_page_dtor(struct page *page)
{
ptlock_free(page);
__ClearPageTable(page);
- dec_zone_page_state(page, NR_PAGETABLE);
+ dec_lruvec_page_state(page, NR_PAGETABLE);
}
#define pte_offset_map_lock(mm, pmd, address, ptlp) \
@@ -2276,7 +2284,7 @@ static inline bool pgtable_pmd_page_ctor(struct page *page)
if (!pmd_ptlock_init(page))
return false;
__SetPageTable(page);
- inc_zone_page_state(page, NR_PAGETABLE);
+ inc_lruvec_page_state(page, NR_PAGETABLE);
return true;
}
@@ -2284,7 +2292,7 @@ static inline void pgtable_pmd_page_dtor(struct page *page)
{
pmd_ptlock_free(page);
__ClearPageTable(page);
- dec_zone_page_state(page, NR_PAGETABLE);
+ dec_lruvec_page_state(page, NR_PAGETABLE);
}
/*
@@ -2411,9 +2419,6 @@ static inline int early_pfn_to_nid(unsigned long pfn)
#else
/* please see mm/page_alloc.c */
extern int __meminit early_pfn_to_nid(unsigned long pfn);
-/* there is a per-arch backend function. */
-extern int __meminit __early_pfn_to_nid(unsigned long pfn,
- struct mminit_pfnnid_cache *state);
#endif
extern void set_dma_reserve(unsigned long new_dma_reserve);
@@ -2852,44 +2857,56 @@ extern int apply_to_existing_page_range(struct mm_struct *mm,
unsigned long address, unsigned long size,
pte_fn_t fn, void *data);
+extern void init_mem_debugging_and_hardening(void);
#ifdef CONFIG_PAGE_POISONING
-extern bool page_poisoning_enabled(void);
-extern void kernel_poison_pages(struct page *page, int numpages, int enable);
+extern void __kernel_poison_pages(struct page *page, int numpages);
+extern void __kernel_unpoison_pages(struct page *page, int numpages);
+extern bool _page_poisoning_enabled_early;
+DECLARE_STATIC_KEY_FALSE(_page_poisoning_enabled);
+static inline bool page_poisoning_enabled(void)
+{
+ return _page_poisoning_enabled_early;
+}
+/*
+ * For use in fast paths after init_mem_debugging() has run, or when a
+ * false negative result is not harmful when called too early.
+ */
+static inline bool page_poisoning_enabled_static(void)
+{
+ return static_branch_unlikely(&_page_poisoning_enabled);
+}
+static inline void kernel_poison_pages(struct page *page, int numpages)
+{
+ if (page_poisoning_enabled_static())
+ __kernel_poison_pages(page, numpages);
+}
+static inline void kernel_unpoison_pages(struct page *page, int numpages)
+{
+ if (page_poisoning_enabled_static())
+ __kernel_unpoison_pages(page, numpages);
+}
#else
static inline bool page_poisoning_enabled(void) { return false; }
-static inline void kernel_poison_pages(struct page *page, int numpages,
- int enable) { }
+static inline bool page_poisoning_enabled_static(void) { return false; }
+static inline void __kernel_poison_pages(struct page *page, int nunmpages) { }
+static inline void kernel_poison_pages(struct page *page, int numpages) { }
+static inline void kernel_unpoison_pages(struct page *page, int numpages) { }
#endif
-#ifdef CONFIG_INIT_ON_ALLOC_DEFAULT_ON
-DECLARE_STATIC_KEY_TRUE(init_on_alloc);
-#else
DECLARE_STATIC_KEY_FALSE(init_on_alloc);
-#endif
static inline bool want_init_on_alloc(gfp_t flags)
{
- if (static_branch_unlikely(&init_on_alloc) &&
- !page_poisoning_enabled())
+ if (static_branch_unlikely(&init_on_alloc))
return true;
return flags & __GFP_ZERO;
}
-#ifdef CONFIG_INIT_ON_FREE_DEFAULT_ON
-DECLARE_STATIC_KEY_TRUE(init_on_free);
-#else
DECLARE_STATIC_KEY_FALSE(init_on_free);
-#endif
static inline bool want_init_on_free(void)
{
- return static_branch_unlikely(&init_on_free) &&
- !page_poisoning_enabled();
+ return static_branch_unlikely(&init_on_free);
}
-#ifdef CONFIG_DEBUG_PAGEALLOC
-extern void init_debug_pagealloc(void);
-#else
-static inline void init_debug_pagealloc(void) {}
-#endif
extern bool _debug_pagealloc_enabled_early;
DECLARE_STATIC_KEY_FALSE(_debug_pagealloc_enabled);
@@ -2911,28 +2928,28 @@ static inline bool debug_pagealloc_enabled_static(void)
return static_branch_unlikely(&_debug_pagealloc_enabled);
}
-#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_ARCH_HAS_SET_DIRECT_MAP)
-extern void __kernel_map_pages(struct page *page, int numpages, int enable);
-
+#ifdef CONFIG_DEBUG_PAGEALLOC
/*
- * When called in DEBUG_PAGEALLOC context, the call should most likely be
- * guarded by debug_pagealloc_enabled() or debug_pagealloc_enabled_static()
+ * To support DEBUG_PAGEALLOC architecture must ensure that
+ * __kernel_map_pages() never fails
*/
-static inline void
-kernel_map_pages(struct page *page, int numpages, int enable)
-{
- __kernel_map_pages(page, numpages, enable);
-}
-#ifdef CONFIG_HIBERNATION
-extern bool kernel_page_present(struct page *page);
-#endif /* CONFIG_HIBERNATION */
-#else /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */
-static inline void
-kernel_map_pages(struct page *page, int numpages, int enable) {}
-#ifdef CONFIG_HIBERNATION
-static inline bool kernel_page_present(struct page *page) { return true; }
-#endif /* CONFIG_HIBERNATION */
-#endif /* CONFIG_DEBUG_PAGEALLOC || CONFIG_ARCH_HAS_SET_DIRECT_MAP */
+extern void __kernel_map_pages(struct page *page, int numpages, int enable);
+
+static inline void debug_pagealloc_map_pages(struct page *page, int numpages)
+{
+ if (debug_pagealloc_enabled_static())
+ __kernel_map_pages(page, numpages, 1);
+}
+
+static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages)
+{
+ if (debug_pagealloc_enabled_static())
+ __kernel_map_pages(page, numpages, 0);
+}
+#else /* CONFIG_DEBUG_PAGEALLOC */
+static inline void debug_pagealloc_map_pages(struct page *page, int numpages) {}
+static inline void debug_pagealloc_unmap_pages(struct page *page, int numpages) {}
+#endif /* CONFIG_DEBUG_PAGEALLOC */
#ifdef __HAVE_ARCH_GATE_AREA
extern struct vm_area_struct *get_gate_vma(struct mm_struct *mm);
diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h
index 80f5d755c037..e7de072ade03 100644
--- a/include/linux/mm_types.h
+++ b/include/linux/mm_types.h
@@ -14,6 +14,7 @@
#include <linux/uprobes.h>
#include <linux/page-flags-layout.h>
#include <linux/workqueue.h>
+#include <linux/seqlock.h>
#include <asm/mmu.h>
@@ -443,6 +444,13 @@ struct mm_struct {
*/
atomic_t has_pinned;
+ /**
+ * @write_protect_seq: Locked when any thread is write
+ * protecting pages mapped by this mm to enforce a later COW,
+ * for instance during page table copying for fork().
+ */
+ seqcount_t write_protect_seq;
+
#ifdef CONFIG_MMU
atomic_long_t pgtables_bytes; /* PTE page table pages */
#endif
diff --git a/include/linux/mmap_lock.h b/include/linux/mmap_lock.h
index 18e7eae9b5ba..0540f0156f58 100644
--- a/include/linux/mmap_lock.h
+++ b/include/linux/mmap_lock.h
@@ -1,11 +1,65 @@
#ifndef _LINUX_MMAP_LOCK_H
#define _LINUX_MMAP_LOCK_H
+#include <linux/lockdep.h>
+#include <linux/mm_types.h>
#include <linux/mmdebug.h>
+#include <linux/rwsem.h>
+#include <linux/tracepoint-defs.h>
+#include <linux/types.h>
#define MMAP_LOCK_INITIALIZER(name) \
.mmap_lock = __RWSEM_INITIALIZER((name).mmap_lock),
+DECLARE_TRACEPOINT(mmap_lock_start_locking);
+DECLARE_TRACEPOINT(mmap_lock_acquire_returned);
+DECLARE_TRACEPOINT(mmap_lock_released);
+
+#ifdef CONFIG_TRACING
+
+void __mmap_lock_do_trace_start_locking(struct mm_struct *mm, bool write);
+void __mmap_lock_do_trace_acquire_returned(struct mm_struct *mm, bool write,
+ bool success);
+void __mmap_lock_do_trace_released(struct mm_struct *mm, bool write);
+
+static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
+ bool write)
+{
+ if (tracepoint_enabled(mmap_lock_start_locking))
+ __mmap_lock_do_trace_start_locking(mm, write);
+}
+
+static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
+ bool write, bool success)
+{
+ if (tracepoint_enabled(mmap_lock_acquire_returned))
+ __mmap_lock_do_trace_acquire_returned(mm, write, success);
+}
+
+static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
+{
+ if (tracepoint_enabled(mmap_lock_released))
+ __mmap_lock_do_trace_released(mm, write);
+}
+
+#else /* !CONFIG_TRACING */
+
+static inline void __mmap_lock_trace_start_locking(struct mm_struct *mm,
+ bool write)
+{
+}
+
+static inline void __mmap_lock_trace_acquire_returned(struct mm_struct *mm,
+ bool write, bool success)
+{
+}
+
+static inline void __mmap_lock_trace_released(struct mm_struct *mm, bool write)
+{
+}
+
+#endif /* CONFIG_TRACING */
+
static inline void mmap_init_lock(struct mm_struct *mm)
{
init_rwsem(&mm->mmap_lock);
@@ -13,57 +67,86 @@ static inline void mmap_init_lock(struct mm_struct *mm)
static inline void mmap_write_lock(struct mm_struct *mm)
{
+ __mmap_lock_trace_start_locking(mm, true);
down_write(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, true, true);
}
static inline void mmap_write_lock_nested(struct mm_struct *mm, int subclass)
{
+ __mmap_lock_trace_start_locking(mm, true);
down_write_nested(&mm->mmap_lock, subclass);
+ __mmap_lock_trace_acquire_returned(mm, true, true);
}
static inline int mmap_write_lock_killable(struct mm_struct *mm)
{
- return down_write_killable(&mm->mmap_lock);
+ int ret;
+
+ __mmap_lock_trace_start_locking(mm, true);
+ ret = down_write_killable(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, true, ret == 0);
+ return ret;
}
static inline bool mmap_write_trylock(struct mm_struct *mm)
{
- return down_write_trylock(&mm->mmap_lock) != 0;
+ bool ret;
+
+ __mmap_lock_trace_start_locking(mm, true);
+ ret = down_write_trylock(&mm->mmap_lock) != 0;
+ __mmap_lock_trace_acquire_returned(mm, true, ret);
+ return ret;
}
static inline void mmap_write_unlock(struct mm_struct *mm)
{
up_write(&mm->mmap_lock);
+ __mmap_lock_trace_released(mm, true);
}
static inline void mmap_write_downgrade(struct mm_struct *mm)
{
downgrade_write(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, false, true);
}
static inline void mmap_read_lock(struct mm_struct *mm)
{
+ __mmap_lock_trace_start_locking(mm, false);
down_read(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, false, true);
}
static inline int mmap_read_lock_killable(struct mm_struct *mm)
{
- return down_read_killable(&mm->mmap_lock);
+ int ret;
+
+ __mmap_lock_trace_start_locking(mm, false);
+ ret = down_read_killable(&mm->mmap_lock);
+ __mmap_lock_trace_acquire_returned(mm, false, ret == 0);
+ return ret;
}
static inline bool mmap_read_trylock(struct mm_struct *mm)
{
- return down_read_trylock(&mm->mmap_lock) != 0;
+ bool ret;
+
+ __mmap_lock_trace_start_locking(mm, false);
+ ret = down_read_trylock(&mm->mmap_lock) != 0;
+ __mmap_lock_trace_acquire_returned(mm, false, ret);
+ return ret;
}
static inline void mmap_read_unlock(struct mm_struct *mm)
{
up_read(&mm->mmap_lock);
+ __mmap_lock_trace_released(mm, false);
}
static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
{
- if (down_read_trylock(&mm->mmap_lock)) {
+ if (mmap_read_trylock(mm)) {
rwsem_release(&mm->mmap_lock.dep_map, _RET_IP_);
return true;
}
@@ -73,6 +156,7 @@ static inline bool mmap_read_trylock_non_owner(struct mm_struct *mm)
static inline void mmap_read_unlock_non_owner(struct mm_struct *mm)
{
up_read_non_owner(&mm->mmap_lock);
+ __mmap_lock_trace_released(mm, false);
}
static inline void mmap_assert_locked(struct mm_struct *mm)
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index fb3bf696c05e..98a80c01d150 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -152,7 +152,6 @@ enum zone_stat_item {
NR_ZONE_UNEVICTABLE,
NR_ZONE_WRITE_PENDING, /* Count of dirty, writeback and unstable pages */
NR_MLOCK, /* mlock()ed pages found and moved off LRU */
- NR_PAGETABLE, /* used for pagetables */
/* Second 128 byte cacheline */
NR_BOUNCE,
#if IS_ENABLED(CONFIG_ZSMALLOC)
@@ -207,6 +206,7 @@ enum node_stat_item {
#if IS_ENABLED(CONFIG_SHADOW_CALL_STACK)
NR_KERNEL_SCS_KB, /* measured in KiB */
#endif
+ NR_PAGETABLE, /* used for pagetables */
NR_VM_NODE_STAT_ITEMS
};
@@ -354,26 +354,6 @@ enum zone_type {
* DMA mask is assumed when ZONE_DMA32 is defined. Some 64-bit
* platforms may need both zones as they support peripherals with
* different DMA addressing limitations.
- *
- * Some examples:
- *
- * - i386 and x86_64 have a fixed 16M ZONE_DMA and ZONE_DMA32 for the
- * rest of the lower 4G.
- *
- * - arm only uses ZONE_DMA, the size, up to 4G, may vary depending on
- * the specific device.
- *
- * - arm64 has a fixed 1G ZONE_DMA and ZONE_DMA32 for the rest of the
- * lower 4G.
- *
- * - powerpc only uses ZONE_DMA, the size, up to 2G, may vary
- * depending on the specific device.
- *
- * - s390 uses ZONE_DMA fixed to the lower 2G.
- *
- * - ia64 and riscv only use ZONE_DMA32.
- *
- * - parisc uses neither.
*/
#ifdef CONFIG_ZONE_DMA
ZONE_DMA,
@@ -470,6 +450,12 @@ struct zone {
#endif
struct pglist_data *zone_pgdat;
struct per_cpu_pageset __percpu *pageset;
+ /*
+ * the high and batch values are copied to individual pagesets for
+ * faster access
+ */
+ int pageset_high;
+ int pageset_batch;
#ifndef CONFIG_SPARSEMEM
/*
@@ -1429,17 +1415,6 @@ void sparse_init(void);
#endif /* CONFIG_SPARSEMEM */
/*
- * During memory init memblocks map pfns to nids. The search is expensive and
- * this caches recent lookups. The implementation of __early_pfn_to_nid
- * may treat start/end as pfns or sections.
- */
-struct mminit_pfnnid_cache {
- unsigned long last_start;
- unsigned long last_end;
- int last_nid;
-};
-
-/*
* If it is possible to have holes within a MAX_ORDER_NR_PAGES, then we
* need to check pfn validity within that MAX_ORDER_NR_PAGES block.
* pfn_valid_within() should be used in this case; we optimise this away
@@ -1451,37 +1426,6 @@ struct mminit_pfnnid_cache {
#define pfn_valid_within(pfn) (1)
#endif
-#ifdef CONFIG_ARCH_HAS_HOLES_MEMORYMODEL
-/*
- * pfn_valid() is meant to be able to tell if a given PFN has valid memmap
- * associated with it or not. This means that a struct page exists for this
- * pfn. The caller cannot assume the page is fully initialized in general.
- * Hotplugable pages might not have been onlined yet. pfn_to_online_page()
- * will ensure the struct page is fully online and initialized. Special pages
- * (e.g. ZONE_DEVICE) are never onlined and should be treated accordingly.
- *
- * In FLATMEM, it is expected that holes always have valid memmap as long as
- * there is valid PFNs either side of the hole. In SPARSEMEM, it is assumed
- * that a valid section has a memmap for the entire section.
- *
- * However, an ARM, and maybe other embedded architectures in the future
- * free memmap backing holes to save memory on the assumption the memmap is
- * never used. The page_zone linkages are then broken even though pfn_valid()
- * returns true. A walker of the full memmap must then do this additional
- * check to ensure the memmap they are looking at is sane by making sure
- * the zone and PFN linkages are still valid. This is expensive, but walkers
- * of the full memmap are extremely rare.
- */
-bool memmap_valid_within(unsigned long pfn,
- struct page *page, struct zone *zone);
-#else
-static inline bool memmap_valid_within(unsigned long pfn,
- struct page *page, struct zone *zone)
-{
- return true;
-}
-#endif /* CONFIG_ARCH_HAS_HOLES_MEMORYMODEL */
-
#endif /* !__GENERATING_BOUNDS.H */
#endif /* !__ASSEMBLY__ */
#endif /* _LINUX_MMZONE_H */
diff --git a/include/linux/msi.h b/include/linux/msi.h
index 6b584cc4757c..360a0a7e7341 100644
--- a/include/linux/msi.h
+++ b/include/linux/msi.h
@@ -4,11 +4,50 @@
#include <linux/kobject.h>
#include <linux/list.h>
+#include <asm/msi.h>
+
+/* Dummy shadow structures if an architecture does not define them */
+#ifndef arch_msi_msg_addr_lo
+typedef struct arch_msi_msg_addr_lo {
+ u32 address_lo;
+} __attribute__ ((packed)) arch_msi_msg_addr_lo_t;
+#endif
+
+#ifndef arch_msi_msg_addr_hi
+typedef struct arch_msi_msg_addr_hi {
+ u32 address_hi;
+} __attribute__ ((packed)) arch_msi_msg_addr_hi_t;
+#endif
+
+#ifndef arch_msi_msg_data
+typedef struct arch_msi_msg_data {
+ u32 data;
+} __attribute__ ((packed)) arch_msi_msg_data_t;
+#endif
+/**
+ * msi_msg - Representation of a MSI message
+ * @address_lo: Low 32 bits of msi message address
+ * @arch_addrlo: Architecture specific shadow of @address_lo
+ * @address_hi: High 32 bits of msi message address
+ * (only used when device supports it)
+ * @arch_addrhi: Architecture specific shadow of @address_hi
+ * @data: MSI message data (usually 16 bits)
+ * @arch_data: Architecture specific shadow of @data
+ */
struct msi_msg {
- u32 address_lo; /* low 32 bits of msi message address */
- u32 address_hi; /* high 32 bits of msi message address */
- u32 data; /* 16 bits of msi message data */
+ union {
+ u32 address_lo;
+ arch_msi_msg_addr_lo_t arch_addr_lo;
+ };
+ union {
+ u32 address_hi;
+ arch_msi_msg_addr_hi_t arch_addr_hi;
+ };
+ union {
+ u32 data;
+ arch_msi_msg_data_t arch_data;
+ };
};
extern int pci_msi_ignore_mask;
@@ -243,7 +282,6 @@ struct msi_controller {
#ifdef CONFIG_GENERIC_MSI_IRQ_DOMAIN
#include <linux/irqhandler.h>
-#include <asm/msi.h>
struct irq_domain;
struct irq_domain_ops;
diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h
index 5fbc4000358f..0f1d024bd958 100644
--- a/include/linux/ns_common.h
+++ b/include/linux/ns_common.h
@@ -2,12 +2,15 @@
#ifndef _LINUX_NS_COMMON_H
#define _LINUX_NS_COMMON_H
+#include <linux/refcount.h>
+
struct proc_ns_operations;
struct ns_common {
atomic_long_t stashed;
const struct proc_ns_operations *ops;
unsigned int inum;
+ refcount_t count;
};
#endif
diff --git a/include/linux/of.h b/include/linux/of.h
index 5d51891cbf1a..9ed5b8532c30 100644
--- a/include/linux/of.h
+++ b/include/linux/of.h
@@ -558,6 +558,8 @@ int of_map_id(struct device_node *np, u32 id,
const char *map_name, const char *map_mask_name,
struct device_node **target, u32 *id_out);
+phys_addr_t of_dma_get_max_cpu_address(struct device_node *np);
+
#else /* CONFIG_OF */
static inline void of_core_init(void)
@@ -995,6 +997,11 @@ static inline int of_map_id(struct device_node *np, u32 id,
return -EINVAL;
}
+static inline phys_addr_t of_dma_get_max_cpu_address(struct device_node *np)
+{
+ return PHYS_ADDR_MAX;
+}
+
#define of_match_ptr(_ptr) NULL
#define of_match_node(_matches, _node) NULL
#endif /* CONFIG_OF */
diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h
index fc0e1bd48e73..b5eb0fc15053 100644
--- a/include/linux/page-flags.h
+++ b/include/linux/page-flags.h
@@ -86,8 +86,7 @@
*/
/*
- * Don't use the *_dontuse flags. Use the macros. Otherwise you'll break
- * locked- and dirty-page accounting.
+ * Don't use the pageflags directly. Use the PageFoo macros.
*
* The page flags field is split into two parts, the main flags area
* which extends from the low bits upwards, and the fields area which
@@ -363,8 +362,7 @@ PAGEFLAG(SwapBacked, swapbacked, PF_NO_TAIL)
* for its own purposes.
* - PG_private and PG_private_2 cause releasepage() and co to be invoked
*/
-PAGEFLAG(Private, private, PF_ANY) __SETPAGEFLAG(Private, private, PF_ANY)
- __CLEARPAGEFLAG(Private, private, PF_ANY)
+PAGEFLAG(Private, private, PF_ANY)
PAGEFLAG(Private2, private_2, PF_ANY) TESTSCFLAG(Private2, private_2, PF_ANY)
PAGEFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
TESTCLEARFLAG(OwnerPriv1, owner_priv_1, PF_ANY)
diff --git a/include/linux/page_ext.h b/include/linux/page_ext.h
index cfce186f0c4e..aff81ba31bd8 100644
--- a/include/linux/page_ext.h
+++ b/include/linux/page_ext.h
@@ -44,8 +44,12 @@ static inline void page_ext_init_flatmem(void)
{
}
extern void page_ext_init(void);
+static inline void page_ext_init_flatmem_late(void)
+{
+}
#else
extern void page_ext_init_flatmem(void);
+extern void page_ext_init_flatmem_late(void);
static inline void page_ext_init(void)
{
}
@@ -76,6 +80,10 @@ static inline void page_ext_init(void)
{
}
+static inline void page_ext_init_flatmem_late(void)
+{
+}
+
static inline void page_ext_init_flatmem(void)
{
}
diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h
index 081d934eda64..ad4ddc17d403 100644
--- a/include/linux/pagevec.h
+++ b/include/linux/pagevec.h
@@ -43,9 +43,6 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec,
unsigned pagevec_lookup_range_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, pgoff_t end,
xa_mark_t tag);
-unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec,
- struct address_space *mapping, pgoff_t *index, pgoff_t end,
- xa_mark_t tag, unsigned max_pages);
static inline unsigned pagevec_lookup_tag(struct pagevec *pvec,
struct address_space *mapping, pgoff_t *index, xa_mark_t tag)
{
diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h
index 505480217cf1..bf7966776c55 100644
--- a/include/linux/perf/arm_pmu.h
+++ b/include/linux/perf/arm_pmu.h
@@ -163,6 +163,8 @@ int arm_pmu_acpi_probe(armpmu_init_fn init_fn);
static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; }
#endif
+bool arm_pmu_irq_is_nmi(void);
+
/* Internal functions only for core arm_pmu code */
struct arm_pmu *armpmu_alloc(void);
struct arm_pmu *armpmu_alloc_atomic(void);
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 96450f6fb1de..9a38f579bc76 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1028,6 +1028,8 @@ struct perf_sample_data {
u64 phys_addr;
u64 cgroup;
+ u64 data_page_size;
+ u64 code_page_size;
} ____cacheline_aligned;
/* default value for data source */
@@ -1585,4 +1587,8 @@ extern void __weak arch_perf_update_userpage(struct perf_event *event,
struct perf_event_mmap_page *userpg,
u64 now);
+#ifdef CONFIG_MMU
+extern __weak u64 arch_perf_get_page_size(struct mm_struct *mm, unsigned long addr);
+#endif
+
#endif /* _LINUX_PERF_EVENT_H */
diff --git a/include/linux/pgtable.h b/include/linux/pgtable.h
index e237004d498d..8fcdfa52eb4b 100644
--- a/include/linux/pgtable.h
+++ b/include/linux/pgtable.h
@@ -258,6 +258,61 @@ static inline pte_t ptep_get(pte_t *ptep)
}
#endif
+#ifdef CONFIG_GUP_GET_PTE_LOW_HIGH
+/*
+ * WARNING: only to be used in the get_user_pages_fast() implementation.
+ *
+ * With get_user_pages_fast(), we walk down the pagetables without taking any
+ * locks. For this we would like to load the pointers atomically, but sometimes
+ * that is not possible (e.g. without expensive cmpxchg8b on x86_32 PAE). What
+ * we do have is the guarantee that a PTE will only either go from not present
+ * to present, or present to not present or both -- it will not switch to a
+ * completely different present page without a TLB flush in between; something
+ * that we are blocking by holding interrupts off.
+ *
+ * Setting ptes from not present to present goes:
+ *
+ * ptep->pte_high = h;
+ * smp_wmb();
+ * ptep->pte_low = l;
+ *
+ * And present to not present goes:
+ *
+ * ptep->pte_low = 0;
+ * smp_wmb();
+ * ptep->pte_high = 0;
+ *
+ * We must ensure here that the load of pte_low sees 'l' IFF pte_high sees 'h'.
+ * We load pte_high *after* loading pte_low, which ensures we don't see an older
+ * value of pte_high. *Then* we recheck pte_low, which ensures that we haven't
+ * picked up a changed pte high. We might have gotten rubbish values from
+ * pte_low and pte_high, but we are guaranteed that pte_low will not have the
+ * present bit set *unless* it is 'l'. Because get_user_pages_fast() only
+ * operates on present ptes we're safe.
+ */
+static inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+ pte_t pte;
+
+ do {
+ pte.pte_low = ptep->pte_low;
+ smp_rmb();
+ pte.pte_high = ptep->pte_high;
+ smp_rmb();
+ } while (unlikely(pte.pte_low != ptep->pte_low));
+
+ return pte;
+}
+#else /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+/*
+ * We require that the PTE can be read atomically.
+ */
+static inline pte_t ptep_get_lockless(pte_t *ptep)
+{
+ return ptep_get(ptep);
+}
+#endif /* CONFIG_GUP_GET_PTE_LOW_HIGH */
+
#ifdef CONFIG_TRANSPARENT_HUGEPAGE
#ifndef __HAVE_ARCH_PMDP_HUGE_GET_AND_CLEAR
static inline pmd_t pmdp_huge_get_and_clear(struct mm_struct *mm,
@@ -1494,4 +1549,20 @@ typedef unsigned int pgtbl_mod_mask;
#define pmd_leaf(x) 0
#endif
+#ifndef pgd_leaf_size
+#define pgd_leaf_size(x) (1ULL << PGDIR_SHIFT)
+#endif
+#ifndef p4d_leaf_size
+#define p4d_leaf_size(x) P4D_SIZE
+#endif
+#ifndef pud_leaf_size
+#define pud_leaf_size(x) PUD_SIZE
+#endif
+#ifndef pmd_leaf_size
+#define pmd_leaf_size(x) PMD_SIZE
+#endif
+#ifndef pte_leaf_size
+#define pte_leaf_size(x) PAGE_SIZE
+#endif
+
#endif /* _LINUX_PGTABLE_H */
diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h
index 5a5cb45ac57e..7c7e627503d2 100644
--- a/include/linux/pid_namespace.h
+++ b/include/linux/pid_namespace.h
@@ -8,7 +8,6 @@
#include <linux/workqueue.h>
#include <linux/threads.h>
#include <linux/nsproxy.h>
-#include <linux/kref.h>
#include <linux/ns_common.h>
#include <linux/idr.h>
@@ -18,7 +17,6 @@
struct fs_pin;
struct pid_namespace {
- struct kref kref;
struct idr idr;
struct rcu_head rcu;
unsigned int pid_allocated;
@@ -43,7 +41,7 @@ extern struct pid_namespace init_pid_ns;
static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns)
{
if (ns != &init_pid_ns)
- kref_get(&ns->kref);
+ refcount_inc(&ns->ns.count);
return ns;
}
diff --git a/include/linux/platform_data/media/coda.h b/include/linux/platform_data/media/coda.h
deleted file mode 100644
index 293b61b60c9d..000000000000
--- a/include/linux/platform_data/media/coda.h
+++ /dev/null
@@ -1,14 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0-or-later */
-/*
- * Copyright (C) 2013 Philipp Zabel, Pengutronix
- */
-#ifndef PLATFORM_CODA_H
-#define PLATFORM_CODA_H
-
-struct device;
-
-struct coda_platform_data {
- struct device *iram_dev;
-};
-
-#endif
diff --git a/include/linux/platform_data/shmob_drm.h b/include/linux/platform_data/shmob_drm.h
index fe815d7d9f58..d661399b217d 100644
--- a/include/linux/platform_data/shmob_drm.h
+++ b/include/linux/platform_data/shmob_drm.h
@@ -10,8 +10,6 @@
#ifndef __SHMOB_DRM_H__
#define __SHMOB_DRM_H__
-#include <linux/kernel.h>
-
#include <drm/drm_mode.h>
enum shmob_drm_clk_source {
diff --git a/include/linux/poison.h b/include/linux/poison.h
index dc8ae5d8db03..aff1c9250c82 100644
--- a/include/linux/poison.h
+++ b/include/linux/poison.h
@@ -27,11 +27,7 @@
#define TIMER_ENTRY_STATIC ((void *) 0x300 + POISON_POINTER_DELTA)
/********** mm/page_poison.c **********/
-#ifdef CONFIG_PAGE_POISONING_ZERO
-#define PAGE_POISON 0x00
-#else
#define PAGE_POISON 0xaa
-#endif
/********** mm/page_alloc.c ************/
diff --git a/include/linux/preempt.h b/include/linux/preempt.h
index 7d9c1c0e149c..6df63cbe8bb0 100644
--- a/include/linux/preempt.h
+++ b/include/linux/preempt.h
@@ -322,34 +322,71 @@ static inline void preempt_notifier_init(struct preempt_notifier *notifier,
#endif
-/**
- * migrate_disable - Prevent migration of the current task
+#ifdef CONFIG_SMP
+
+/*
+ * Migrate-Disable and why it is undesired.
*
- * Maps to preempt_disable() which also disables preemption. Use
- * migrate_disable() to annotate that the intent is to prevent migration,
- * but not necessarily preemption.
+ * When a preempted task becomes elegible to run under the ideal model (IOW it
+ * becomes one of the M highest priority tasks), it might still have to wait
+ * for the preemptee's migrate_disable() section to complete. Thereby suffering
+ * a reduction in bandwidth in the exact duration of the migrate_disable()
+ * section.
*
- * Can be invoked nested like preempt_disable() and needs the corresponding
- * number of migrate_enable() invocations.
- */
-static __always_inline void migrate_disable(void)
-{
- preempt_disable();
-}
-
-/**
- * migrate_enable - Allow migration of the current task
+ * Per this argument, the change from preempt_disable() to migrate_disable()
+ * gets us:
+ *
+ * - a higher priority tasks gains reduced wake-up latency; with preempt_disable()
+ * it would have had to wait for the lower priority task.
+ *
+ * - a lower priority tasks; which under preempt_disable() could've instantly
+ * migrated away when another CPU becomes available, is now constrained
+ * by the ability to push the higher priority task away, which might itself be
+ * in a migrate_disable() section, reducing it's available bandwidth.
+ *
+ * IOW it trades latency / moves the interference term, but it stays in the
+ * system, and as long as it remains unbounded, the system is not fully
+ * deterministic.
+ *
+ *
+ * The reason we have it anyway.
*
- * Counterpart to migrate_disable().
+ * PREEMPT_RT breaks a number of assumptions traditionally held. By forcing a
+ * number of primitives into becoming preemptible, they would also allow
+ * migration. This turns out to break a bunch of per-cpu usage. To this end,
+ * all these primitives employ migirate_disable() to restore this implicit
+ * assumption.
*
- * As migrate_disable() can be invoked nested, only the outermost invocation
- * reenables migration.
+ * This is a 'temporary' work-around at best. The correct solution is getting
+ * rid of the above assumptions and reworking the code to employ explicit
+ * per-cpu locking or short preempt-disable regions.
+ *
+ * The end goal must be to get rid of migrate_disable(), alternatively we need
+ * a schedulability theory that does not depend on abritrary migration.
+ *
+ *
+ * Notes on the implementation.
+ *
+ * The implementation is particularly tricky since existing code patterns
+ * dictate neither migrate_disable() nor migrate_enable() is allowed to block.
+ * This means that it cannot use cpus_read_lock() to serialize against hotplug,
+ * nor can it easily migrate itself into a pending affinity mask change on
+ * migrate_enable().
+ *
+ *
+ * Note: even non-work-conserving schedulers like semi-partitioned depends on
+ * migration, so migrate_disable() is not only a problem for
+ * work-conserving schedulers.
*
- * Currently mapped to preempt_enable().
*/
-static __always_inline void migrate_enable(void)
-{
- preempt_enable();
-}
+extern void migrate_disable(void);
+extern void migrate_enable(void);
+
+#else
+
+static inline void migrate_disable(void) { }
+static inline void migrate_enable(void) { }
+
+#endif /* CONFIG_SMP */
#endif /* __LINUX_PREEMPT_H */
diff --git a/include/linux/purgatory.h b/include/linux/purgatory.h
index b950e961cfa8..d7dc1559427f 100644
--- a/include/linux/purgatory.h
+++ b/include/linux/purgatory.h
@@ -3,7 +3,7 @@
#define _LINUX_PURGATORY_H
#include <linux/types.h>
-#include <crypto/sha.h>
+#include <crypto/sha2.h>
#include <uapi/linux/kexec.h>
struct kexec_sha_region {
diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h
index 6cdd0152c253..de0826411311 100644
--- a/include/linux/rcupdate.h
+++ b/include/linux/rcupdate.h
@@ -241,6 +241,11 @@ bool rcu_lockdep_current_cpu_online(void);
static inline bool rcu_lockdep_current_cpu_online(void) { return true; }
#endif /* #else #if defined(CONFIG_HOTPLUG_CPU) && defined(CONFIG_PROVE_RCU) */
+extern struct lockdep_map rcu_lock_map;
+extern struct lockdep_map rcu_bh_lock_map;
+extern struct lockdep_map rcu_sched_lock_map;
+extern struct lockdep_map rcu_callback_map;
+
#ifdef CONFIG_DEBUG_LOCK_ALLOC
static inline void rcu_lock_acquire(struct lockdep_map *map)
@@ -253,10 +258,6 @@ static inline void rcu_lock_release(struct lockdep_map *map)
lock_release(map, _THIS_IP_);
}
-extern struct lockdep_map rcu_lock_map;
-extern struct lockdep_map rcu_bh_lock_map;
-extern struct lockdep_map rcu_sched_lock_map;
-extern struct lockdep_map rcu_callback_map;
int debug_lockdep_rcu_enabled(void);
int rcu_read_lock_held(void);
int rcu_read_lock_bh_held(void);
@@ -327,7 +328,7 @@ static inline void rcu_preempt_sleep_check(void) { }
#else /* #ifdef CONFIG_PROVE_RCU */
-#define RCU_LOCKDEP_WARN(c, s) do { } while (0)
+#define RCU_LOCKDEP_WARN(c, s) do { } while (0 && (c))
#define rcu_sleep_check() do { } while (0)
#endif /* #else #ifdef CONFIG_PROVE_RCU */
diff --git a/include/linux/rcupdate_trace.h b/include/linux/rcupdate_trace.h
index 3e7919fc5f34..86c8f6c98412 100644
--- a/include/linux/rcupdate_trace.h
+++ b/include/linux/rcupdate_trace.h
@@ -11,10 +11,10 @@
#include <linux/sched.h>
#include <linux/rcupdate.h>
-#ifdef CONFIG_DEBUG_LOCK_ALLOC
-
extern struct lockdep_map rcu_trace_lock_map;
+#ifdef CONFIG_DEBUG_LOCK_ALLOC
+
static inline int rcu_read_lock_trace_held(void)
{
return lock_is_held(&rcu_trace_lock_map);
diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h
index 7c1ecdb356d8..2a97334eb786 100644
--- a/include/linux/rcutiny.h
+++ b/include/linux/rcutiny.h
@@ -89,6 +89,8 @@ static inline void rcu_irq_enter_irqson(void) { }
static inline void rcu_irq_exit(void) { }
static inline void rcu_irq_exit_preempt(void) { }
static inline void rcu_irq_exit_check_preempt(void) { }
+#define rcu_is_idle_cpu(cpu) \
+ (is_idle_task(current) && !in_nmi() && !in_irq() && !in_serving_softirq())
static inline void exit_rcu(void) { }
static inline bool rcu_preempt_need_deferred_qs(struct task_struct *t)
{
diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h
index 59eb5cd567d7..df578b73960f 100644
--- a/include/linux/rcutree.h
+++ b/include/linux/rcutree.h
@@ -50,6 +50,7 @@ void rcu_irq_exit(void);
void rcu_irq_exit_preempt(void);
void rcu_irq_enter_irqson(void);
void rcu_irq_exit_irqson(void);
+bool rcu_is_idle_cpu(int cpu);
#ifdef CONFIG_PROVE_RCU
void rcu_irq_exit_check_preempt(void);
diff --git a/include/linux/refcount.h b/include/linux/refcount.h
index 497990c69b0b..b8a6e387f8f9 100644
--- a/include/linux/refcount.h
+++ b/include/linux/refcount.h
@@ -101,7 +101,7 @@
struct mutex;
/**
- * struct refcount_t - variant of atomic_t specialized for reference counts
+ * typedef refcount_t - variant of atomic_t specialized for reference counts
* @refs: atomic_t counter field
*
* The counter saturates at REFCOUNT_SATURATED and will not move once
diff --git a/include/linux/rmap.h b/include/linux/rmap.h
index 3a6adfa70fb0..70085ca1a3fc 100644
--- a/include/linux/rmap.h
+++ b/include/linux/rmap.h
@@ -91,7 +91,6 @@ enum ttu_flags {
TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */
TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */
- TTU_IGNORE_ACCESS = 0x10, /* don't age */
TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */
TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible
* and caller guarantees they will
diff --git a/include/linux/rtc.h b/include/linux/rtc.h
index 22d1575e4991..b829382de6c3 100644
--- a/include/linux/rtc.h
+++ b/include/linux/rtc.h
@@ -110,13 +110,36 @@ struct rtc_device {
/* Some hardware can't support UIE mode */
int uie_unsupported;
- /* Number of nsec it takes to set the RTC clock. This influences when
- * the set ops are called. An offset:
- * - of 0.5 s will call RTC set for wall clock time 10.0 s at 9.5 s
- * - of 1.5 s will call RTC set for wall clock time 10.0 s at 8.5 s
- * - of -0.5 s will call RTC set for wall clock time 10.0 s at 10.5 s
+ /*
+ * This offset specifies the update timing of the RTC.
+ *
+ * tsched t1 write(t2.tv_sec - 1sec)) t2 RTC increments seconds
+ *
+ * The offset defines how tsched is computed so that the write to
+ * the RTC (t2.tv_sec - 1sec) is correct versus the time required
+ * for the transport of the write and the time which the RTC needs
+ * to increment seconds the first time after the write (t2).
+ *
+ * For direct accessible RTCs tsched ~= t1 because the write time
+ * is negligible. For RTCs behind slow busses the transport time is
+ * significant and has to be taken into account.
+ *
+ * The time between the write (t1) and the first increment after
+ * the write (t2) is RTC specific. For a MC146818 RTC it's 500ms,
+ * for many others it's exactly 1 second. Consult the datasheet.
+ *
+ * The value of this offset is also used to calculate the to be
+ * written value (t2.tv_sec - 1sec) at tsched.
+ *
+ * The default value for this is NSEC_PER_SEC + 10 msec default
+ * transport time. The offset can be adjusted by drivers so the
+ * calculation for the to be written value at tsched becomes
+ * correct:
+ *
+ * newval = tsched + set_offset_nsec - NSEC_PER_SEC
+ * and (tsched + set_offset_nsec) % NSEC_PER_SEC == 0
*/
- long set_offset_nsec;
+ unsigned long set_offset_nsec;
bool registered;
@@ -165,7 +188,6 @@ int __rtc_register_device(struct module *owner, struct rtc_device *rtc);
extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm);
extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm);
-extern int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec);
int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm);
extern int rtc_read_alarm(struct rtc_device *rtc,
struct rtc_wkalrm *alrm);
@@ -205,39 +227,6 @@ static inline bool is_leap_year(unsigned int year)
return (!(year % 4) && (year % 100)) || !(year % 400);
}
-/* Determine if we can call to driver to set the time. Drivers can only be
- * called to set a second aligned time value, and the field set_offset_nsec
- * specifies how far away from the second aligned time to call the driver.
- *
- * This also computes 'to_set' which is the time we are trying to set, and has
- * a zero in tv_nsecs, such that:
- * to_set - set_delay_nsec == now +/- FUZZ
- *
- */
-static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec,
- struct timespec64 *to_set,
- const struct timespec64 *now)
-{
- /* Allowed error in tv_nsec, arbitarily set to 5 jiffies in ns. */
- const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5;
- struct timespec64 delay = {.tv_sec = 0,
- .tv_nsec = set_offset_nsec};
-
- *to_set = timespec64_add(*now, delay);
-
- if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) {
- to_set->tv_nsec = 0;
- return true;
- }
-
- if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) {
- to_set->tv_sec++;
- to_set->tv_nsec = 0;
- return true;
- }
- return false;
-}
-
#define rtc_register_device(device) \
__rtc_register_device(THIS_MODULE, device)
diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h
index 25e3fde85617..4c715be48717 100644
--- a/include/linux/rwsem.h
+++ b/include/linux/rwsem.h
@@ -123,6 +123,7 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem)
* lock for reading
*/
extern void down_read(struct rw_semaphore *sem);
+extern int __must_check down_read_interruptible(struct rw_semaphore *sem);
extern int __must_check down_read_killable(struct rw_semaphore *sem);
/*
@@ -171,6 +172,7 @@ extern void downgrade_write(struct rw_semaphore *sem);
* See Documentation/locking/lockdep-design.rst for more details.)
*/
extern void down_read_nested(struct rw_semaphore *sem, int subclass);
+extern int __must_check down_read_killable_nested(struct rw_semaphore *sem, int subclass);
extern void down_write_nested(struct rw_semaphore *sem, int subclass);
extern int down_write_killable_nested(struct rw_semaphore *sem, int subclass);
extern void _down_write_nest_lock(struct rw_semaphore *sem, struct lockdep_map *nest_lock);
@@ -191,6 +193,7 @@ extern void down_read_non_owner(struct rw_semaphore *sem);
extern void up_read_non_owner(struct rw_semaphore *sem);
#else
# define down_read_nested(sem, subclass) down_read(sem)
+# define down_read_killable_nested(sem, subclass) down_read_killable(sem)
# define down_write_nest_lock(sem, nest_lock) down_write(sem)
# define down_write_nested(sem, subclass) down_write(sem)
# define down_write_killable_nested(sem, subclass) down_write_killable(sem)
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 36c47e7e66a2..6f70572b2938 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -19,12 +19,6 @@ struct scatterlist {
};
/*
- * Since the above length field is an unsigned int, below we define the maximum
- * length in bytes that can be stored in one scatterlist entry.
- */
-#define SCATTERLIST_MAX_SEGMENT (UINT_MAX & PAGE_MASK)
-
-/*
* These macros should be used after a dma_map_sg call has been done
* to get bus addresses of each of the SG entries and their lengths.
* You should only work with the number of sg entries dma_map_sg
diff --git a/include/linux/sched.h b/include/linux/sched.h
index 76cd21fa5501..51d535b69bd6 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -28,12 +28,14 @@
#include <linux/sched/prio.h>
#include <linux/sched/types.h>
#include <linux/signal_types.h>
+#include <linux/syscall_user_dispatch.h>
#include <linux/mm_types_task.h>
#include <linux/task_io_accounting.h>
#include <linux/posix-timers.h>
#include <linux/rseq.h>
#include <linux/seqlock.h>
#include <linux/kcsan.h>
+#include <asm/kmap_size.h>
/* task_struct member predeclarations (sorted alphabetically): */
struct audit_context;
@@ -637,6 +639,13 @@ struct wake_q_node {
struct wake_q_node *next;
};
+struct kmap_ctrl {
+#ifdef CONFIG_KMAP_LOCAL
+ int idx;
+ pte_t pteval[KM_MAX_IDX];
+#endif
+};
+
struct task_struct {
#ifdef CONFIG_THREAD_INFO_IN_TASK
/*
@@ -722,6 +731,11 @@ struct task_struct {
int nr_cpus_allowed;
const cpumask_t *cpus_ptr;
cpumask_t cpus_mask;
+ void *migration_pending;
+#ifdef CONFIG_SMP
+ unsigned short migration_disabled;
+#endif
+ unsigned short migration_flags;
#ifdef CONFIG_PREEMPT_RCU
int rcu_read_lock_nesting;
@@ -987,6 +1001,7 @@ struct task_struct {
unsigned int sessionid;
#endif
struct seccomp seccomp;
+ struct syscall_user_dispatch syscall_dispatch;
/* Thread group tracking: */
u64 parent_exec_id;
@@ -1311,6 +1326,7 @@ struct task_struct {
unsigned int sequential_io;
unsigned int sequential_io_avg;
#endif
+ struct kmap_ctrl kmap_ctrl;
#ifdef CONFIG_DEBUG_ATOMIC_SLEEP
unsigned long task_state_change;
#endif
@@ -1348,6 +1364,10 @@ struct task_struct {
struct callback_head mce_kill_me;
#endif
+#ifdef CONFIG_KRETPROBES
+ struct llist_head kretprobe_instances;
+#endif
+
/*
* New fields for task_struct should be added above here, so that
* they are included in the randomized portion of task_struct.
diff --git a/include/linux/sched/hotplug.h b/include/linux/sched/hotplug.h
index 9a62ffdd296f..412cdaba33eb 100644
--- a/include/linux/sched/hotplug.h
+++ b/include/linux/sched/hotplug.h
@@ -11,8 +11,10 @@ extern int sched_cpu_activate(unsigned int cpu);
extern int sched_cpu_deactivate(unsigned int cpu);
#ifdef CONFIG_HOTPLUG_CPU
+extern int sched_cpu_wait_empty(unsigned int cpu);
extern int sched_cpu_dying(unsigned int cpu);
#else
+# define sched_cpu_wait_empty NULL
# define sched_cpu_dying NULL
#endif
diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h
index d5ece7a9a403..1ae08b8462a4 100644
--- a/include/linux/sched/mm.h
+++ b/include/linux/sched/mm.h
@@ -181,6 +181,22 @@ static inline void fs_reclaim_release(gfp_t gfp_mask) { }
#endif
/**
+ * might_alloc - Mark possible allocation sites
+ * @gfp_mask: gfp_t flags that would be used to allocate
+ *
+ * Similar to might_sleep() and other annotations, this can be used in functions
+ * that might allocate, but often don't. Compiles to nothing without
+ * CONFIG_LOCKDEP. Includes a conditional might_sleep() if @gfp allows blocking.
+ */
+static inline void might_alloc(gfp_t gfp_mask)
+{
+ fs_reclaim_acquire(gfp_mask);
+ fs_reclaim_release(gfp_mask);
+
+ might_sleep_if(gfpflags_allow_blocking(gfp_mask));
+}
+
+/**
* memalloc_noio_save - Marks implicit GFP_NOIO allocation scope.
*
* This functions marks the beginning of the GFP_NOIO allocation scope.
@@ -347,6 +363,8 @@ static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
extern void membarrier_exec_mmap(struct mm_struct *mm);
+extern void membarrier_update_current_mm(struct mm_struct *next_mm);
+
#else
#ifdef CONFIG_ARCH_HAS_MEMBARRIER_CALLBACKS
static inline void membarrier_arch_switch_mm(struct mm_struct *prev,
@@ -361,6 +379,9 @@ static inline void membarrier_exec_mmap(struct mm_struct *mm)
static inline void membarrier_mm_sync_core_before_usermode(struct mm_struct *mm)
{
}
+static inline void membarrier_update_current_mm(struct mm_struct *next_mm)
+{
+}
#endif
#endif /* _LINUX_SCHED_MM_H */
diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h
index 1bad18a1d8ba..bd5afa076189 100644
--- a/include/linux/sched/signal.h
+++ b/include/linux/sched/signal.h
@@ -353,11 +353,25 @@ static inline int restart_syscall(void)
return -ERESTARTNOINTR;
}
-static inline int signal_pending(struct task_struct *p)
+static inline int task_sigpending(struct task_struct *p)
{
return unlikely(test_tsk_thread_flag(p,TIF_SIGPENDING));
}
+static inline int signal_pending(struct task_struct *p)
+{
+#if defined(TIF_NOTIFY_SIGNAL)
+ /*
+ * TIF_NOTIFY_SIGNAL isn't really a signal, but it requires the same
+ * behavior in terms of ensuring that we break out of wait loops
+ * so that notify signal callbacks can be processed.
+ */
+ if (unlikely(test_tsk_thread_flag(p, TIF_NOTIFY_SIGNAL)))
+ return 1;
+#endif
+ return task_sigpending(p);
+}
+
static inline int __fatal_signal_pending(struct task_struct *p)
{
return unlikely(sigismember(&p->pending.signal, SIGKILL));
@@ -365,7 +379,7 @@ static inline int __fatal_signal_pending(struct task_struct *p)
static inline int fatal_signal_pending(struct task_struct *p)
{
- return signal_pending(p) && __fatal_signal_pending(p);
+ return task_sigpending(p) && __fatal_signal_pending(p);
}
static inline int signal_pending_state(long state, struct task_struct *p)
@@ -502,7 +516,7 @@ extern int set_user_sigmask(const sigset_t __user *umask, size_t sigsetsize);
static inline void restore_saved_sigmask_unless(bool interrupted)
{
if (interrupted)
- WARN_ON(!test_thread_flag(TIF_SIGPENDING));
+ WARN_ON(!signal_pending(current));
else
restore_saved_sigmask();
}
diff --git a/include/linux/sched/task.h b/include/linux/sched/task.h
index 85fb2f34c59b..c0f71f2e7160 100644
--- a/include/linux/sched/task.h
+++ b/include/linux/sched/task.h
@@ -47,9 +47,7 @@ extern spinlock_t mmlist_lock;
extern union thread_union init_thread_union;
extern struct task_struct init_task;
-#ifdef CONFIG_PROVE_RCU
extern int lockdep_tasklist_lock_is_held(void);
-#endif /* #ifdef CONFIG_PROVE_RCU */
extern asmlinkage void schedule_tail(struct task_struct *prev);
extern void init_idle(struct task_struct *idle, int cpu);
diff --git a/include/linux/sched/topology.h b/include/linux/sched/topology.h
index 9ef7bf686a9f..8f0f778b7c91 100644
--- a/include/linux/sched/topology.h
+++ b/include/linux/sched/topology.h
@@ -225,6 +225,14 @@ static inline bool cpus_share_cache(int this_cpu, int that_cpu)
#endif /* !CONFIG_SMP */
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+extern void rebuild_sched_domains_energy(void);
+#else
+static inline void rebuild_sched_domains_energy(void)
+{
+}
+#endif
+
#ifndef arch_scale_cpu_capacity
/**
* arch_scale_cpu_capacity - get the capacity scale factor of a given CPU.
diff --git a/include/linux/scs.h b/include/linux/scs.h
index 6dec390cf154..18122d9e17ff 100644
--- a/include/linux/scs.h
+++ b/include/linux/scs.h
@@ -15,24 +15,18 @@
#ifdef CONFIG_SHADOW_CALL_STACK
-/*
- * In testing, 1 KiB shadow stack size (i.e. 128 stack frames on a 64-bit
- * architecture) provided ~40% safety margin on stack usage while keeping
- * memory allocation overhead reasonable.
- */
-#define SCS_SIZE SZ_1K
+#define SCS_ORDER 0
+#define SCS_SIZE (PAGE_SIZE << SCS_ORDER)
#define GFP_SCS (GFP_KERNEL | __GFP_ZERO)
/* An illegal pointer value to mark the end of the shadow stack. */
#define SCS_END_MAGIC (0x5f6UL + POISON_POINTER_DELTA)
-/* Allocate a static per-CPU shadow stack */
-#define DEFINE_SCS(name) \
- DEFINE_PER_CPU(unsigned long [SCS_SIZE/sizeof(long)], name) \
-
#define task_scs(tsk) (task_thread_info(tsk)->scs_base)
#define task_scs_sp(tsk) (task_thread_info(tsk)->scs_sp)
+void *scs_alloc(int node);
+void scs_free(void *s);
void scs_init(void);
int scs_prepare(struct task_struct *tsk, int node);
void scs_release(struct task_struct *tsk);
@@ -61,6 +55,8 @@ static inline bool task_scs_end_corrupted(struct task_struct *tsk)
#else /* CONFIG_SHADOW_CALL_STACK */
+static inline void *scs_alloc(int node) { return NULL; }
+static inline void scs_free(void *s) {}
static inline void scs_init(void) {}
static inline void scs_task_reset(struct task_struct *tsk) {}
static inline int scs_prepare(struct task_struct *tsk, int node) { return 0; }
diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h
index 02aef2844c38..47763f3999f7 100644
--- a/include/linux/seccomp.h
+++ b/include/linux/seccomp.h
@@ -42,7 +42,7 @@ struct seccomp {
extern int __secure_computing(const struct seccomp_data *sd);
static inline int secure_computing(void)
{
- if (unlikely(test_thread_flag(TIF_SECCOMP)))
+ if (unlikely(test_syscall_work(SECCOMP)))
return __secure_computing(NULL);
return 0;
}
diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h
index cbfc78b92b65..2f7bb92b4c9e 100644
--- a/include/linux/seqlock.h
+++ b/include/linux/seqlock.h
@@ -307,10 +307,10 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu
__seqprop_case((s), mutex, prop), \
__seqprop_case((s), ww_mutex, prop))
-#define __seqcount_ptr(s) __seqprop(s, ptr)
-#define __seqcount_sequence(s) __seqprop(s, sequence)
-#define __seqcount_lock_preemptible(s) __seqprop(s, preemptible)
-#define __seqcount_assert_lock_held(s) __seqprop(s, assert)
+#define seqprop_ptr(s) __seqprop(s, ptr)
+#define seqprop_sequence(s) __seqprop(s, sequence)
+#define seqprop_preemptible(s) __seqprop(s, preemptible)
+#define seqprop_assert(s) __seqprop(s, assert)
/**
* __read_seqcount_begin() - begin a seqcount_t read section w/o barrier
@@ -328,13 +328,13 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu
*/
#define __read_seqcount_begin(s) \
({ \
- unsigned seq; \
+ unsigned __seq; \
\
- while ((seq = __seqcount_sequence(s)) & 1) \
+ while ((__seq = seqprop_sequence(s)) & 1) \
cpu_relax(); \
\
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
- seq; \
+ __seq; \
})
/**
@@ -345,10 +345,10 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu
*/
#define raw_read_seqcount_begin(s) \
({ \
- unsigned seq = __read_seqcount_begin(s); \
+ unsigned _seq = __read_seqcount_begin(s); \
\
smp_rmb(); \
- seq; \
+ _seq; \
})
/**
@@ -359,7 +359,7 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu
*/
#define read_seqcount_begin(s) \
({ \
- seqcount_lockdep_reader_access(__seqcount_ptr(s)); \
+ seqcount_lockdep_reader_access(seqprop_ptr(s)); \
raw_read_seqcount_begin(s); \
})
@@ -376,11 +376,11 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu
*/
#define raw_read_seqcount(s) \
({ \
- unsigned seq = __seqcount_sequence(s); \
+ unsigned __seq = seqprop_sequence(s); \
\
smp_rmb(); \
kcsan_atomic_next(KCSAN_SEQLOCK_REGION_MAX); \
- seq; \
+ __seq; \
})
/**
@@ -425,9 +425,9 @@ SEQCOUNT_LOCKNAME(ww_mutex, struct ww_mutex, true, &s->lock->base, ww_mu
* Return: true if a read section retry is required, else false
*/
#define __read_seqcount_retry(s, start) \
- __read_seqcount_t_retry(__seqcount_ptr(s), start)
+ do___read_seqcount_retry(seqprop_ptr(s), start)
-static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
+static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start)
{
kcsan_atomic_next(0);
return unlikely(READ_ONCE(s->sequence) != start);
@@ -445,27 +445,29 @@ static inline int __read_seqcount_t_retry(const seqcount_t *s, unsigned start)
* Return: true if a read section retry is required, else false
*/
#define read_seqcount_retry(s, start) \
- read_seqcount_t_retry(__seqcount_ptr(s), start)
+ do_read_seqcount_retry(seqprop_ptr(s), start)
-static inline int read_seqcount_t_retry(const seqcount_t *s, unsigned start)
+static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start)
{
smp_rmb();
- return __read_seqcount_t_retry(s, start);
+ return do___read_seqcount_retry(s, start);
}
/**
* raw_write_seqcount_begin() - start a seqcount_t write section w/o lockdep
* @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Context: check write_seqcount_begin()
*/
#define raw_write_seqcount_begin(s) \
do { \
- if (__seqcount_lock_preemptible(s)) \
+ if (seqprop_preemptible(s)) \
preempt_disable(); \
\
- raw_write_seqcount_t_begin(__seqcount_ptr(s)); \
+ do_raw_write_seqcount_begin(seqprop_ptr(s)); \
} while (0)
-static inline void raw_write_seqcount_t_begin(seqcount_t *s)
+static inline void do_raw_write_seqcount_begin(seqcount_t *s)
{
kcsan_nestable_atomic_begin();
s->sequence++;
@@ -475,16 +477,18 @@ static inline void raw_write_seqcount_t_begin(seqcount_t *s)
/**
* raw_write_seqcount_end() - end a seqcount_t write section w/o lockdep
* @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
+ *
+ * Context: check write_seqcount_end()
*/
#define raw_write_seqcount_end(s) \
do { \
- raw_write_seqcount_t_end(__seqcount_ptr(s)); \
+ do_raw_write_seqcount_end(seqprop_ptr(s)); \
\
- if (__seqcount_lock_preemptible(s)) \
+ if (seqprop_preemptible(s)) \
preempt_enable(); \
} while (0)
-static inline void raw_write_seqcount_t_end(seqcount_t *s)
+static inline void do_raw_write_seqcount_end(seqcount_t *s)
{
smp_wmb();
s->sequence++;
@@ -498,20 +502,21 @@ static inline void raw_write_seqcount_t_end(seqcount_t *s)
* @subclass: lockdep nesting level
*
* See Documentation/locking/lockdep-design.rst
+ * Context: check write_seqcount_begin()
*/
#define write_seqcount_begin_nested(s, subclass) \
do { \
- __seqcount_assert_lock_held(s); \
+ seqprop_assert(s); \
\
- if (__seqcount_lock_preemptible(s)) \
+ if (seqprop_preemptible(s)) \
preempt_disable(); \
\
- write_seqcount_t_begin_nested(__seqcount_ptr(s), subclass); \
+ do_write_seqcount_begin_nested(seqprop_ptr(s), subclass); \
} while (0)
-static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
+static inline void do_write_seqcount_begin_nested(seqcount_t *s, int subclass)
{
- raw_write_seqcount_t_begin(s);
+ do_raw_write_seqcount_begin(s);
seqcount_acquire(&s->dep_map, subclass, 0, _RET_IP_);
}
@@ -519,46 +524,46 @@ static inline void write_seqcount_t_begin_nested(seqcount_t *s, int subclass)
* write_seqcount_begin() - start a seqcount_t write side critical section
* @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
- * write_seqcount_begin opens a write side critical section of the given
- * seqcount_t.
- *
- * Context: seqcount_t write side critical sections must be serialized and
- * non-preemptible. If readers can be invoked from hardirq or softirq
+ * Context: sequence counter write side sections must be serialized and
+ * non-preemptible. Preemption will be automatically disabled if and
+ * only if the seqcount write serialization lock is associated, and
+ * preemptible. If readers can be invoked from hardirq or softirq
* context, interrupts or bottom halves must be respectively disabled.
*/
#define write_seqcount_begin(s) \
do { \
- __seqcount_assert_lock_held(s); \
+ seqprop_assert(s); \
\
- if (__seqcount_lock_preemptible(s)) \
+ if (seqprop_preemptible(s)) \
preempt_disable(); \
\
- write_seqcount_t_begin(__seqcount_ptr(s)); \
+ do_write_seqcount_begin(seqprop_ptr(s)); \
} while (0)
-static inline void write_seqcount_t_begin(seqcount_t *s)
+static inline void do_write_seqcount_begin(seqcount_t *s)
{
- write_seqcount_t_begin_nested(s, 0);
+ do_write_seqcount_begin_nested(s, 0);
}
/**
* write_seqcount_end() - end a seqcount_t write side critical section
* @s: Pointer to seqcount_t or any of the seqcount_LOCKNAME_t variants
*
- * The write section must've been opened with write_seqcount_begin().
+ * Context: Preemption will be automatically re-enabled if and only if
+ * the seqcount write serialization lock is associated, and preemptible.
*/
#define write_seqcount_end(s) \
do { \
- write_seqcount_t_end(__seqcount_ptr(s)); \
+ do_write_seqcount_end(seqprop_ptr(s)); \
\
- if (__seqcount_lock_preemptible(s)) \
+ if (seqprop_preemptible(s)) \
preempt_enable(); \
} while (0)
-static inline void write_seqcount_t_end(seqcount_t *s)
+static inline void do_write_seqcount_end(seqcount_t *s)
{
seqcount_release(&s->dep_map, _RET_IP_);
- raw_write_seqcount_t_end(s);
+ do_raw_write_seqcount_end(s);
}
/**
@@ -603,9 +608,9 @@ static inline void write_seqcount_t_end(seqcount_t *s)
* }
*/
#define raw_write_seqcount_barrier(s) \
- raw_write_seqcount_t_barrier(__seqcount_ptr(s))
+ do_raw_write_seqcount_barrier(seqprop_ptr(s))
-static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
+static inline void do_raw_write_seqcount_barrier(seqcount_t *s)
{
kcsan_nestable_atomic_begin();
s->sequence++;
@@ -623,9 +628,9 @@ static inline void raw_write_seqcount_t_barrier(seqcount_t *s)
* will complete successfully and see data older than this.
*/
#define write_seqcount_invalidate(s) \
- write_seqcount_t_invalidate(__seqcount_ptr(s))
+ do_write_seqcount_invalidate(seqprop_ptr(s))
-static inline void write_seqcount_t_invalidate(seqcount_t *s)
+static inline void do_write_seqcount_invalidate(seqcount_t *s)
{
smp_wmb();
kcsan_nestable_atomic_begin();
@@ -865,9 +870,9 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
}
/*
- * For all seqlock_t write side functions, use write_seqcount_*t*_begin()
- * instead of the generic write_seqcount_begin(). This way, no redundant
- * lockdep_assert_held() checks are added.
+ * For all seqlock_t write side functions, use the the internal
+ * do_write_seqcount_begin() instead of generic write_seqcount_begin().
+ * This way, no redundant lockdep_assert_held() checks are added.
*/
/**
@@ -886,7 +891,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start)
static inline void write_seqlock(seqlock_t *sl)
{
spin_lock(&sl->lock);
- write_seqcount_t_begin(&sl->seqcount.seqcount);
+ do_write_seqcount_begin(&sl->seqcount.seqcount);
}
/**
@@ -898,7 +903,7 @@ static inline void write_seqlock(seqlock_t *sl)
*/
static inline void write_sequnlock(seqlock_t *sl)
{
- write_seqcount_t_end(&sl->seqcount.seqcount);
+ do_write_seqcount_end(&sl->seqcount.seqcount);
spin_unlock(&sl->lock);
}
@@ -912,7 +917,7 @@ static inline void write_sequnlock(seqlock_t *sl)
static inline void write_seqlock_bh(seqlock_t *sl)
{
spin_lock_bh(&sl->lock);
- write_seqcount_t_begin(&sl->seqcount.seqcount);
+ do_write_seqcount_begin(&sl->seqcount.seqcount);
}
/**
@@ -925,7 +930,7 @@ static inline void write_seqlock_bh(seqlock_t *sl)
*/
static inline void write_sequnlock_bh(seqlock_t *sl)
{
- write_seqcount_t_end(&sl->seqcount.seqcount);
+ do_write_seqcount_end(&sl->seqcount.seqcount);
spin_unlock_bh(&sl->lock);
}
@@ -939,7 +944,7 @@ static inline void write_sequnlock_bh(seqlock_t *sl)
static inline void write_seqlock_irq(seqlock_t *sl)
{
spin_lock_irq(&sl->lock);
- write_seqcount_t_begin(&sl->seqcount.seqcount);
+ do_write_seqcount_begin(&sl->seqcount.seqcount);
}
/**
@@ -951,7 +956,7 @@ static inline void write_seqlock_irq(seqlock_t *sl)
*/
static inline void write_sequnlock_irq(seqlock_t *sl)
{
- write_seqcount_t_end(&sl->seqcount.seqcount);
+ do_write_seqcount_end(&sl->seqcount.seqcount);
spin_unlock_irq(&sl->lock);
}
@@ -960,7 +965,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
unsigned long flags;
spin_lock_irqsave(&sl->lock, flags);
- write_seqcount_t_begin(&sl->seqcount.seqcount);
+ do_write_seqcount_begin(&sl->seqcount.seqcount);
return flags;
}
@@ -989,7 +994,7 @@ static inline unsigned long __write_seqlock_irqsave(seqlock_t *sl)
static inline void
write_sequnlock_irqrestore(seqlock_t *sl, unsigned long flags)
{
- write_seqcount_t_end(&sl->seqcount.seqcount);
+ do_write_seqcount_end(&sl->seqcount.seqcount);
spin_unlock_irqrestore(&sl->lock, flags);
}
diff --git a/include/linux/set_memory.h b/include/linux/set_memory.h
index 860e0f843c12..fe1aa4e54680 100644
--- a/include/linux/set_memory.h
+++ b/include/linux/set_memory.h
@@ -23,6 +23,11 @@ static inline int set_direct_map_default_noflush(struct page *page)
{
return 0;
}
+
+static inline bool kernel_page_present(struct page *page)
+{
+ return true;
+}
#endif
#ifndef set_mce_nospec
diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h
index a5a5d1d4d7b1..d82b6f396588 100644
--- a/include/linux/shmem_fs.h
+++ b/include/linux/shmem_fs.h
@@ -67,7 +67,11 @@ extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr,
unsigned long len, unsigned long pgoff, unsigned long flags);
extern int shmem_lock(struct file *file, int lock, struct user_struct *user);
#ifdef CONFIG_SHMEM
-extern bool shmem_mapping(struct address_space *mapping);
+extern const struct address_space_operations shmem_aops;
+static inline bool shmem_mapping(struct address_space *mapping)
+{
+ return mapping->a_ops == &shmem_aops;
+}
#else
static inline bool shmem_mapping(struct address_space *mapping)
{
diff --git a/include/linux/signal.h b/include/linux/signal.h
index b256f9c65661..205526c4003a 100644
--- a/include/linux/signal.h
+++ b/include/linux/signal.h
@@ -469,4 +469,18 @@ struct seq_file;
extern void render_sigset_t(struct seq_file *, const char *, sigset_t *);
#endif
+#ifndef arch_untagged_si_addr
+/*
+ * Given a fault address and a signal and si_code which correspond to the
+ * _sigfault union member, returns the address that must appear in si_addr if
+ * the signal handler does not have SA_EXPOSE_TAGBITS enabled in sa_flags.
+ */
+static inline void __user *arch_untagged_si_addr(void __user *addr,
+ unsigned long sig,
+ unsigned long si_code)
+{
+ return addr;
+}
+#endif
+
#endif /* _LINUX_SIGNAL_H */
diff --git a/include/linux/signal_types.h b/include/linux/signal_types.h
index f8a90ae9c6ec..68e06c75c5b2 100644
--- a/include/linux/signal_types.h
+++ b/include/linux/signal_types.h
@@ -68,4 +68,16 @@ struct ksignal {
int sig;
};
+#ifndef __ARCH_UAPI_SA_FLAGS
+#ifdef SA_RESTORER
+#define __ARCH_UAPI_SA_FLAGS SA_RESTORER
+#else
+#define __ARCH_UAPI_SA_FLAGS 0
+#endif
+#endif
+
+#define UAPI_SA_FLAGS \
+ (SA_NOCLDSTOP | SA_NOCLDWAIT | SA_SIGINFO | SA_ONSTACK | SA_RESTART | \
+ SA_NODEFER | SA_RESETHAND | SA_EXPOSE_TAGBITS | __ARCH_UAPI_SA_FLAGS)
+
#endif /* _LINUX_SIGNAL_TYPES_H */
diff --git a/include/linux/slab.h b/include/linux/slab.h
index dd6897f62010..be4ba5867ac5 100644
--- a/include/linux/slab.h
+++ b/include/linux/slab.h
@@ -593,6 +593,24 @@ static inline void *kmalloc_array(size_t n, size_t size, gfp_t flags)
}
/**
+ * krealloc_array - reallocate memory for an array.
+ * @p: pointer to the memory chunk to reallocate
+ * @new_n: new number of elements to alloc
+ * @new_size: new size of a single member of the array
+ * @flags: the type of memory to allocate (see kmalloc)
+ */
+static __must_check inline void *
+krealloc_array(void *p, size_t new_n, size_t new_size, gfp_t flags)
+{
+ size_t bytes;
+
+ if (unlikely(check_mul_overflow(new_n, new_size, &bytes)))
+ return NULL;
+
+ return krealloc(p, bytes, flags);
+}
+
+/**
* kcalloc - allocate memory for an array. The memory is set to zero.
* @n: number of elements.
* @size: element size.
diff --git a/include/linux/smp.h b/include/linux/smp.h
index 9f13966d3d92..70c6f6284dcf 100644
--- a/include/linux/smp.h
+++ b/include/linux/smp.h
@@ -21,24 +21,23 @@ typedef bool (*smp_cond_func_t)(int cpu, void *info);
* structure shares (partial) layout with struct irq_work
*/
struct __call_single_data {
- union {
- struct __call_single_node node;
- struct {
- struct llist_node llist;
- unsigned int flags;
-#ifdef CONFIG_64BIT
- u16 src, dst;
-#endif
- };
- };
+ struct __call_single_node node;
smp_call_func_t func;
void *info;
};
+#define CSD_INIT(_func, _info) \
+ (struct __call_single_data){ .func = (_func), .info = (_info), }
+
/* Use __aligned() to avoid to use 2 cache lines for 1 csd */
typedef struct __call_single_data call_single_data_t
__aligned(sizeof(struct __call_single_data));
+#define INIT_CSD(_csd, _func, _info) \
+do { \
+ *(_csd) = CSD_INIT((_func), (_info)); \
+} while (0)
+
/*
* Enqueue a llist_node on the call_single_queue; be very careful, read
* flush_smp_call_function_queue() in detail.
diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h
index 76d8b09384a7..30577c3aecf8 100644
--- a/include/linux/stop_machine.h
+++ b/include/linux/stop_machine.h
@@ -24,6 +24,7 @@ typedef int (*cpu_stop_fn_t)(void *arg);
struct cpu_stop_work {
struct list_head list; /* cpu_stopper->works */
cpu_stop_fn_t fn;
+ unsigned long caller;
void *arg;
struct cpu_stop_done *done;
};
@@ -36,6 +37,8 @@ void stop_machine_park(int cpu);
void stop_machine_unpark(int cpu);
void stop_machine_yield(const struct cpumask *cpumask);
+extern void print_stop_info(const char *log_lvl, struct task_struct *task);
+
#else /* CONFIG_SMP */
#include <linux/workqueue.h>
@@ -80,6 +83,8 @@ static inline bool stop_one_cpu_nowait(unsigned int cpu,
return false;
}
+static inline void print_stop_info(const char *log_lvl, struct task_struct *task) { }
+
#endif /* CONFIG_SMP */
/*
diff --git a/include/linux/syscall_user_dispatch.h b/include/linux/syscall_user_dispatch.h
new file mode 100644
index 000000000000..a0ae443fb7df
--- /dev/null
+++ b/include/linux/syscall_user_dispatch.h
@@ -0,0 +1,40 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) 2020 Collabora Ltd.
+ */
+#ifndef _SYSCALL_USER_DISPATCH_H
+#define _SYSCALL_USER_DISPATCH_H
+
+#include <linux/thread_info.h>
+
+#ifdef CONFIG_GENERIC_ENTRY
+
+struct syscall_user_dispatch {
+ char __user *selector;
+ unsigned long offset;
+ unsigned long len;
+ bool on_dispatch;
+};
+
+int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
+ unsigned long len, char __user *selector);
+
+#define clear_syscall_work_syscall_user_dispatch(tsk) \
+ clear_task_syscall_work(tsk, SYSCALL_USER_DISPATCH)
+
+#else
+struct syscall_user_dispatch {};
+
+static inline int set_syscall_user_dispatch(unsigned long mode, unsigned long offset,
+ unsigned long len, char __user *selector)
+{
+ return -EINVAL;
+}
+
+static inline void clear_syscall_work_syscall_user_dispatch(struct task_struct *tsk)
+{
+}
+
+#endif /* CONFIG_GENERIC_ENTRY */
+
+#endif /* _SYSCALL_USER_DISPATCH_H */
diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h
index e93e249a4e9b..c8a974cead73 100644
--- a/include/linux/thread_info.h
+++ b/include/linux/thread_info.h
@@ -35,6 +35,24 @@ enum {
GOOD_STACK,
};
+#ifdef CONFIG_GENERIC_ENTRY
+enum syscall_work_bit {
+ SYSCALL_WORK_BIT_SECCOMP,
+ SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT,
+ SYSCALL_WORK_BIT_SYSCALL_TRACE,
+ SYSCALL_WORK_BIT_SYSCALL_EMU,
+ SYSCALL_WORK_BIT_SYSCALL_AUDIT,
+ SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH,
+};
+
+#define SYSCALL_WORK_SECCOMP BIT(SYSCALL_WORK_BIT_SECCOMP)
+#define SYSCALL_WORK_SYSCALL_TRACEPOINT BIT(SYSCALL_WORK_BIT_SYSCALL_TRACEPOINT)
+#define SYSCALL_WORK_SYSCALL_TRACE BIT(SYSCALL_WORK_BIT_SYSCALL_TRACE)
+#define SYSCALL_WORK_SYSCALL_EMU BIT(SYSCALL_WORK_BIT_SYSCALL_EMU)
+#define SYSCALL_WORK_SYSCALL_AUDIT BIT(SYSCALL_WORK_BIT_SYSCALL_AUDIT)
+#define SYSCALL_WORK_SYSCALL_USER_DISPATCH BIT(SYSCALL_WORK_BIT_SYSCALL_USER_DISPATCH)
+#endif
+
#include <asm/thread_info.h>
#ifdef __KERNEL__
@@ -97,6 +115,38 @@ static inline int test_ti_thread_flag(struct thread_info *ti, int flag)
#define test_thread_flag(flag) \
test_ti_thread_flag(current_thread_info(), flag)
+#ifdef CONFIG_GENERIC_ENTRY
+#define set_syscall_work(fl) \
+ set_bit(SYSCALL_WORK_BIT_##fl, &current_thread_info()->syscall_work)
+#define test_syscall_work(fl) \
+ test_bit(SYSCALL_WORK_BIT_##fl, &current_thread_info()->syscall_work)
+#define clear_syscall_work(fl) \
+ clear_bit(SYSCALL_WORK_BIT_##fl, &current_thread_info()->syscall_work)
+
+#define set_task_syscall_work(t, fl) \
+ set_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work)
+#define test_task_syscall_work(t, fl) \
+ test_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work)
+#define clear_task_syscall_work(t, fl) \
+ clear_bit(SYSCALL_WORK_BIT_##fl, &task_thread_info(t)->syscall_work)
+
+#else /* CONFIG_GENERIC_ENTRY */
+
+#define set_syscall_work(fl) \
+ set_ti_thread_flag(current_thread_info(), TIF_##fl)
+#define test_syscall_work(fl) \
+ test_ti_thread_flag(current_thread_info(), TIF_##fl)
+#define clear_syscall_work(fl) \
+ clear_ti_thread_flag(current_thread_info(), TIF_##fl)
+
+#define set_task_syscall_work(t, fl) \
+ set_ti_thread_flag(task_thread_info(t), TIF_##fl)
+#define test_task_syscall_work(t, fl) \
+ test_ti_thread_flag(task_thread_info(t), TIF_##fl)
+#define clear_task_syscall_work(t, fl) \
+ clear_ti_thread_flag(task_thread_info(t), TIF_##fl)
+#endif /* !CONFIG_GENERIC_ENTRY */
+
#define tif_need_resched() test_thread_flag(TIF_NEED_RESCHED)
#ifndef CONFIG_HAVE_ARCH_WITHIN_STACK_FRAMES
diff --git a/include/linux/time_namespace.h b/include/linux/time_namespace.h
index 5b6031385db0..3146f1c056c9 100644
--- a/include/linux/time_namespace.h
+++ b/include/linux/time_namespace.h
@@ -4,7 +4,6 @@
#include <linux/sched.h>
-#include <linux/kref.h>
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/err.h>
@@ -18,7 +17,6 @@ struct timens_offsets {
};
struct time_namespace {
- struct kref kref;
struct user_namespace *user_ns;
struct ucounts *ucounts;
struct ns_common ns;
@@ -37,20 +35,21 @@ extern void timens_commit(struct task_struct *tsk, struct time_namespace *ns);
static inline struct time_namespace *get_time_ns(struct time_namespace *ns)
{
- kref_get(&ns->kref);
+ refcount_inc(&ns->ns.count);
return ns;
}
struct time_namespace *copy_time_ns(unsigned long flags,
struct user_namespace *user_ns,
struct time_namespace *old_ns);
-void free_time_ns(struct kref *kref);
-int timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
+void free_time_ns(struct time_namespace *ns);
+void timens_on_fork(struct nsproxy *nsproxy, struct task_struct *tsk);
struct vdso_data *arch_get_vdso_data(void *vvar_page);
static inline void put_time_ns(struct time_namespace *ns)
{
- kref_put(&ns->kref, free_time_ns);
+ if (refcount_dec_and_test(&ns->ns.count))
+ free_time_ns(ns);
}
void proc_timens_show_offsets(struct task_struct *p, struct seq_file *m);
@@ -77,6 +76,20 @@ static inline void timens_add_boottime(struct timespec64 *ts)
*ts = timespec64_add(*ts, ns_offsets->boottime);
}
+static inline u64 timens_add_boottime_ns(u64 nsec)
+{
+ struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;
+
+ return nsec + timespec64_to_ns(&ns_offsets->boottime);
+}
+
+static inline void timens_sub_boottime(struct timespec64 *ts)
+{
+ struct timens_offsets *ns_offsets = &current->nsproxy->time_ns->offsets;
+
+ *ts = timespec64_sub(*ts, ns_offsets->boottime);
+}
+
ktime_t do_timens_ktime_to_host(clockid_t clockid, ktime_t tim,
struct timens_offsets *offsets);
@@ -122,14 +135,22 @@ struct time_namespace *copy_time_ns(unsigned long flags,
return old_ns;
}
-static inline int timens_on_fork(struct nsproxy *nsproxy,
+static inline void timens_on_fork(struct nsproxy *nsproxy,
struct task_struct *tsk)
{
- return 0;
+ return;
}
static inline void timens_add_monotonic(struct timespec64 *ts) { }
static inline void timens_add_boottime(struct timespec64 *ts) { }
+
+static inline u64 timens_add_boottime_ns(u64 nsec)
+{
+ return nsec;
+}
+
+static inline void timens_sub_boottime(struct timespec64 *ts) { }
+
static inline ktime_t timens_ktime_to_host(clockid_t clockid, ktime_t tim)
{
return tim;
diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h
index 7f7e4a3f4394..929d3f3937c0 100644
--- a/include/linux/timekeeping.h
+++ b/include/linux/timekeeping.h
@@ -303,6 +303,8 @@ extern int persistent_clock_is_local;
extern void read_persistent_clock64(struct timespec64 *ts);
void read_persistent_wall_and_boot_offset(struct timespec64 *wall_clock,
struct timespec64 *boot_offset);
+#ifdef CONFIG_GENERIC_CMOS_UPDATE
extern int update_persistent_clock64(struct timespec64 now);
+#endif
#endif
diff --git a/include/linux/timer.h b/include/linux/timer.h
index d10bc7e73b41..fda13c9d1256 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -193,7 +193,6 @@ extern int try_to_del_timer_sync(struct timer_list *timer);
#define del_singleshot_timer_sync(t) del_timer_sync(t)
extern void init_timers(void);
-extern void run_local_timers(void);
struct hrtimer;
extern enum hrtimer_restart it_real_fn(struct hrtimer *);
diff --git a/include/linux/timex.h b/include/linux/timex.h
index ce0859763670..9c2e54faf9b7 100644
--- a/include/linux/timex.h
+++ b/include/linux/timex.h
@@ -157,7 +157,6 @@ extern int do_clock_adjtime(const clockid_t which_clock, struct __kernel_timex *
extern void hardpps(const struct timespec64 *, const struct timespec64 *);
int read_current_timer(unsigned long *timer_val);
-void ntp_notify_cmos_timer(void);
/* The clock frequency of the i8253/i8254 PIT */
#define PIT_TICK_RATE 1193182ul
diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h
index b480e1a07ed8..54b925224a13 100644
--- a/include/linux/tracehook.h
+++ b/include/linux/tracehook.h
@@ -83,11 +83,12 @@ static inline int ptrace_report_syscall(struct pt_regs *regs,
* tracehook_report_syscall_entry - task is about to attempt a system call
* @regs: user register state of current task
*
- * This will be called if %TIF_SYSCALL_TRACE or %TIF_SYSCALL_EMU have been set,
- * when the current task has just entered the kernel for a system call.
- * Full user register state is available here. Changing the values
- * in @regs can affect the system call number and arguments to be tried.
- * It is safe to block here, preventing the system call from beginning.
+ * This will be called if %SYSCALL_WORK_SYSCALL_TRACE or
+ * %SYSCALL_WORK_SYSCALL_EMU have been set, when the current task has just
+ * entered the kernel for a system call. Full user register state is
+ * available here. Changing the values in @regs can affect the system
+ * call number and arguments to be tried. It is safe to block here,
+ * preventing the system call from beginning.
*
* Returns zero normally, or nonzero if the calling arch code should abort
* the system call. That must prevent normal entry so no system call is
@@ -109,15 +110,15 @@ static inline __must_check int tracehook_report_syscall_entry(
* @regs: user register state of current task
* @step: nonzero if simulating single-step or block-step
*
- * This will be called if %TIF_SYSCALL_TRACE has been set, when the
- * current task has just finished an attempted system call. Full
+ * This will be called if %SYSCALL_WORK_SYSCALL_TRACE has been set, when
+ * the current task has just finished an attempted system call. Full
* user register state is available here. It is safe to block here,
* preventing signals from being processed.
*
* If @step is nonzero, this report is also in lieu of the normal
* trap that would follow the system call instruction because
* user_enable_block_step() or user_enable_single_step() was used.
- * In this case, %TIF_SYSCALL_TRACE might not be set.
+ * In this case, %SYSCALL_WORK_SYSCALL_TRACE might not be set.
*
* Called without locks, just before checking for pending signals.
*/
@@ -198,4 +199,31 @@ static inline void tracehook_notify_resume(struct pt_regs *regs)
blkcg_maybe_throttle_current();
}
+/*
+ * called by exit_to_user_mode_loop() if ti_work & _TIF_NOTIFY_SIGNAL. This
+ * is currently used by TWA_SIGNAL based task_work, which requires breaking
+ * wait loops to ensure that task_work is noticed and run.
+ */
+static inline void tracehook_notify_signal(void)
+{
+#if defined(TIF_NOTIFY_SIGNAL)
+ clear_thread_flag(TIF_NOTIFY_SIGNAL);
+ smp_mb__after_atomic();
+ if (current->task_works)
+ task_work_run();
+#endif
+}
+
+/*
+ * Called when we have work to process from exit_to_user_mode_loop()
+ */
+static inline void set_notify_signal(struct task_struct *task)
+{
+#if defined(TIF_NOTIFY_SIGNAL)
+ if (!test_and_set_tsk_thread_flag(task, TIF_NOTIFY_SIGNAL) &&
+ !wake_up_state(task, TASK_INTERRUPTIBLE))
+ kick_process(task);
+#endif
+}
+
#endif /* <linux/tracehook.h> */
diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h
index 6ef1c7109fc4..64cf8ebdc4ec 100644
--- a/include/linux/user_namespace.h
+++ b/include/linux/user_namespace.h
@@ -57,7 +57,6 @@ struct user_namespace {
struct uid_gid_map uid_map;
struct uid_gid_map gid_map;
struct uid_gid_map projid_map;
- atomic_t count;
struct user_namespace *parent;
int level;
kuid_t owner;
@@ -109,7 +108,7 @@ void dec_ucount(struct ucounts *ucounts, enum ucount_type type);
static inline struct user_namespace *get_user_ns(struct user_namespace *ns)
{
if (ns)
- atomic_inc(&ns->count);
+ refcount_inc(&ns->ns.count);
return ns;
}
@@ -119,7 +118,7 @@ extern void __put_user_ns(struct user_namespace *ns);
static inline void put_user_ns(struct user_namespace *ns)
{
- if (ns && atomic_dec_and_test(&ns->count))
+ if (ns && refcount_dec_and_test(&ns->ns.count))
__put_user_ns(ns);
}
diff --git a/include/linux/utsname.h b/include/linux/utsname.h
index 44429d9142ca..2b1737c9b244 100644
--- a/include/linux/utsname.h
+++ b/include/linux/utsname.h
@@ -4,7 +4,6 @@
#include <linux/sched.h>
-#include <linux/kref.h>
#include <linux/nsproxy.h>
#include <linux/ns_common.h>
#include <linux/err.h>
@@ -22,7 +21,6 @@ struct user_namespace;
extern struct user_namespace init_user_ns;
struct uts_namespace {
- struct kref kref;
struct new_utsname name;
struct user_namespace *user_ns;
struct ucounts *ucounts;
@@ -33,16 +31,17 @@ extern struct uts_namespace init_uts_ns;
#ifdef CONFIG_UTS_NS
static inline void get_uts_ns(struct uts_namespace *ns)
{
- kref_get(&ns->kref);
+ refcount_inc(&ns->ns.count);
}
extern struct uts_namespace *copy_utsname(unsigned long flags,
struct user_namespace *user_ns, struct uts_namespace *old_ns);
-extern void free_uts_ns(struct kref *kref);
+extern void free_uts_ns(struct uts_namespace *ns);
static inline void put_uts_ns(struct uts_namespace *ns)
{
- kref_put(&ns->kref, free_uts_ns);
+ if (refcount_dec_and_test(&ns->ns.count))
+ free_uts_ns(ns);
}
void uts_ns_init(void);
diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h
index 938eaf9517e2..80c0181c411d 100644
--- a/include/linux/vmalloc.h
+++ b/include/linux/vmalloc.h
@@ -72,16 +72,14 @@ struct vmap_area {
struct list_head list; /* address sorted list */
/*
- * The following three variables can be packed, because
- * a vmap_area object is always one of the three states:
+ * The following two variables can be packed, because
+ * a vmap_area object can be either:
* 1) in "free" tree (root is vmap_area_root)
- * 2) in "busy" tree (root is free_vmap_area_root)
- * 3) in purge list (head is vmap_purge_list)
+ * 2) or "busy" tree (root is free_vmap_area_root)
*/
union {
unsigned long subtree_max_size; /* in "free" tree */
struct vm_struct *vm; /* in "busy" tree */
- struct llist_node purge_list; /* in purge list */
};
};
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 322dcbfcc933..773135fc6e19 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -450,4 +450,108 @@ static inline const char *vm_event_name(enum vm_event_item item)
}
#endif /* CONFIG_VM_EVENT_COUNTERS || CONFIG_MEMCG */
+#ifdef CONFIG_MEMCG
+
+void __mod_lruvec_state(struct lruvec *lruvec, enum node_stat_item idx,
+ int val);
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_lruvec_state(lruvec, idx, val);
+ local_irq_restore(flags);
+}
+
+void __mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val);
+
+static inline void mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
+{
+ unsigned long flags;
+
+ local_irq_save(flags);
+ __mod_lruvec_page_state(page, idx, val);
+ local_irq_restore(flags);
+}
+
+#else
+
+static inline void __mod_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+ __mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void mod_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx, int val)
+{
+ mod_node_page_state(lruvec_pgdat(lruvec), idx, val);
+}
+
+static inline void __mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
+{
+ __mod_node_page_state(page_pgdat(page), idx, val);
+}
+
+static inline void mod_lruvec_page_state(struct page *page,
+ enum node_stat_item idx, int val)
+{
+ mod_node_page_state(page_pgdat(page), idx, val);
+}
+
+#endif /* CONFIG_MEMCG */
+
+static inline void __inc_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+{
+ __mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void __dec_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+{
+ __mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void __inc_lruvec_page_state(struct page *page,
+ enum node_stat_item idx)
+{
+ __mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void __dec_lruvec_page_state(struct page *page,
+ enum node_stat_item idx)
+{
+ __mod_lruvec_page_state(page, idx, -1);
+}
+
+static inline void inc_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+{
+ mod_lruvec_state(lruvec, idx, 1);
+}
+
+static inline void dec_lruvec_state(struct lruvec *lruvec,
+ enum node_stat_item idx)
+{
+ mod_lruvec_state(lruvec, idx, -1);
+}
+
+static inline void inc_lruvec_page_state(struct page *page,
+ enum node_stat_item idx)
+{
+ mod_lruvec_page_state(page, idx, 1);
+}
+
+static inline void dec_lruvec_page_state(struct page *page,
+ enum node_stat_item idx)
+{
+ mod_lruvec_page_state(page, idx, -1);
+}
+
#endif /* _LINUX_VMSTAT_H */