13 files changed, 595 insertions, 68 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 7937265ef879..5ea0c1773b0a 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1335,6 +1335,7 @@ config LOCKDEP_SMALL
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
+	select DEBUG_IRQFLAGS
 	help
 	  If you say Y here, the lock dependency engine will do
 	  additional runtime checks to debug itself, at the price
@@ -1423,6 +1424,13 @@ config TRACE_IRQFLAGS_NMI
 	depends on TRACE_IRQFLAGS
 	depends on TRACE_IRQFLAGS_NMI_SUPPORT
 
+config DEBUG_IRQFLAGS
+	bool "Debug IRQ flag manipulation"
+	help
+	  Enables checks for potentially unsafe enabling or disabling of
+	  interrupts, such as calling raw_local_irq_restore() when interrupts
+	  are enabled.
+
 config STACKTRACE
 	bool "Stack backtrace support"
 	depends on STACKTRACE_SUPPORT
diff --git a/lib/Makefile b/lib/Makefile
index afeff05fa8c5..fb7d946bb8c3 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -27,16 +27,14 @@ KASAN_SANITIZE_string.o := n
 CFLAGS_string.o += -fno-stack-protector
 endif
 
-# Used by KCSAN while enabled, avoid recursion.
-KCSAN_SANITIZE_random32.o := n
-
 lib-y := ctype.o string.o vsprintf.o cmdline.o \
 	 rbtree.o radix-tree.o timerqueue.o xarray.o \
 	 idr.o extable.o sha1.o irq_regs.o argv_split.o \
 	 flex_proportions.o ratelimit.o show_mem.o \
 	 is_single_threaded.o plist.o decompress.o kobject_uevent.o \
 	 earlycpio.o seq_buf.o siphash.o dec_and_lock.o \
-	 nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o
+	 nmi_backtrace.o nodemask.o win_minmax.o memcat_p.o \
+	 buildid.o
 
 lib-$(CONFIG_PRINTK) += dump_stack.o
 lib-$(CONFIG_SMP) += cpumask.o
diff --git a/lib/buildid.c b/lib/buildid.c
new file mode 100644
index 000000000000..6156997c3895
--- /dev/null
+++ b/lib/buildid.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/buildid.h>
+#include <linux/elf.h>
+#include <linux/pagemap.h>
+
+#define BUILD_ID 3
+/*
+ * Parse build id from the note segment. This logic can be shared between
+ * 32-bit and 64-bit system, because Elf32_Nhdr and Elf64_Nhdr are
+ * identical.
+ */
+static inline int parse_build_id(void *page_addr,
+				 unsigned char *build_id,
+				 __u32 *size,
+				 void *note_start,
+				 Elf32_Word note_size)
+{
+	Elf32_Word note_offs = 0, new_offs;
+
+	/* check for overflow */
+	if (note_start < page_addr || note_start + note_size < note_start)
+		return -EINVAL;
+
+	/* only supports note that fits in the first page */
+	if (note_start + note_size > page_addr + PAGE_SIZE)
+		return -EINVAL;
+
+	while (note_offs + sizeof(Elf32_Nhdr) < note_size) {
+		Elf32_Nhdr *nhdr = (Elf32_Nhdr *)(note_start + note_offs);
+
+		if (nhdr->n_type == BUILD_ID &&
+		    nhdr->n_namesz == sizeof("GNU") &&
+		    nhdr->n_descsz > 0 &&
+		    nhdr->n_descsz <= BUILD_ID_SIZE_MAX) {
+			memcpy(build_id,
+			       note_start + note_offs +
+			       ALIGN(sizeof("GNU"), 4) + sizeof(Elf32_Nhdr),
+			       nhdr->n_descsz);
+			memset(build_id + nhdr->n_descsz, 0,
+			       BUILD_ID_SIZE_MAX - nhdr->n_descsz);
+			if (size)
+				*size = nhdr->n_descsz;
+			return 0;
+		}
+		new_offs = note_offs + sizeof(Elf32_Nhdr) +
+			ALIGN(nhdr->n_namesz, 4) + ALIGN(nhdr->n_descsz, 4);
+		if (new_offs <= note_offs)  /* overflow */
+			break;
+		note_offs = new_offs;
+	}
+	return -EINVAL;
+}
+
+/* Parse build ID from 32-bit ELF */
+static int get_build_id_32(void *page_addr, unsigned char *build_id,
+			   __u32 *size)
+{
+	Elf32_Ehdr *ehdr = (Elf32_Ehdr *)page_addr;
+	Elf32_Phdr *phdr;
+	int i;
+
+	/* only supports phdr that fits in one page */
+	if (ehdr->e_phnum >
+	    (PAGE_SIZE - sizeof(Elf32_Ehdr)) / sizeof(Elf32_Phdr))
+		return -EINVAL;
+
+	phdr = (Elf32_Phdr *)(page_addr + sizeof(Elf32_Ehdr));
+
+	for (i = 0; i < ehdr->e_phnum; ++i) {
+		if (phdr[i].p_type == PT_NOTE &&
+		    !parse_build_id(page_addr, build_id, size,
+				    page_addr + phdr[i].p_offset,
+				    phdr[i].p_filesz))
+			return 0;
+	}
+	return -EINVAL;
+}
+
+/* Parse build ID from 64-bit ELF */
+static int get_build_id_64(void *page_addr, unsigned char *build_id,
+			   __u32 *size)
+{
+	Elf64_Ehdr *ehdr = (Elf64_Ehdr *)page_addr;
+	Elf64_Phdr *phdr;
+	int i;
+
+	/* only supports phdr that fits in one page */
+	if (ehdr->e_phnum >
+	    (PAGE_SIZE - sizeof(Elf64_Ehdr)) / sizeof(Elf64_Phdr))
+		return -EINVAL;
+
+	phdr = (Elf64_Phdr *)(page_addr + sizeof(Elf64_Ehdr));
+
+	for (i = 0; i < ehdr->e_phnum; ++i) {
+		if (phdr[i].p_type == PT_NOTE &&
+		    !parse_build_id(page_addr, build_id, size,
+				    page_addr + phdr[i].p_offset,
+				    phdr[i].p_filesz))
+			return 0;
+	}
+	return -EINVAL;
+}
+
+/*
+ * Parse build ID of ELF file mapped to vma
+ * @vma:      vma object
+ * @build_id: buffer to store build id, at least BUILD_ID_SIZE long
+ * @size:     returns actual build id size in case of success
+ *
+ * Returns 0 on success, otherwise error (< 0).
+ */
+int build_id_parse(struct vm_area_struct *vma, unsigned char *build_id,
+		   __u32 *size)
+{
+	Elf32_Ehdr *ehdr;
+	struct page *page;
+	void *page_addr;
+	int ret;
+
+	/* only works for page backed storage  */
+	if (!vma->vm_file)
+		return -EINVAL;
+
+	page = find_get_page(vma->vm_file->f_mapping, 0);
+	if (!page)
+		return -EFAULT;	/* page not mapped */
+
+	ret = -EINVAL;
+	page_addr = kmap_atomic(page);
+	ehdr = (Elf32_Ehdr *)page_addr;
+
+	/* compare magic x7f "ELF" */
+	if (memcmp(ehdr->e_ident, ELFMAG, SELFMAG) != 0)
+		goto out;
+
+	/* only support executable file and shared object file */
+	if (ehdr->e_type != ET_EXEC && ehdr->e_type != ET_DYN)
+		goto out;
+
+	if (ehdr->e_ident[EI_CLASS] == ELFCLASS32)
+		ret = get_build_id_32(page_addr, build_id, size);
+	else if (ehdr->e_ident[EI_CLASS] == ELFCLASS64)
+		ret = get_build_id_64(page_addr, build_id, size);
+out:
+	kunmap_atomic(page_addr);
+	put_page(page);
+	return ret;
+}
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 35924025097b..c3c76b833384 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -6,7 +6,6 @@
 #include <linux/export.h>
 #include <linux/memblock.h>
 #include <linux/numa.h>
-#include <linux/sched/isolation.h>
 
 /**
  * cpumask_next - get the next cpu in a cpumask
@@ -206,27 +205,22 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask)
  */
 unsigned int cpumask_local_spread(unsigned int i, int node)
 {
-	int cpu, hk_flags;
-	const struct cpumask *mask;
+	int cpu;
 
-	hk_flags = HK_FLAG_DOMAIN | HK_FLAG_MANAGED_IRQ;
-	mask = housekeeping_cpumask(hk_flags);
 	/* Wrap: we always want a cpu. */
-	i %= cpumask_weight(mask);
+	i %= num_online_cpus();
 
 	if (node == NUMA_NO_NODE) {
-		for_each_cpu(cpu, mask) {
+		for_each_cpu(cpu, cpu_online_mask)
 			if (i-- == 0)
 				return cpu;
-		}
 	} else {
 		/* NUMA first. */
-		for_each_cpu_and(cpu, cpumask_of_node(node), mask) {
+		for_each_cpu_and(cpu, cpumask_of_node(node), cpu_online_mask)
 			if (i-- == 0)
 				return cpu;
-		}
 
-		for_each_cpu(cpu, mask) {
+		for_each_cpu(cpu, cpu_online_mask) {
 			/* Skip NUMA nodes, done above. */
 			if (cpumask_test_cpu(cpu, cpumask_of_node(node)))
 				continue;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index a21e6a5792c5..d8ca336ff9cd 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -72,8 +72,6 @@
 	__start.bi_bvec_done = skip;			\
 	__start.bi_idx = 0;				\
 	for_each_bvec(__v, i->bvec, __bi, __start) {	\
-		if (!__v.bv_len)			\
-			continue;			\
 		(void)(STEP);				\
 	}						\
 }
@@ -592,14 +590,15 @@ static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
 }
 
 static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
-				__wsum *csum, struct iov_iter *i)
+					 struct csum_state *csstate,
+					 struct iov_iter *i)
 {
 	struct pipe_inode_info *pipe = i->pipe;
 	unsigned int p_mask = pipe->ring_size - 1;
+	__wsum sum = csstate->csum;
+	size_t off = csstate->off;
 	unsigned int i_head;
 	size_t n, r;
-	size_t off = 0;
-	__wsum sum = *csum;
 
 	if (!sanity(i))
 		return 0;
@@ -621,7 +620,8 @@ static size_t csum_and_copy_to_pipe_iter(const void *addr, size_t bytes,
 		i_head++;
 	} while (n);
 	i->count -= bytes;
-	*csum = sum;
+	csstate->csum = sum;
+	csstate->off = off;
 	return bytes;
 }
 
@@ -1067,6 +1067,21 @@ static void pipe_advance(struct iov_iter *i, size_t size)
 	pipe_truncate(i);
 }
 
+static void iov_iter_bvec_advance(struct iov_iter *i, size_t size)
+{
+	struct bvec_iter bi;
+
+	bi.bi_size = i->count;
+	bi.bi_bvec_done = i->iov_offset;
+	bi.bi_idx = 0;
+	bvec_iter_advance(i->bvec, &bi, size);
+
+	i->bvec += bi.bi_idx;
+	i->nr_segs -= bi.bi_idx;
+	i->count = bi.bi_size;
+	i->iov_offset = bi.bi_bvec_done;
+}
+
 void iov_iter_advance(struct iov_iter *i, size_t size)
 {
 	if (unlikely(iov_iter_is_pipe(i))) {
@@ -1077,6 +1092,10 @@ void iov_iter_advance(struct iov_iter *i, size_t size)
 		i->count -= size;
 		return;
 	}
+	if (iov_iter_is_bvec(i)) {
+		iov_iter_bvec_advance(i, size);
+		return;
+	}
 	iterate_and_advance(i, size, v, 0, 0, 0)
 }
 EXPORT_SYMBOL(iov_iter_advance);
@@ -1522,18 +1541,19 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
 }
 EXPORT_SYMBOL(csum_and_copy_from_iter_full);
 
-size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
+size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *_csstate,
 			     struct iov_iter *i)
 {
+	struct csum_state *csstate = _csstate;
 	const char *from = addr;
-	__wsum *csum = csump;
 	__wsum sum, next;
-	size_t off = 0;
+	size_t off;
 
 	if (unlikely(iov_iter_is_pipe(i)))
-		return csum_and_copy_to_pipe_iter(addr, bytes, csum, i);
+		return csum_and_copy_to_pipe_iter(addr, bytes, _csstate, i);
 
-	sum = *csum;
+	sum = csstate->csum;
+	off = csstate->off;
 	if (unlikely(iov_iter_is_discard(i))) {
 		WARN_ON(1);	/* for now */
 		return 0;
@@ -1561,7 +1581,8 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
 		off += v.iov_len;
 	})
 	)
-	*csum = sum;
+	csstate->csum = sum;
+	csstate->off = off;
 	return bytes;
 }
 EXPORT_SYMBOL(csum_and_copy_to_iter);
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 9959ea23529e..2d85abac1744 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -24,6 +24,7 @@
 #include <linux/debug_locks.h>
 #include <linux/irqflags.h>
 #include <linux/rtmutex.h>
+#include <linux/local_lock.h>
 
 /*
  * Change this to 1 if you want to see the failure printouts:
@@ -51,6 +52,7 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose);
 #define LOCKTYPE_RWSEM	0x8
 #define LOCKTYPE_WW	0x10
 #define LOCKTYPE_RTMUTEX 0x20
+#define LOCKTYPE_LL	0x40
 
 static struct ww_acquire_ctx t, t2;
 static struct ww_mutex o, o2, o3;
@@ -64,6 +66,9 @@ static DEFINE_SPINLOCK(lock_B);
 static DEFINE_SPINLOCK(lock_C);
 static DEFINE_SPINLOCK(lock_D);
 
+static DEFINE_RAW_SPINLOCK(raw_lock_A);
+static DEFINE_RAW_SPINLOCK(raw_lock_B);
+
 static DEFINE_RWLOCK(rwlock_A);
 static DEFINE_RWLOCK(rwlock_B);
 static DEFINE_RWLOCK(rwlock_C);
@@ -133,6 +138,8 @@ static DEFINE_RT_MUTEX(rtmutex_Z2);
 
 #endif
 
+static local_lock_t local_A = INIT_LOCAL_LOCK(local_A);
+
 /*
  * non-inlined runtime initializers, to let separate locks share
  * the same lock-class:
@@ -1306,19 +1313,23 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
+# define I_RAW_SPINLOCK(x)	lockdep_reset_lock(&raw_lock_##x.dep_map)
 # define I_RWLOCK(x)	lockdep_reset_lock(&rwlock_##x.dep_map)
 # define I_MUTEX(x)	lockdep_reset_lock(&mutex_##x.dep_map)
 # define I_RWSEM(x)	lockdep_reset_lock(&rwsem_##x.dep_map)
 # define I_WW(x)	lockdep_reset_lock(&x.dep_map)
+# define I_LOCAL_LOCK(x) lockdep_reset_lock(&local_##x.dep_map)
 #ifdef CONFIG_RT_MUTEXES
 # define I_RTMUTEX(x)	lockdep_reset_lock(&rtmutex_##x.dep_map)
 #endif
 #else
 # define I_SPINLOCK(x)
+# define I_RAW_SPINLOCK(x)
 # define I_RWLOCK(x)
 # define I_MUTEX(x)
 # define I_RWSEM(x)
 # define I_WW(x)
+# define I_LOCAL_LOCK(x)
 #endif
 
 #ifndef I_RTMUTEX
@@ -1358,9 +1369,16 @@ static void reset_locks(void)
 	I1(A); I1(B); I1(C); I1(D);
 	I1(X1); I1(X2); I1(Y1); I1(Y2); I1(Z1); I1(Z2);
 	I_WW(t); I_WW(t2); I_WW(o.base); I_WW(o2.base); I_WW(o3.base);
+	I_RAW_SPINLOCK(A); I_RAW_SPINLOCK(B);
+	I_LOCAL_LOCK(A);
+
 	lockdep_reset();
+
 	I2(A); I2(B); I2(C); I2(D);
 	init_shared_classes();
+	raw_spin_lock_init(&raw_lock_A);
+	raw_spin_lock_init(&raw_lock_B);
+	local_lock_init(&local_A);
 
 	ww_mutex_init(&o, &ww_lockdep); ww_mutex_init(&o2, &ww_lockdep); ww_mutex_init(&o3, &ww_lockdep);
 	memset(&t, 0, sizeof(t)); memset(&t2, 0, sizeof(t2));
@@ -1382,6 +1400,8 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 
 	WARN_ON(irqs_disabled());
 
+	debug_locks_silent = !(debug_locks_verbose & lockclass_mask);
+
 	testcase_fn();
 	/*
 	 * Filter out expected failures:
@@ -1402,7 +1422,7 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask)
 	}
 	testcase_total++;
 
-	if (debug_locks_verbose)
+	if (debug_locks_verbose & lockclass_mask)
 		pr_cont(" lockclass mask: %x, debug_locks: %d, expected: %d\n",
 			lockclass_mask, debug_locks, expected);
 	/*
@@ -2419,6 +2439,311 @@ static void fs_reclaim_tests(void)
 	pr_cont("\n");
 }
 
+#define __guard(cleanup) __maybe_unused __attribute__((__cleanup__(cleanup)))
+
+static void hardirq_exit(int *_)
+{
+	HARDIRQ_EXIT();
+}
+
+#define HARDIRQ_CONTEXT(name, ...)					\
+	int hardirq_guard_##name __guard(hardirq_exit);			\
+	HARDIRQ_ENTER();
+
+#define NOTTHREADED_HARDIRQ_CONTEXT(name, ...)				\
+	int notthreaded_hardirq_guard_##name __guard(hardirq_exit);	\
+	local_irq_disable();						\
+	__irq_enter();							\
+	WARN_ON(!in_irq());
+
+static void softirq_exit(int *_)
+{
+	SOFTIRQ_EXIT();
+}
+
+#define SOFTIRQ_CONTEXT(name, ...)				\
+	int softirq_guard_##name __guard(softirq_exit);		\
+	SOFTIRQ_ENTER();
+
+static void rcu_exit(int *_)
+{
+	rcu_read_unlock();
+}
+
+#define RCU_CONTEXT(name, ...)					\
+	int rcu_guard_##name __guard(rcu_exit);			\
+	rcu_read_lock();
+
+static void rcu_bh_exit(int *_)
+{
+	rcu_read_unlock_bh();
+}
+
+#define RCU_BH_CONTEXT(name, ...)				\
+	int rcu_bh_guard_##name __guard(rcu_bh_exit);		\
+	rcu_read_lock_bh();
+
+static void rcu_sched_exit(int *_)
+{
+	rcu_read_unlock_sched();
+}
+
+#define RCU_SCHED_CONTEXT(name, ...)				\
+	int rcu_sched_guard_##name __guard(rcu_sched_exit);	\
+	rcu_read_lock_sched();
+
+static void rcu_callback_exit(int *_)
+{
+	rcu_lock_release(&rcu_callback_map);
+}
+
+#define RCU_CALLBACK_CONTEXT(name, ...)					\
+	int rcu_callback_guard_##name __guard(rcu_callback_exit);	\
+	rcu_lock_acquire(&rcu_callback_map);
+
+
+static void raw_spinlock_exit(raw_spinlock_t **lock)
+{
+	raw_spin_unlock(*lock);
+}
+
+#define RAW_SPINLOCK_CONTEXT(name, lock)						\
+	raw_spinlock_t *raw_spinlock_guard_##name __guard(raw_spinlock_exit) = &(lock);	\
+	raw_spin_lock(&(lock));
+
+static void spinlock_exit(spinlock_t **lock)
+{
+	spin_unlock(*lock);
+}
+
+#define SPINLOCK_CONTEXT(name, lock)						\
+	spinlock_t *spinlock_guard_##name __guard(spinlock_exit) = &(lock);	\
+	spin_lock(&(lock));
+
+static void mutex_exit(struct mutex **lock)
+{
+	mutex_unlock(*lock);
+}
+
+#define MUTEX_CONTEXT(name, lock)					\
+	struct mutex *mutex_guard_##name __guard(mutex_exit) = &(lock);	\
+	mutex_lock(&(lock));
+
+#define GENERATE_2_CONTEXT_TESTCASE(outer, outer_lock, inner, inner_lock)	\
+										\
+static void __maybe_unused inner##_in_##outer(void)				\
+{										\
+	outer##_CONTEXT(_, outer_lock);						\
+	{									\
+		inner##_CONTEXT(_, inner_lock);					\
+	}									\
+}
+
+/*
+ * wait contexts (considering PREEMPT_RT)
+ *
+ * o: inner is allowed in outer
+ * x: inner is disallowed in outer
+ *
+ *       \  inner |  RCU  | RAW_SPIN | SPIN | MUTEX
+ * outer  \       |       |          |      |
+ * ---------------+-------+----------+------+-------
+ * HARDIRQ        |   o   |    o     |  o   |  x
+ * ---------------+-------+----------+------+-------
+ * NOTTHREADED_IRQ|   o   |    o     |  x   |  x
+ * ---------------+-------+----------+------+-------
+ * SOFTIRQ        |   o   |    o     |  o   |  x
+ * ---------------+-------+----------+------+-------
+ * RCU            |   o   |    o     |  o   |  x
+ * ---------------+-------+----------+------+-------
+ * RCU_BH         |   o   |    o     |  o   |  x
+ * ---------------+-------+----------+------+-------
+ * RCU_CALLBACK   |   o   |    o     |  o   |  x
+ * ---------------+-------+----------+------+-------
+ * RCU_SCHED      |   o   |    o     |  x   |  x
+ * ---------------+-------+----------+------+-------
+ * RAW_SPIN       |   o   |    o     |  x   |  x
+ * ---------------+-------+----------+------+-------
+ * SPIN           |   o   |    o     |  o   |  x
+ * ---------------+-------+----------+------+-------
+ * MUTEX          |   o   |    o     |  o   |  o
+ * ---------------+-------+----------+------+-------
+ */
+
+#define GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(inner, inner_lock)		\
+GENERATE_2_CONTEXT_TESTCASE(HARDIRQ, , inner, inner_lock)			\
+GENERATE_2_CONTEXT_TESTCASE(NOTTHREADED_HARDIRQ, , inner, inner_lock)		\
+GENERATE_2_CONTEXT_TESTCASE(SOFTIRQ, , inner, inner_lock)			\
+GENERATE_2_CONTEXT_TESTCASE(RCU, , inner, inner_lock)				\
+GENERATE_2_CONTEXT_TESTCASE(RCU_BH, , inner, inner_lock)			\
+GENERATE_2_CONTEXT_TESTCASE(RCU_CALLBACK, , inner, inner_lock)			\
+GENERATE_2_CONTEXT_TESTCASE(RCU_SCHED, , inner, inner_lock)			\
+GENERATE_2_CONTEXT_TESTCASE(RAW_SPINLOCK, raw_lock_A, inner, inner_lock)	\
+GENERATE_2_CONTEXT_TESTCASE(SPINLOCK, lock_A, inner, inner_lock)		\
+GENERATE_2_CONTEXT_TESTCASE(MUTEX, mutex_A, inner, inner_lock)
+
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(RCU, )
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(RAW_SPINLOCK, raw_lock_B)
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(SPINLOCK, lock_B)
+GENERATE_2_CONTEXT_TESTCASE_FOR_ALL_OUTER(MUTEX, mutex_B)
+
+/* the outer context allows all kinds of preemption */
+#define DO_CONTEXT_TESTCASE_OUTER_PREEMPTIBLE(outer)			\
+	dotest(RCU_in_##outer, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(RAW_SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);	\
+	dotest(SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);		\
+	dotest(MUTEX_in_##outer, SUCCESS, LOCKTYPE_MUTEX);		\
+
+/*
+ * the outer context only allows the preemption introduced by spinlock_t (which
+ * is a sleepable lock for PREEMPT_RT)
+ */
+#define DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(outer)		\
+	dotest(RCU_in_##outer, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(RAW_SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);	\
+	dotest(SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);		\
+	dotest(MUTEX_in_##outer, FAILURE, LOCKTYPE_MUTEX);		\
+
+/* the outer doesn't allows any kind of preemption */
+#define DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(outer)			\
+	dotest(RCU_in_##outer, SUCCESS, LOCKTYPE_RWLOCK);		\
+	dotest(RAW_SPINLOCK_in_##outer, SUCCESS, LOCKTYPE_SPIN);	\
+	dotest(SPINLOCK_in_##outer, FAILURE, LOCKTYPE_SPIN);		\
+	dotest(MUTEX_in_##outer, FAILURE, LOCKTYPE_MUTEX);		\
+
+static void wait_context_tests(void)
+{
+	printk("  --------------------------------------------------------------------------\n");
+	printk("  | wait context tests |\n");
+	printk("  --------------------------------------------------------------------------\n");
+	printk("                                 | rcu  | raw  | spin |mutex |\n");
+	printk("  --------------------------------------------------------------------------\n");
+	print_testname("in hardirq context");
+	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(HARDIRQ);
+	pr_cont("\n");
+
+	print_testname("in hardirq context (not threaded)");
+	DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(NOTTHREADED_HARDIRQ);
+	pr_cont("\n");
+
+	print_testname("in softirq context");
+	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(SOFTIRQ);
+	pr_cont("\n");
+
+	print_testname("in RCU context");
+	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(RCU);
+	pr_cont("\n");
+
+	print_testname("in RCU-bh context");
+	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(RCU_BH);
+	pr_cont("\n");
+
+	print_testname("in RCU callback context");
+	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(RCU_CALLBACK);
+	pr_cont("\n");
+
+	print_testname("in RCU-sched context");
+	DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(RCU_SCHED);
+	pr_cont("\n");
+
+	print_testname("in RAW_SPINLOCK context");
+	DO_CONTEXT_TESTCASE_OUTER_NOT_PREEMPTIBLE(RAW_SPINLOCK);
+	pr_cont("\n");
+
+	print_testname("in SPINLOCK context");
+	DO_CONTEXT_TESTCASE_OUTER_LIMITED_PREEMPTIBLE(SPINLOCK);
+	pr_cont("\n");
+
+	print_testname("in MUTEX context");
+	DO_CONTEXT_TESTCASE_OUTER_PREEMPTIBLE(MUTEX);
+	pr_cont("\n");
+}
+
+static void local_lock_2(void)
+{
+	local_lock_acquire(&local_A);	/* IRQ-ON */
+	local_lock_release(&local_A);
+
+	HARDIRQ_ENTER();
+	spin_lock(&lock_A);		/* IN-IRQ */
+	spin_unlock(&lock_A);
+	HARDIRQ_EXIT()
+
+	HARDIRQ_DISABLE();
+	spin_lock(&lock_A);
+	local_lock_acquire(&local_A);	/* IN-IRQ <-> IRQ-ON cycle, false */
+	local_lock_release(&local_A);
+	spin_unlock(&lock_A);
+	HARDIRQ_ENABLE();
+}
+
+static void local_lock_3A(void)
+{
+	local_lock_acquire(&local_A);	/* IRQ-ON */
+	spin_lock(&lock_B);		/* IRQ-ON */
+	spin_unlock(&lock_B);
+	local_lock_release(&local_A);
+
+	HARDIRQ_ENTER();
+	spin_lock(&lock_A);		/* IN-IRQ */
+	spin_unlock(&lock_A);
+	HARDIRQ_EXIT()
+
+	HARDIRQ_DISABLE();
+	spin_lock(&lock_A);
+	local_lock_acquire(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+	local_lock_release(&local_A);
+	spin_unlock(&lock_A);
+	HARDIRQ_ENABLE();
+}
+
+static void local_lock_3B(void)
+{
+	local_lock_acquire(&local_A);	/* IRQ-ON */
+	spin_lock(&lock_B);		/* IRQ-ON */
+	spin_unlock(&lock_B);
+	local_lock_release(&local_A);
+
+	HARDIRQ_ENTER();
+	spin_lock(&lock_A);		/* IN-IRQ */
+	spin_unlock(&lock_A);
+	HARDIRQ_EXIT()
+
+	HARDIRQ_DISABLE();
+	spin_lock(&lock_A);
+	local_lock_acquire(&local_A);	/* IN-IRQ <-> IRQ-ON cycle only if we count local_lock(), false */
+	local_lock_release(&local_A);
+	spin_unlock(&lock_A);
+	HARDIRQ_ENABLE();
+
+	HARDIRQ_DISABLE();
+	spin_lock(&lock_A);
+	spin_lock(&lock_B);		/* IN-IRQ <-> IRQ-ON cycle, true */
+	spin_unlock(&lock_B);
+	spin_unlock(&lock_A);
+	HARDIRQ_DISABLE();
+
+}
+
+static void local_lock_tests(void)
+{
+	printk("  --------------------------------------------------------------------------\n");
+	printk("  | local_lock tests |\n");
+	printk("  ---------------------\n");
+
+	print_testname("local_lock inversion  2");
+	dotest(local_lock_2, SUCCESS, LOCKTYPE_LL);
+	pr_cont("\n");
+
+	print_testname("local_lock inversion 3A");
+	dotest(local_lock_3A, SUCCESS, LOCKTYPE_LL);
+	pr_cont("\n");
+
+	print_testname("local_lock inversion 3B");
+	dotest(local_lock_3B, FAILURE, LOCKTYPE_LL);
+	pr_cont("\n");
+}
+
 void locking_selftest(void)
 {
 	/*
@@ -2446,7 +2771,6 @@ void locking_selftest(void)
 	printk("  --------------------------------------------------------------------------\n");
 
 	init_shared_classes();
-	debug_locks_silent = !debug_locks_verbose;
 	lockdep_set_selftest_task(current);
 
 	DO_TESTCASE_6R("A-A deadlock", AA);
@@ -2542,6 +2866,12 @@ void locking_selftest(void)
 
 	fs_reclaim_tests();
 
+	/* Wait context test cases that are specific for RAW_LOCK_NESTING */
+	if (IS_ENABLED(CONFIG_PROVE_RAW_LOCK_NESTING))
+		wait_context_tests();
+
+	local_lock_tests();
+
 	if (unexpected_testcase_failures) {
 		printk("-----------------------------------------------------------------\n");
 		debug_locks = 0;
diff --git a/lib/parman.c b/lib/parman.c
index c6e42a8db824..a11f2f667639 100644
--- a/lib/parman.c
+++ b/lib/parman.c
@@ -85,7 +85,6 @@ static int parman_shrink(struct parman *parman)
 }
 
 static bool parman_prio_used(struct parman_prio *prio)
-
 {
 	return !list_empty(&prio->item_list);
 }
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index e59eda07305e..a1071cdefb5a 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -5,6 +5,7 @@
 #include <linux/sched.h>
 #include <linux/wait.h>
 #include <linux/slab.h>
+#include <linux/mm.h>
 #include <linux/percpu-refcount.h>
 
 /*
@@ -168,6 +169,7 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
 			struct percpu_ref_data, rcu);
 	struct percpu_ref *ref = data->ref;
 	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
+	static atomic_t underflows;
 	unsigned long count = 0;
 	int cpu;
 
@@ -191,9 +193,13 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
 	 */
 	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &data->count);
 
-	WARN_ONCE(atomic_long_read(&data->count) <= 0,
-		  "percpu ref (%ps) <= 0 (%ld) after switching to atomic",
-		  data->release, atomic_long_read(&data->count));
+	if (WARN_ONCE(atomic_long_read(&data->count) <= 0,
+		      "percpu ref (%ps) <= 0 (%ld) after switching to atomic",
+		      data->release, atomic_long_read(&data->count)) &&
+	    atomic_inc_return(&underflows) < 4) {
+		pr_err("%s(): percpu_ref underflow", __func__);
+		mem_dump_obj(data);
+	}
 
 	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
 	percpu_ref_call_confirm_rcu(rcu);
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index ca7d635bccd9..4dc4dcbecd12 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -345,7 +345,7 @@ static int __bpf_fill_ja(struct bpf_test *self, unsigned int len,
 
 static int bpf_fill_maxinsns11(struct bpf_test *self)
 {
-	/* Hits 70 passes on x86_64, so cannot get JITed there. */
+	/* Hits 70 passes on x86_64 and triggers NOPs padding. */
 	return __bpf_fill_ja(self, BPF_MAXINSNS, 68);
 }
 
@@ -4295,13 +4295,13 @@ static struct bpf_test tests[] = {
 		{ { 0, 0xffffffff } },
 		.stack_depth = 40,
 	},
-	/* BPF_STX | BPF_XADD | BPF_W/DW */
+	/* BPF_STX | BPF_ATOMIC | BPF_W/DW */
 	{
 		"STX_XADD_W: Test: 0x12 + 0x10 = 0x22",
 		.u.insns_int = {
 			BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
 			BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-			BPF_STX_XADD(BPF_W, R10, R0, -40),
+			BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
 			BPF_LDX_MEM(BPF_W, R0, R10, -40),
 			BPF_EXIT_INSN(),
 		},
@@ -4316,7 +4316,7 @@ static struct bpf_test tests[] = {
 			BPF_ALU64_REG(BPF_MOV, R1, R10),
 			BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
 			BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-			BPF_STX_XADD(BPF_W, R10, R0, -40),
+			BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
 			BPF_ALU64_REG(BPF_MOV, R0, R10),
 			BPF_ALU64_REG(BPF_SUB, R0, R1),
 			BPF_EXIT_INSN(),
@@ -4331,7 +4331,7 @@ static struct bpf_test tests[] = {
 		.u.insns_int = {
 			BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
 			BPF_ST_MEM(BPF_W, R10, -40, 0x10),
-			BPF_STX_XADD(BPF_W, R10, R0, -40),
+			BPF_ATOMIC_OP(BPF_W, BPF_ADD, R10, R0, -40),
 			BPF_EXIT_INSN(),
 		},
 		INTERNAL,
@@ -4352,7 +4352,7 @@ static struct bpf_test tests[] = {
 		.u.insns_int = {
 			BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
 			BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-			BPF_STX_XADD(BPF_DW, R10, R0, -40),
+			BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
 			BPF_LDX_MEM(BPF_DW, R0, R10, -40),
 			BPF_EXIT_INSN(),
 		},
@@ -4367,7 +4367,7 @@ static struct bpf_test tests[] = {
 			BPF_ALU64_REG(BPF_MOV, R1, R10),
 			BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
 			BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-			BPF_STX_XADD(BPF_DW, R10, R0, -40),
+			BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
 			BPF_ALU64_REG(BPF_MOV, R0, R10),
 			BPF_ALU64_REG(BPF_SUB, R0, R1),
 			BPF_EXIT_INSN(),
@@ -4382,7 +4382,7 @@ static struct bpf_test tests[] = {
 		.u.insns_int = {
 			BPF_ALU32_IMM(BPF_MOV, R0, 0x12),
 			BPF_ST_MEM(BPF_DW, R10, -40, 0x10),
-			BPF_STX_XADD(BPF_DW, R10, R0, -40),
+			BPF_ATOMIC_OP(BPF_DW, BPF_ADD, R10, R0, -40),
 			BPF_EXIT_INSN(),
 		},
 		INTERNAL,
@@ -5318,15 +5318,10 @@ static struct bpf_test tests[] = {
 	{
 		"BPF_MAXINSNS: Jump, gap, jump, ...",
 		{ },
-#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86)
-		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
-#else
 		CLASSIC | FLAG_NO_DATA,
-#endif
 		{ },
 		{ { 0, 0xababcbac } },
 		.fill_helper = bpf_fill_maxinsns11,
-		.expected_errcode = -ENOTSUPP,
 	},
 	{
 		"BPF_MAXINSNS: jump over MSH",
diff --git a/lib/test_fpu.c b/lib/test_fpu.c
index c33764aa3eb8..e82db19fed84 100644
--- a/lib/test_fpu.c
+++ b/lib/test_fpu.c
@@ -63,7 +63,7 @@ static int test_fpu_get(void *data, u64 *val)
 	return status;
 }
 
-DEFINE_SIMPLE_ATTRIBUTE(test_fpu_fops, test_fpu_get, NULL, "%lld\n");
+DEFINE_DEBUGFS_ATTRIBUTE(test_fpu_fops, test_fpu_get, NULL, "%lld\n");
 static struct dentry *selftest_dir;
 
 static int __init test_fpu_init(void)
@@ -72,8 +72,8 @@ static int __init test_fpu_init(void)
 	if (!selftest_dir)
 		return -ENOMEM;
 
-	debugfs_create_file("test_fpu", 0444, selftest_dir, NULL,
-			    &test_fpu_fops);
+	debugfs_create_file_unsafe("test_fpu", 0444, selftest_dir, NULL,
+				   &test_fpu_fops);
 
 	return 0;
 }
diff --git a/lib/timerqueue.c b/lib/timerqueue.c
index c52710964593..cdb9c7658478 100644
--- a/lib/timerqueue.c
+++ b/lib/timerqueue.c
@@ -14,6 +14,14 @@
 #include <linux/rbtree.h>
 #include <linux/export.h>
 
+#define __node_2_tq(_n) \
+	rb_entry((_n), struct timerqueue_node, node)
+
+static inline bool __timerqueue_less(struct rb_node *a, const struct rb_node *b)
+{
+	return __node_2_tq(a)->expires < __node_2_tq(b)->expires;
+}
+
 /**
  * timerqueue_add - Adds timer to timerqueue.
  *
@@ -26,28 +34,10 @@
  */
 bool timerqueue_add(struct timerqueue_head *head, struct timerqueue_node *node)
 {
-	struct rb_node **p = &head->rb_root.rb_root.rb_node;
-	struct rb_node *parent = NULL;
-	struct timerqueue_node *ptr;
-	bool leftmost = true;
-
 	/* Make sure we don't add nodes that are already added */
 	WARN_ON_ONCE(!RB_EMPTY_NODE(&node->node));
 
-	while (*p) {
-		parent = *p;
-		ptr = rb_entry(parent, struct timerqueue_node, node);
-		if (node->expires < ptr->expires) {
-			p = &(*p)->rb_left;
-		} else {
-			p = &(*p)->rb_right;
-			leftmost = false;
-		}
-	}
-	rb_link_node(&node->node, parent, p);
-	rb_insert_color_cached(&node->node, &head->rb_root, leftmost);
-
-	return leftmost;
+	return rb_add_cached(&node->node, &head->rb_root, __timerqueue_less);
 }
 EXPORT_SYMBOL_GPL(timerqueue_add);
 
diff --git a/lib/ubsan.c b/lib/ubsan.c
index 3e3352f3d0da..bec38c64d6a6 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -427,3 +427,34 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val)
 	ubsan_epilogue();
 }
 EXPORT_SYMBOL(__ubsan_handle_load_invalid_value);
+
+void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr,
+					 unsigned long align,
+					 unsigned long offset);
+void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr,
+					 unsigned long align,
+					 unsigned long offset)
+{
+	struct alignment_assumption_data *data = _data;
+	unsigned long real_ptr;
+
+	if (suppress_report(&data->location))
+		return;
+
+	ubsan_prologue(&data->location, "alignment-assumption");
+
+	if (offset)
+		pr_err("assumption of %lu byte alignment (with offset of %lu byte) for pointer of type %s failed",
+		       align, offset, data->type->type_name);
+	else
+		pr_err("assumption of %lu byte alignment for pointer of type %s failed",
+		       align, data->type->type_name);
+
+	real_ptr = ptr - offset;
+	pr_err("%saddress is %lu aligned, misalignment offset is %lu bytes",
+	       offset ? "offset " : "", BIT(real_ptr ? __ffs(real_ptr) : 0),
+	       real_ptr & (align - 1));
+
+	ubsan_epilogue();
+}
+EXPORT_SYMBOL(__ubsan_handle_alignment_assumption);
diff --git a/lib/ubsan.h b/lib/ubsan.h
index 7b56c09473a9..9a0b71c5ff9f 100644
--- a/lib/ubsan.h
+++ b/lib/ubsan.h
@@ -78,6 +78,12 @@ struct invalid_value_data {
 	struct type_descriptor *type;
 };
 
+struct alignment_assumption_data {
+	struct source_location location;
+	struct source_location assumption_location;
+	struct type_descriptor *type;
+};
+
 #if defined(CONFIG_ARCH_SUPPORTS_INT128)
 typedef __int128 s_max;
 typedef unsigned __int128 u_max;