44 files changed, 1341 insertions, 866 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 64d7c19d3167..51c6bf0d93c6 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -165,7 +165,7 @@ config DEBUG_INFO_REDUCED
 
 config DEBUG_INFO_SPLIT
 	bool "Produce split debuginfo in .dwo files"
-	depends on DEBUG_INFO && !FRV
+	depends on DEBUG_INFO
 	help
 	  Generate debug info into separate .dwo files. This significantly
 	  reduces the build directory size for builds with DEBUG_INFO,
@@ -217,7 +217,7 @@ config ENABLE_MUST_CHECK
 config FRAME_WARN
 	int "Warn for stack frames larger than (needs gcc 4.4)"
 	range 0 8192
-	default 0 if KASAN
+	default 3072 if KASAN_EXTRA
 	default 2048 if GCC_PLUGIN_LATENT_ENTROPY
 	default 1280 if (!64BIT && PARISC)
 	default 1024 if (!64BIT && !PARISC)
@@ -324,11 +324,11 @@ config DEBUG_SECTION_MISMATCH
 	    the analysis would not catch the illegal reference.
 	    This option tells gcc to inline less (but it does result in
 	    a larger kernel).
-	  - Run the section mismatch analysis for each module/built-in.o file.
+	  - Run the section mismatch analysis for each module/built-in.a file.
 	    When we run the section mismatch analysis on vmlinux.o, we
 	    lose valuable information about where the mismatch was
 	    introduced.
-	    Running the analysis for each module/built-in.o file
+	    Running the analysis for each module/built-in.a file
 	    tells where the mismatch happens much closer to the
 	    source. The drawback is that the same mismatch is
 	    reported at least twice.
@@ -351,14 +351,10 @@ config SECTION_MISMATCH_WARN_ONLY
 #
 config ARCH_WANT_FRAME_POINTERS
 	bool
-	help
 
 config FRAME_POINTER
 	bool "Compile the kernel with frame pointers"
-	depends on DEBUG_KERNEL && \
-		(CRIS || M68K || FRV || UML || \
-		 SUPERH || BLACKFIN || MN10300 || METAG) || \
-		ARCH_WANT_FRAME_POINTERS
+	depends on DEBUG_KERNEL && (M68K || UML || SUPERH) || ARCH_WANT_FRAME_POINTERS
 	default y if (DEBUG_INFO && UML) || ARCH_WANT_FRAME_POINTERS
 	help
 	  If you say Y here the resulting kernel image will be slightly
@@ -1035,69 +1031,20 @@ config DEBUG_PREEMPT
 
 menu "Lock Debugging (spinlocks, mutexes, etc...)"
 
-config DEBUG_RT_MUTEXES
-	bool "RT Mutex debugging, deadlock detection"
-	depends on DEBUG_KERNEL && RT_MUTEXES
-	help
-	 This allows rt mutex semantics violations and rt mutex related
-	 deadlocks (lockups) to be detected and reported automatically.
-
-config DEBUG_SPINLOCK
-	bool "Spinlock and rw-lock debugging: basic checks"
-	depends on DEBUG_KERNEL
-	select UNINLINE_SPIN_UNLOCK
-	help
-	  Say Y here and build SMP to catch missing spinlock initialization
-	  and certain other kinds of spinlock errors commonly made.  This is
-	  best used in conjunction with the NMI watchdog so that spinlock
-	  deadlocks are also debuggable.
-
-config DEBUG_MUTEXES
-	bool "Mutex debugging: basic checks"
-	depends on DEBUG_KERNEL
-	help
-	 This feature allows mutex semantics violations to be detected and
-	 reported.
-
-config DEBUG_WW_MUTEX_SLOWPATH
-	bool "Wait/wound mutex debugging: Slowpath testing"
-	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
-	select DEBUG_LOCK_ALLOC
-	select DEBUG_SPINLOCK
-	select DEBUG_MUTEXES
-	help
-	 This feature enables slowpath testing for w/w mutex users by
-	 injecting additional -EDEADLK wound/backoff cases. Together with
-	 the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this
-	 will test all possible w/w mutex interface abuse with the
-	 exception of simply not acquiring all the required locks.
-	 Note that this feature can introduce significant overhead, so
-	 it really should not be enabled in a production or distro kernel,
-	 even a debug kernel.  If you are a driver writer, enable it.  If
-	 you are a distro, do not.
-
-config DEBUG_LOCK_ALLOC
-	bool "Lock debugging: detect incorrect freeing of live locks"
-	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
-	select DEBUG_SPINLOCK
-	select DEBUG_MUTEXES
-	select DEBUG_RT_MUTEXES if RT_MUTEXES
-	select LOCKDEP
-	help
-	 This feature will check whether any held lock (spinlock, rwlock,
-	 mutex or rwsem) is incorrectly freed by the kernel, via any of the
-	 memory-freeing routines (kfree(), kmem_cache_free(), free_pages(),
-	 vfree(), etc.), whether a live lock is incorrectly reinitialized via
-	 spin_lock_init()/mutex_init()/etc., or whether there is any lock
-	 held during task exit.
+config LOCK_DEBUGGING_SUPPORT
+	bool
+	depends on TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+	default y
 
 config PROVE_LOCKING
 	bool "Lock debugging: prove locking correctness"
-	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+	depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
 	select LOCKDEP
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
 	select DEBUG_RT_MUTEXES if RT_MUTEXES
+	select DEBUG_RWSEMS if RWSEM_SPIN_ON_OWNER
+	select DEBUG_WW_MUTEX_SLOWPATH
 	select DEBUG_LOCK_ALLOC
 	select TRACE_IRQFLAGS
 	default n
@@ -1135,20 +1082,9 @@ config PROVE_LOCKING
 
 	 For more details, see Documentation/locking/lockdep-design.txt.
 
-config LOCKDEP
-	bool
-	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
-	select STACKTRACE
-	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !SCORE && !X86
-	select KALLSYMS
-	select KALLSYMS_ALL
-
-config LOCKDEP_SMALL
-	bool
-
 config LOCK_STAT
 	bool "Lock usage statistics"
-	depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT
+	depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
 	select LOCKDEP
 	select DEBUG_SPINLOCK
 	select DEBUG_MUTEXES
@@ -1168,6 +1104,80 @@ config LOCK_STAT
 	 CONFIG_LOCK_STAT defines "contended" and "acquired" lock events.
 	 (CONFIG_LOCKDEP defines "acquire" and "release" events.)
 
+config DEBUG_RT_MUTEXES
+	bool "RT Mutex debugging, deadlock detection"
+	depends on DEBUG_KERNEL && RT_MUTEXES
+	help
+	 This allows rt mutex semantics violations and rt mutex related
+	 deadlocks (lockups) to be detected and reported automatically.
+
+config DEBUG_SPINLOCK
+	bool "Spinlock and rw-lock debugging: basic checks"
+	depends on DEBUG_KERNEL
+	select UNINLINE_SPIN_UNLOCK
+	help
+	  Say Y here and build SMP to catch missing spinlock initialization
+	  and certain other kinds of spinlock errors commonly made.  This is
+	  best used in conjunction with the NMI watchdog so that spinlock
+	  deadlocks are also debuggable.
+
+config DEBUG_MUTEXES
+	bool "Mutex debugging: basic checks"
+	depends on DEBUG_KERNEL
+	help
+	 This feature allows mutex semantics violations to be detected and
+	 reported.
+
+config DEBUG_WW_MUTEX_SLOWPATH
+	bool "Wait/wound mutex debugging: Slowpath testing"
+	depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
+	select DEBUG_LOCK_ALLOC
+	select DEBUG_SPINLOCK
+	select DEBUG_MUTEXES
+	help
+	 This feature enables slowpath testing for w/w mutex users by
+	 injecting additional -EDEADLK wound/backoff cases. Together with
+	 the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this
+	 will test all possible w/w mutex interface abuse with the
+	 exception of simply not acquiring all the required locks.
+	 Note that this feature can introduce significant overhead, so
+	 it really should not be enabled in a production or distro kernel,
+	 even a debug kernel.  If you are a driver writer, enable it.  If
+	 you are a distro, do not.
+
+config DEBUG_RWSEMS
+	bool "RW Semaphore debugging: basic checks"
+	depends on DEBUG_KERNEL && RWSEM_SPIN_ON_OWNER
+	help
+	  This debugging feature allows mismatched rw semaphore locks and unlocks
+	  to be detected and reported.
+
+config DEBUG_LOCK_ALLOC
+	bool "Lock debugging: detect incorrect freeing of live locks"
+	depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
+	select DEBUG_SPINLOCK
+	select DEBUG_MUTEXES
+	select DEBUG_RT_MUTEXES if RT_MUTEXES
+	select LOCKDEP
+	help
+	 This feature will check whether any held lock (spinlock, rwlock,
+	 mutex or rwsem) is incorrectly freed by the kernel, via any of the
+	 memory-freeing routines (kfree(), kmem_cache_free(), free_pages(),
+	 vfree(), etc.), whether a live lock is incorrectly reinitialized via
+	 spin_lock_init()/mutex_init()/etc., or whether there is any lock
+	 held during task exit.
+
+config LOCKDEP
+	bool
+	depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT
+	select STACKTRACE
+	select FRAME_POINTER if !MIPS && !PPC && !ARM_UNWIND && !S390 && !MICROBLAZE && !ARC && !X86
+	select KALLSYMS
+	select KALLSYMS_ALL
+
+config LOCKDEP_SMALL
+	bool
+
 config DEBUG_LOCKDEP
 	bool "Lock dependency engine debugging"
 	depends on DEBUG_KERNEL && LOCKDEP
@@ -1572,7 +1582,7 @@ config FAULT_INJECTION_STACKTRACE_FILTER
 	depends on FAULT_INJECTION_DEBUG_FS && STACKTRACE_SUPPORT
 	depends on !X86_64
 	select STACKTRACE
-	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !SCORE && !X86
+	select FRAME_POINTER if !MIPS && !PPC && !S390 && !MICROBLAZE && !ARM_UNWIND && !ARC && !X86
 	help
 	  Provide stacktrace filter for fault-injection capabilities
 
@@ -1641,7 +1651,11 @@ config DMA_API_DEBUG
 
 	  If unsure, say N.
 
-menu "Runtime Testing"
+menuconfig RUNTIME_TESTING_MENU
+	bool "Runtime Testing"
+	def_bool y
+
+if RUNTIME_TESTING_MENU
 
 config LKDTM
 	tristate "Linux Kernel Dump Test Tool Module"
@@ -1841,7 +1855,7 @@ config TEST_BPF
 
 	  If unsure, say N.
 
-config TEST_FIND_BIT
+config FIND_BIT_BENCHMARK
 	tristate "Test find_bit functions"
 	default n
 	help
@@ -1929,7 +1943,7 @@ config TEST_DEBUG_VIRTUAL
 
 	  If unsure, say N.
 
-endmenu # runtime tests
+endif # RUNTIME_TESTING_MENU
 
 config MEMTEST
 	bool "Memtest"
@@ -1966,7 +1980,7 @@ config STRICT_DEVMEM
 	bool "Filter access to /dev/mem"
 	depends on MMU && DEVMEM
 	depends on ARCH_HAS_DEVMEM_IS_ALLOWED
-	default y if TILE || PPC || X86 || ARM64
+	default y if PPC || X86 || ARM64
 	---help---
 	  If this option is disabled, you allow userspace (root) access to all
 	  of memory, including kernel and userspace memory. Accidental
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index bd38aab05929..3d35d062970d 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -20,6 +20,17 @@ config KASAN
 	  Currently CONFIG_KASAN doesn't work with CONFIG_DEBUG_SLAB
 	  (the resulting kernel does not boot).
 
+config KASAN_EXTRA
+	bool "KAsan: extra checks"
+	depends on KASAN && DEBUG_KERNEL && !COMPILE_TEST
+	help
+	  This enables further checks in the kernel address sanitizer, for now
+	  it only includes the address-use-after-scope check that can lead
+	  to excessive kernel stack usage, frame size warnings and longer
+	  compile time.
+	  https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81715 has more
+
+
 choice
 	prompt "Instrumentation type"
 	depends on KASAN
diff --git a/lib/Makefile b/lib/Makefile
index 6ae3bd481379..0bd50d71f423 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -47,8 +47,8 @@ obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o
 obj-y += hexdump.o
 obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o
 obj-y += kstrtox.o
+obj-$(CONFIG_FIND_BIT_BENCHMARK) += find_bit_benchmark.o
 obj-$(CONFIG_TEST_BPF) += test_bpf.o
-obj-$(CONFIG_TEST_FIND_BIT) += test_find_bit.o
 obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
 obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
diff --git a/lib/bitmap.c b/lib/bitmap.c
index d8f0c094b18e..9e498c77ed0e 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -1106,111 +1106,80 @@ int bitmap_allocate_region(unsigned long *bitmap, unsigned int pos, int order)
 EXPORT_SYMBOL(bitmap_allocate_region);
 
 /**
- * bitmap_from_u32array - copy the contents of a u32 array of bits to bitmap
- *	@bitmap: array of unsigned longs, the destination bitmap, non NULL
- *	@nbits: number of bits in @bitmap
- *	@buf: array of u32 (in host byte order), the source bitmap, non NULL
- *	@nwords: number of u32 words in @buf
- *
- * copy min(nbits, 32*nwords) bits from @buf to @bitmap, remaining
- * bits between nword and nbits in @bitmap (if any) are cleared. In
- * last word of @bitmap, the bits beyond nbits (if any) are kept
- * unchanged.
+ * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order.
+ * @dst:   destination buffer
+ * @src:   bitmap to copy
+ * @nbits: number of bits in the bitmap
  *
- * Return the number of bits effectively copied.
+ * Require nbits % BITS_PER_LONG == 0.
  */
-unsigned int
-bitmap_from_u32array(unsigned long *bitmap, unsigned int nbits,
-		     const u32 *buf, unsigned int nwords)
+#ifdef __BIG_ENDIAN
+void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits)
 {
-	unsigned int dst_idx, src_idx;
-
-	for (src_idx = dst_idx = 0; dst_idx < BITS_TO_LONGS(nbits); ++dst_idx) {
-		unsigned long part = 0;
-
-		if (src_idx < nwords)
-			part = buf[src_idx++];
-
-#if BITS_PER_LONG == 64
-		if (src_idx < nwords)
-			part |= ((unsigned long) buf[src_idx++]) << 32;
-#endif
-
-		if (dst_idx < nbits/BITS_PER_LONG)
-			bitmap[dst_idx] = part;
-		else {
-			unsigned long mask = BITMAP_LAST_WORD_MASK(nbits);
+	unsigned int i;
 
-			bitmap[dst_idx] = (bitmap[dst_idx] & ~mask)
-				| (part & mask);
-		}
+	for (i = 0; i < nbits/BITS_PER_LONG; i++) {
+		if (BITS_PER_LONG == 64)
+			dst[i] = cpu_to_le64(src[i]);
+		else
+			dst[i] = cpu_to_le32(src[i]);
 	}
-
-	return min_t(unsigned int, nbits, 32*nwords);
 }
-EXPORT_SYMBOL(bitmap_from_u32array);
+EXPORT_SYMBOL(bitmap_copy_le);
+#endif
 
+#if BITS_PER_LONG == 64
 /**
- * bitmap_to_u32array - copy the contents of bitmap to a u32 array of bits
- *	@buf: array of u32 (in host byte order), the dest bitmap, non NULL
- *	@nwords: number of u32 words in @buf
- *	@bitmap: array of unsigned longs, the source bitmap, non NULL
+ * bitmap_from_arr32 - copy the contents of u32 array of bits to bitmap
+ *	@bitmap: array of unsigned longs, the destination bitmap
+ *	@buf: array of u32 (in host byte order), the source bitmap
  *	@nbits: number of bits in @bitmap
- *
- * copy min(nbits, 32*nwords) bits from @bitmap to @buf. Remaining
- * bits after nbits in @buf (if any) are cleared.
- *
- * Return the number of bits effectively copied.
  */
-unsigned int
-bitmap_to_u32array(u32 *buf, unsigned int nwords,
-		   const unsigned long *bitmap, unsigned int nbits)
+void bitmap_from_arr32(unsigned long *bitmap, const u32 *buf,
+						unsigned int nbits)
 {
-	unsigned int dst_idx = 0, src_idx = 0;
-
-	while (dst_idx < nwords) {
-		unsigned long part = 0;
-
-		if (src_idx < BITS_TO_LONGS(nbits)) {
-			part = bitmap[src_idx];
-			if (src_idx >= nbits/BITS_PER_LONG)
-				part &= BITMAP_LAST_WORD_MASK(nbits);
-			src_idx++;
-		}
+	unsigned int i, halfwords;
 
-		buf[dst_idx++] = part & 0xffffffffUL;
+	if (!nbits)
+		return;
 
-#if BITS_PER_LONG == 64
-		if (dst_idx < nwords) {
-			part >>= 32;
-			buf[dst_idx++] = part & 0xffffffffUL;
-		}
-#endif
+	halfwords = DIV_ROUND_UP(nbits, 32);
+	for (i = 0; i < halfwords; i++) {
+		bitmap[i/2] = (unsigned long) buf[i];
+		if (++i < halfwords)
+			bitmap[i/2] |= ((unsigned long) buf[i]) << 32;
 	}
 
-	return min_t(unsigned int, nbits, 32*nwords);
+	/* Clear tail bits in last word beyond nbits. */
+	if (nbits % BITS_PER_LONG)
+		bitmap[(halfwords - 1) / 2] &= BITMAP_LAST_WORD_MASK(nbits);
 }
-EXPORT_SYMBOL(bitmap_to_u32array);
+EXPORT_SYMBOL(bitmap_from_arr32);
 
 /**
- * bitmap_copy_le - copy a bitmap, putting the bits into little-endian order.
- * @dst:   destination buffer
- * @src:   bitmap to copy
- * @nbits: number of bits in the bitmap
- *
- * Require nbits % BITS_PER_LONG == 0.
+ * bitmap_to_arr32 - copy the contents of bitmap to a u32 array of bits
+ *	@buf: array of u32 (in host byte order), the dest bitmap
+ *	@bitmap: array of unsigned longs, the source bitmap
+ *	@nbits: number of bits in @bitmap
  */
-#ifdef __BIG_ENDIAN
-void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits)
+void bitmap_to_arr32(u32 *buf, const unsigned long *bitmap, unsigned int nbits)
 {
-	unsigned int i;
+	unsigned int i, halfwords;
 
-	for (i = 0; i < nbits/BITS_PER_LONG; i++) {
-		if (BITS_PER_LONG == 64)
-			dst[i] = cpu_to_le64(src[i]);
-		else
-			dst[i] = cpu_to_le32(src[i]);
+	if (!nbits)
+		return;
+
+	halfwords = DIV_ROUND_UP(nbits, 32);
+	for (i = 0; i < halfwords; i++) {
+		buf[i] = (u32) (bitmap[i/2] & UINT_MAX);
+		if (++i < halfwords)
+			buf[i] = (u32) (bitmap[i/2] >> 32);
 	}
+
+	/* Clear tail bits in last element of array beyond nbits. */
+	if (nbits % BITS_PER_LONG)
+		buf[halfwords - 1] &= (u32) (UINT_MAX >> ((-nbits) & 31));
 }
-EXPORT_SYMBOL(bitmap_copy_le);
+EXPORT_SYMBOL(bitmap_to_arr32);
+
 #endif
diff --git a/lib/btree.c b/lib/btree.c
index f93a945274af..590facba2c50 100644
--- a/lib/btree.c
+++ b/lib/btree.c
@@ -3,7 +3,7 @@
  *
  * As should be obvious for Linux kernel code, license is GPLv2
  *
- * Copyright (c) 2007-2008 Joern Engel <joern@logfs.org>
+ * Copyright (c) 2007-2008 Joern Engel <joern@purestorage.com>
  * Bits and pieces stolen from Peter Zijlstra's code, which is
  * Copyright 2007, Red Hat Inc. Peter Zijlstra
  * GPLv2
@@ -76,6 +76,8 @@ struct btree_geo btree_geo128 = {
 };
 EXPORT_SYMBOL_GPL(btree_geo128);
 
+#define MAX_KEYLEN	(2 * LONG_PER_U64)
+
 static struct kmem_cache *btree_cachep;
 
 void *btree_alloc(gfp_t gfp_mask, void *pool_data)
@@ -313,7 +315,7 @@ void *btree_get_prev(struct btree_head *head, struct btree_geo *geo,
 {
 	int i, height;
 	unsigned long *node, *oldnode;
-	unsigned long *retry_key = NULL, key[geo->keylen];
+	unsigned long *retry_key = NULL, key[MAX_KEYLEN];
 
 	if (keyzero(geo, __key))
 		return NULL;
@@ -639,8 +641,8 @@ EXPORT_SYMBOL_GPL(btree_remove);
 int btree_merge(struct btree_head *target, struct btree_head *victim,
 		struct btree_geo *geo, gfp_t gfp)
 {
-	unsigned long key[geo->keylen];
-	unsigned long dup[geo->keylen];
+	unsigned long key[MAX_KEYLEN];
+	unsigned long dup[MAX_KEYLEN];
 	void *val;
 	int err;
 
diff --git a/lib/bug.c b/lib/bug.c
index c1b0fad31b10..1077366f496b 100644
--- a/lib/bug.c
+++ b/lib/bug.c
@@ -150,6 +150,8 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 		return BUG_TRAP_TYPE_NONE;
 
 	bug = find_bug(bugaddr);
+	if (!bug)
+		return BUG_TRAP_TYPE_NONE;
 
 	file = NULL;
 	line = 0;
@@ -191,7 +193,7 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs)
 	if (file)
 		pr_crit("kernel BUG at %s:%u!\n", file, line);
 	else
-		pr_crit("Kernel BUG at %p [verbose debug info unavailable]\n",
+		pr_crit("Kernel BUG at %pB [verbose debug info unavailable]\n",
 			(void *)bugaddr);
 
 	return BUG_TRAP_TYPE_BUG;
diff --git a/lib/cpumask.c b/lib/cpumask.c
index 35fe142ebb5e..beca6244671a 100644
--- a/lib/cpumask.c
+++ b/lib/cpumask.c
@@ -33,10 +33,11 @@ EXPORT_SYMBOL(cpumask_next);
 int cpumask_next_and(int n, const struct cpumask *src1p,
 		     const struct cpumask *src2p)
 {
-	while ((n = cpumask_next(n, src1p)) < nr_cpu_ids)
-		if (cpumask_test_cpu(n, src2p))
-			break;
-	return n;
+	/* -1 is a legal arg here. */
+	if (n != -1)
+		cpumask_check(n);
+	return find_next_and_bit(cpumask_bits(src1p), cpumask_bits(src2p),
+		nr_cpumask_bits, n + 1);
 }
 EXPORT_SYMBOL(cpumask_next_and);
 
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index 2f5349c6e81a..994be4805cec 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -42,14 +42,18 @@ static struct debug_obj		obj_static_pool[ODEBUG_POOL_SIZE] __initdata;
 static DEFINE_RAW_SPINLOCK(pool_lock);
 
 static HLIST_HEAD(obj_pool);
+static HLIST_HEAD(obj_to_free);
 
 static int			obj_pool_min_free = ODEBUG_POOL_SIZE;
 static int			obj_pool_free = ODEBUG_POOL_SIZE;
 static int			obj_pool_used;
 static int			obj_pool_max_used;
+/* The number of objs on the global free list */
+static int			obj_nr_tofree;
 static struct kmem_cache	*obj_cache;
 
 static int			debug_objects_maxchain __read_mostly;
+static int __maybe_unused	debug_objects_maxchecked __read_mostly;
 static int			debug_objects_fixups __read_mostly;
 static int			debug_objects_warnings __read_mostly;
 static int			debug_objects_enabled __read_mostly
@@ -96,12 +100,32 @@ static const char *obj_states[ODEBUG_STATE_MAX] = {
 static void fill_pool(void)
 {
 	gfp_t gfp = GFP_ATOMIC | __GFP_NORETRY | __GFP_NOWARN;
-	struct debug_obj *new;
+	struct debug_obj *new, *obj;
 	unsigned long flags;
 
 	if (likely(obj_pool_free >= debug_objects_pool_min_level))
 		return;
 
+	/*
+	 * Reuse objs from the global free list; they will be reinitialized
+	 * when allocating.
+	 */
+	while (obj_nr_tofree && (obj_pool_free < obj_pool_min_free)) {
+		raw_spin_lock_irqsave(&pool_lock, flags);
+		/*
+		 * Recheck with the lock held as the worker thread might have
+		 * won the race and freed the global free list already.
+		 */
+		if (obj_nr_tofree) {
+			obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
+			hlist_del(&obj->node);
+			obj_nr_tofree--;
+			hlist_add_head(&obj->node, &obj_pool);
+			obj_pool_free++;
+		}
+		raw_spin_unlock_irqrestore(&pool_lock, flags);
+	}
+
 	if (unlikely(!obj_cache))
 		return;
 
@@ -177,62 +201,76 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
  * workqueue function to free objects.
  *
  * To reduce contention on the global pool_lock, the actual freeing of
- * debug objects will be delayed if the pool_lock is busy. We also free
- * the objects in a batch of 4 for each lock/unlock cycle.
+ * debug objects will be delayed if the pool_lock is busy.
  */
-#define ODEBUG_FREE_BATCH	4
-
 static void free_obj_work(struct work_struct *work)
 {
-	struct debug_obj *objs[ODEBUG_FREE_BATCH];
+	struct hlist_node *tmp;
+	struct debug_obj *obj;
 	unsigned long flags;
-	int i;
+	HLIST_HEAD(tofree);
 
 	if (!raw_spin_trylock_irqsave(&pool_lock, flags))
 		return;
-	while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) {
-		for (i = 0; i < ODEBUG_FREE_BATCH; i++) {
-			objs[i] = hlist_entry(obj_pool.first,
-					      typeof(*objs[0]), node);
-			hlist_del(&objs[i]->node);
-		}
 
-		obj_pool_free -= ODEBUG_FREE_BATCH;
-		debug_objects_freed += ODEBUG_FREE_BATCH;
-		/*
-		 * We release pool_lock across kmem_cache_free() to
-		 * avoid contention on pool_lock.
-		 */
-		raw_spin_unlock_irqrestore(&pool_lock, flags);
-		for (i = 0; i < ODEBUG_FREE_BATCH; i++)
-			kmem_cache_free(obj_cache, objs[i]);
-		if (!raw_spin_trylock_irqsave(&pool_lock, flags))
-			return;
+	/*
+	 * The objs on the pool list might be allocated before the work is
+	 * run, so recheck if pool list it full or not, if not fill pool
+	 * list from the global free list
+	 */
+	while (obj_nr_tofree && obj_pool_free < debug_objects_pool_size) {
+		obj = hlist_entry(obj_to_free.first, typeof(*obj), node);
+		hlist_del(&obj->node);
+		hlist_add_head(&obj->node, &obj_pool);
+		obj_pool_free++;
+		obj_nr_tofree--;
+	}
+
+	/*
+	 * Pool list is already full and there are still objs on the free
+	 * list. Move remaining free objs to a temporary list to free the
+	 * memory outside the pool_lock held region.
+	 */
+	if (obj_nr_tofree) {
+		hlist_move_list(&obj_to_free, &tofree);
+		debug_objects_freed += obj_nr_tofree;
+		obj_nr_tofree = 0;
 	}
 	raw_spin_unlock_irqrestore(&pool_lock, flags);
+
+	hlist_for_each_entry_safe(obj, tmp, &tofree, node) {
+		hlist_del(&obj->node);
+		kmem_cache_free(obj_cache, obj);
+	}
 }
 
-/*
- * Put the object back into the pool and schedule work to free objects
- * if necessary.
- */
-static void free_object(struct debug_obj *obj)
+static bool __free_object(struct debug_obj *obj)
 {
 	unsigned long flags;
-	int sched = 0;
+	bool work;
 
 	raw_spin_lock_irqsave(&pool_lock, flags);
-	/*
-	 * schedule work when the pool is filled and the cache is
-	 * initialized:
-	 */
-	if (obj_pool_free > debug_objects_pool_size && obj_cache)
-		sched = 1;
-	hlist_add_head(&obj->node, &obj_pool);
-	obj_pool_free++;
+	work = (obj_pool_free > debug_objects_pool_size) && obj_cache;
 	obj_pool_used--;
+
+	if (work) {
+		obj_nr_tofree++;
+		hlist_add_head(&obj->node, &obj_to_free);
+	} else {
+		obj_pool_free++;
+		hlist_add_head(&obj->node, &obj_pool);
+	}
 	raw_spin_unlock_irqrestore(&pool_lock, flags);
-	if (sched)
+	return work;
+}
+
+/*
+ * Put the object back into the pool and schedule work to free objects
+ * if necessary.
+ */
+static void free_object(struct debug_obj *obj)
+{
+	if (__free_object(obj))
 		schedule_work(&debug_obj_work);
 }
 
@@ -714,13 +752,13 @@ EXPORT_SYMBOL_GPL(debug_object_active_state);
 static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
 	unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
-	struct hlist_node *tmp;
-	HLIST_HEAD(freelist);
 	struct debug_obj_descr *descr;
 	enum debug_obj_state state;
 	struct debug_bucket *db;
+	struct hlist_node *tmp;
 	struct debug_obj *obj;
-	int cnt;
+	int cnt, objs_checked = 0;
+	bool work = false;
 
 	saddr = (unsigned long) address;
 	eaddr = saddr + size;
@@ -751,21 +789,24 @@ repeat:
 				goto repeat;
 			default:
 				hlist_del(&obj->node);
-				hlist_add_head(&obj->node, &freelist);
+				work |= __free_object(obj);
 				break;
 			}
 		}
 		raw_spin_unlock_irqrestore(&db->lock, flags);
 
-		/* Now free them */
-		hlist_for_each_entry_safe(obj, tmp, &freelist, node) {
-			hlist_del(&obj->node);
-			free_object(obj);
-		}
-
 		if (cnt > debug_objects_maxchain)
 			debug_objects_maxchain = cnt;
+
+		objs_checked += cnt;
 	}
+
+	if (objs_checked > debug_objects_maxchecked)
+		debug_objects_maxchecked = objs_checked;
+
+	/* Schedule work to actually kmem_cache_free() objects */
+	if (work)
+		schedule_work(&debug_obj_work);
 }
 
 void debug_check_no_obj_freed(const void *address, unsigned long size)
@@ -780,12 +821,14 @@ void debug_check_no_obj_freed(const void *address, unsigned long size)
 static int debug_stats_show(struct seq_file *m, void *v)
 {
 	seq_printf(m, "max_chain     :%d\n", debug_objects_maxchain);
+	seq_printf(m, "max_checked   :%d\n", debug_objects_maxchecked);
 	seq_printf(m, "warnings      :%d\n", debug_objects_warnings);
 	seq_printf(m, "fixups        :%d\n", debug_objects_fixups);
 	seq_printf(m, "pool_free     :%d\n", obj_pool_free);
 	seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free);
 	seq_printf(m, "pool_used     :%d\n", obj_pool_used);
 	seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used);
+	seq_printf(m, "on_free_list  :%d\n", obj_nr_tofree);
 	seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated);
 	seq_printf(m, "objs_freed    :%d\n", debug_objects_freed);
 	return 0;
diff --git a/lib/devres.c b/lib/devres.c
index 5f2aedd58bc5..5bec1120b392 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -5,6 +5,12 @@
 #include <linux/gfp.h>
 #include <linux/export.h>
 
+enum devm_ioremap_type {
+	DEVM_IOREMAP = 0,
+	DEVM_IOREMAP_NC,
+	DEVM_IOREMAP_WC,
+};
+
 void devm_ioremap_release(struct device *dev, void *res)
 {
 	iounmap(*(void __iomem **)res);
@@ -15,24 +21,28 @@ static int devm_ioremap_match(struct device *dev, void *res, void *match_data)
 	return *(void **)res == match_data;
 }
 
-/**
- * devm_ioremap - Managed ioremap()
- * @dev: Generic device to remap IO address for
- * @offset: Resource address to map
- * @size: Size of map
- *
- * Managed ioremap().  Map is automatically unmapped on driver detach.
- */
-void __iomem *devm_ioremap(struct device *dev, resource_size_t offset,
-			   resource_size_t size)
+static void __iomem *__devm_ioremap(struct device *dev, resource_size_t offset,
+				    resource_size_t size,
+				    enum devm_ioremap_type type)
 {
-	void __iomem **ptr, *addr;
+	void __iomem **ptr, *addr = NULL;
 
 	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
 	if (!ptr)
 		return NULL;
 
-	addr = ioremap(offset, size);
+	switch (type) {
+	case DEVM_IOREMAP:
+		addr = ioremap(offset, size);
+		break;
+	case DEVM_IOREMAP_NC:
+		addr = ioremap_nocache(offset, size);
+		break;
+	case DEVM_IOREMAP_WC:
+		addr = ioremap_wc(offset, size);
+		break;
+	}
+
 	if (addr) {
 		*ptr = addr;
 		devres_add(dev, ptr);
@@ -41,6 +51,20 @@ void __iomem *devm_ioremap(struct device *dev, resource_size_t offset,
 
 	return addr;
 }
+
+/**
+ * devm_ioremap - Managed ioremap()
+ * @dev: Generic device to remap IO address for
+ * @offset: Resource address to map
+ * @size: Size of map
+ *
+ * Managed ioremap().  Map is automatically unmapped on driver detach.
+ */
+void __iomem *devm_ioremap(struct device *dev, resource_size_t offset,
+			   resource_size_t size)
+{
+	return __devm_ioremap(dev, offset, size, DEVM_IOREMAP);
+}
 EXPORT_SYMBOL(devm_ioremap);
 
 /**
@@ -55,20 +79,7 @@ EXPORT_SYMBOL(devm_ioremap);
 void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset,
 				   resource_size_t size)
 {
-	void __iomem **ptr, *addr;
-
-	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
-		return NULL;
-
-	addr = ioremap_nocache(offset, size);
-	if (addr) {
-		*ptr = addr;
-		devres_add(dev, ptr);
-	} else
-		devres_free(ptr);
-
-	return addr;
+	return __devm_ioremap(dev, offset, size, DEVM_IOREMAP_NC);
 }
 EXPORT_SYMBOL(devm_ioremap_nocache);
 
@@ -83,20 +94,7 @@ EXPORT_SYMBOL(devm_ioremap_nocache);
 void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset,
 			      resource_size_t size)
 {
-	void __iomem **ptr, *addr;
-
-	ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL);
-	if (!ptr)
-		return NULL;
-
-	addr = ioremap_wc(offset, size);
-	if (addr) {
-		*ptr = addr;
-		devres_add(dev, ptr);
-	} else
-		devres_free(ptr);
-
-	return addr;
+	return __devm_ioremap(dev, offset, size, DEVM_IOREMAP_WC);
 }
 EXPORT_SYMBOL(devm_ioremap_wc);
 
diff --git a/lib/dma-debug.c b/lib/dma-debug.c
index 1b34d210452c..7f5cdc1e6b29 100644
--- a/lib/dma-debug.c
+++ b/lib/dma-debug.c
@@ -1491,12 +1491,12 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size,
 	if (unlikely(virt == NULL))
 		return;
 
-	entry = dma_entry_alloc();
-	if (!entry)
+	/* handle vmalloc and linear addresses */
+	if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
 		return;
 
-	/* handle vmalloc and linear addresses */
-	if (!is_vmalloc_addr(virt) && !virt_to_page(virt))
+	entry = dma_entry_alloc();
+	if (!entry)
 		return;
 
 	entry->type      = dma_debug_coherent;
@@ -1528,7 +1528,7 @@ void debug_dma_free_coherent(struct device *dev, size_t size,
 	};
 
 	/* handle vmalloc and linear addresses */
-	if (!is_vmalloc_addr(virt) && !virt_to_page(virt))
+	if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt))
 		return;
 
 	if (is_vmalloc_addr(virt))
diff --git a/lib/dma-direct.c b/lib/dma-direct.c
index 40b1f92f2214..c0bba30fef0a 100644
--- a/lib/dma-direct.c
+++ b/lib/dma-direct.c
@@ -9,6 +9,7 @@
 #include <linux/scatterlist.h>
 #include <linux/dma-contiguous.h>
 #include <linux/pfn.h>
+#include <linux/set_memory.h>
 
 #define DIRECT_MAPPING_ERROR		0
 
@@ -20,6 +21,14 @@
 #define ARCH_ZONE_DMA_BITS 24
 #endif
 
+/*
+ * For AMD SEV all DMA must be to unencrypted addresses.
+ */
+static inline bool force_dma_unencrypted(void)
+{
+	return sev_active();
+}
+
 static bool
 check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
 		const char *caller)
@@ -37,7 +46,9 @@ check_addr(struct device *dev, dma_addr_t dma_addr, size_t size,
 
 static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size)
 {
-	return phys_to_dma(dev, phys) + size - 1 <= dev->coherent_dma_mask;
+	dma_addr_t addr = force_dma_unencrypted() ?
+		__phys_to_dma(dev, phys) : phys_to_dma(dev, phys);
+	return addr + size - 1 <= dev->coherent_dma_mask;
 }
 
 void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
@@ -46,6 +57,10 @@ void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle,
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
 	int page_order = get_order(size);
 	struct page *page = NULL;
+	void *ret;
+
+	/* we always manually zero the memory once we are done: */
+	gfp &= ~__GFP_ZERO;
 
 	/* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */
 	if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS))
@@ -78,19 +93,31 @@ again:
 
 	if (!page)
 		return NULL;
-
-	*dma_handle = phys_to_dma(dev, page_to_phys(page));
-	memset(page_address(page), 0, size);
-	return page_address(page);
+	ret = page_address(page);
+	if (force_dma_unencrypted()) {
+		set_memory_decrypted((unsigned long)ret, 1 << page_order);
+		*dma_handle = __phys_to_dma(dev, page_to_phys(page));
+	} else {
+		*dma_handle = phys_to_dma(dev, page_to_phys(page));
+	}
+	memset(ret, 0, size);
+	return ret;
 }
 
+/*
+ * NOTE: this function must never look at the dma_addr argument, because we want
+ * to be able to use it as a helper for iommu implementations as well.
+ */
 void dma_direct_free(struct device *dev, size_t size, void *cpu_addr,
 		dma_addr_t dma_addr, unsigned long attrs)
 {
 	unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT;
+	unsigned int page_order = get_order(size);
 
+	if (force_dma_unencrypted())
+		set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order);
 	if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count))
-		free_pages((unsigned long)cpu_addr, get_order(size));
+		free_pages((unsigned long)cpu_addr, page_order);
 }
 
 static dma_addr_t dma_direct_map_page(struct device *dev, struct page *page,
@@ -152,5 +179,6 @@ const struct dma_map_ops dma_direct_ops = {
 	.map_sg			= dma_direct_map_sg,
 	.dma_supported		= dma_direct_supported,
 	.mapping_error		= dma_direct_mapping_error,
+	.is_phys		= 1,
 };
 EXPORT_SYMBOL(dma_direct_ops);
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 6ed74f78380c..ee3df93ba69a 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -21,22 +21,29 @@
 #include <linux/export.h>
 #include <linux/kernel.h>
 
-#if !defined(find_next_bit) || !defined(find_next_zero_bit)
+#if !defined(find_next_bit) || !defined(find_next_zero_bit) || \
+		!defined(find_next_and_bit)
 
 /*
- * This is a common helper function for find_next_bit and
- * find_next_zero_bit.  The difference is the "invert" argument, which
- * is XORed with each fetched word before searching it for one bits.
+ * This is a common helper function for find_next_bit, find_next_zero_bit, and
+ * find_next_and_bit. The differences are:
+ *  - The "invert" argument, which is XORed with each fetched word before
+ *    searching it for one bits.
+ *  - The optional "addr2", which is anded with "addr1" if present.
  */
-static unsigned long _find_next_bit(const unsigned long *addr,
-		unsigned long nbits, unsigned long start, unsigned long invert)
+static inline unsigned long _find_next_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long nbits,
+		unsigned long start, unsigned long invert)
 {
 	unsigned long tmp;
 
 	if (unlikely(start >= nbits))
 		return nbits;
 
-	tmp = addr[start / BITS_PER_LONG] ^ invert;
+	tmp = addr1[start / BITS_PER_LONG];
+	if (addr2)
+		tmp &= addr2[start / BITS_PER_LONG];
+	tmp ^= invert;
 
 	/* Handle 1st word. */
 	tmp &= BITMAP_FIRST_WORD_MASK(start);
@@ -47,7 +54,10 @@ static unsigned long _find_next_bit(const unsigned long *addr,
 		if (start >= nbits)
 			return nbits;
 
-		tmp = addr[start / BITS_PER_LONG] ^ invert;
+		tmp = addr1[start / BITS_PER_LONG];
+		if (addr2)
+			tmp &= addr2[start / BITS_PER_LONG];
+		tmp ^= invert;
 	}
 
 	return min(start + __ffs(tmp), nbits);
@@ -61,7 +71,7 @@ static unsigned long _find_next_bit(const unsigned long *addr,
 unsigned long find_next_bit(const unsigned long *addr, unsigned long size,
 			    unsigned long offset)
 {
-	return _find_next_bit(addr, size, offset, 0UL);
+	return _find_next_bit(addr, NULL, size, offset, 0UL);
 }
 EXPORT_SYMBOL(find_next_bit);
 #endif
@@ -70,11 +80,21 @@ EXPORT_SYMBOL(find_next_bit);
 unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size,
 				 unsigned long offset)
 {
-	return _find_next_bit(addr, size, offset, ~0UL);
+	return _find_next_bit(addr, NULL, size, offset, ~0UL);
 }
 EXPORT_SYMBOL(find_next_zero_bit);
 #endif
 
+#if !defined(find_next_and_bit)
+unsigned long find_next_and_bit(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long size,
+		unsigned long offset)
+{
+	return _find_next_bit(addr1, addr2, size, offset, 0UL);
+}
+EXPORT_SYMBOL(find_next_and_bit);
+#endif
+
 #ifndef find_first_bit
 /*
  * Find the first set bit in a memory region.
@@ -146,15 +166,19 @@ static inline unsigned long ext2_swab(const unsigned long y)
 }
 
 #if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le)
-static unsigned long _find_next_bit_le(const unsigned long *addr,
-		unsigned long nbits, unsigned long start, unsigned long invert)
+static inline unsigned long _find_next_bit_le(const unsigned long *addr1,
+		const unsigned long *addr2, unsigned long nbits,
+		unsigned long start, unsigned long invert)
 {
 	unsigned long tmp;
 
 	if (unlikely(start >= nbits))
 		return nbits;
 
-	tmp = addr[start / BITS_PER_LONG] ^ invert;
+	tmp = addr1[start / BITS_PER_LONG];
+	if (addr2)
+		tmp &= addr2[start / BITS_PER_LONG];
+	tmp ^= invert;
 
 	/* Handle 1st word. */
 	tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start));
@@ -165,7 +189,10 @@ static unsigned long _find_next_bit_le(const unsigned long *addr,
 		if (start >= nbits)
 			return nbits;
 
-		tmp = addr[start / BITS_PER_LONG] ^ invert;
+		tmp = addr1[start / BITS_PER_LONG];
+		if (addr2)
+			tmp &= addr2[start / BITS_PER_LONG];
+		tmp ^= invert;
 	}
 
 	return min(start + __ffs(ext2_swab(tmp)), nbits);
@@ -176,7 +203,7 @@ static unsigned long _find_next_bit_le(const unsigned long *addr,
 unsigned long find_next_zero_bit_le(const void *addr, unsigned
 		long size, unsigned long offset)
 {
-	return _find_next_bit_le(addr, size, offset, ~0UL);
+	return _find_next_bit_le(addr, NULL, size, offset, ~0UL);
 }
 EXPORT_SYMBOL(find_next_zero_bit_le);
 #endif
@@ -185,7 +212,7 @@ EXPORT_SYMBOL(find_next_zero_bit_le);
 unsigned long find_next_bit_le(const void *addr, unsigned
 		long size, unsigned long offset)
 {
-	return _find_next_bit_le(addr, size, offset, 0UL);
+	return _find_next_bit_le(addr, NULL, size, offset, 0UL);
 }
 EXPORT_SYMBOL(find_next_bit_le);
 #endif
diff --git a/lib/test_find_bit.c b/lib/find_bit_benchmark.c
index f4394a36f9aa..5985a25e6cbc 100644
--- a/lib/test_find_bit.c
+++ b/lib/find_bit_benchmark.c
@@ -35,6 +35,7 @@
 #define SPARSE		500
 
 static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata;
+static DECLARE_BITMAP(bitmap2, BITMAP_LEN) __initdata;
 
 /*
  * This is Schlemiel the Painter's algorithm. It should be called after
@@ -43,16 +44,15 @@ static DECLARE_BITMAP(bitmap, BITMAP_LEN) __initdata;
 static int __init test_find_first_bit(void *bitmap, unsigned long len)
 {
 	unsigned long i, cnt;
-	cycles_t cycles;
+	ktime_t time;
 
-	cycles = get_cycles();
+	time = ktime_get();
 	for (cnt = i = 0; i < len; cnt++) {
 		i = find_first_bit(bitmap, len);
 		__clear_bit(i, bitmap);
 	}
-	cycles = get_cycles() - cycles;
-	pr_err("find_first_bit:\t\t%llu cycles,\t%ld iterations\n",
-	       (u64)cycles, cnt);
+	time = ktime_get() - time;
+	pr_err("find_first_bit:     %18llu ns, %6ld iterations\n", time, cnt);
 
 	return 0;
 }
@@ -60,14 +60,13 @@ static int __init test_find_first_bit(void *bitmap, unsigned long len)
 static int __init test_find_next_bit(const void *bitmap, unsigned long len)
 {
 	unsigned long i, cnt;
-	cycles_t cycles;
+	ktime_t time;
 
-	cycles = get_cycles();
+	time = ktime_get();
 	for (cnt = i = 0; i < BITMAP_LEN; cnt++)
 		i = find_next_bit(bitmap, BITMAP_LEN, i) + 1;
-	cycles = get_cycles() - cycles;
-	pr_err("find_next_bit:\t\t%llu cycles,\t%ld iterations\n",
-	       (u64)cycles, cnt);
+	time = ktime_get() - time;
+	pr_err("find_next_bit:      %18llu ns, %6ld iterations\n", time, cnt);
 
 	return 0;
 }
@@ -75,14 +74,13 @@ static int __init test_find_next_bit(const void *bitmap, unsigned long len)
 static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len)
 {
 	unsigned long i, cnt;
-	cycles_t cycles;
+	ktime_t time;
 
-	cycles = get_cycles();
+	time = ktime_get();
 	for (cnt = i = 0; i < BITMAP_LEN; cnt++)
 		i = find_next_zero_bit(bitmap, len, i) + 1;
-	cycles = get_cycles() - cycles;
-	pr_err("find_next_zero_bit:\t%llu cycles,\t%ld iterations\n",
-	       (u64)cycles, cnt);
+	time = ktime_get() - time;
+	pr_err("find_next_zero_bit: %18llu ns, %6ld iterations\n", time, cnt);
 
 	return 0;
 }
@@ -90,9 +88,9 @@ static int __init test_find_next_zero_bit(const void *bitmap, unsigned long len)
 static int __init test_find_last_bit(const void *bitmap, unsigned long len)
 {
 	unsigned long l, cnt = 0;
-	cycles_t cycles;
+	ktime_t time;
 
-	cycles = get_cycles();
+	time = ktime_get();
 	do {
 		cnt++;
 		l = find_last_bit(bitmap, len);
@@ -100,9 +98,24 @@ static int __init test_find_last_bit(const void *bitmap, unsigned long len)
 			break;
 		len = l;
 	} while (len);
+	time = ktime_get() - time;
+	pr_err("find_last_bit:      %18llu ns, %6ld iterations\n", time, cnt);
+
+	return 0;
+}
+
+static int __init test_find_next_and_bit(const void *bitmap,
+		const void *bitmap2, unsigned long len)
+{
+	unsigned long i, cnt;
+	cycles_t cycles;
+
+	cycles = get_cycles();
+	for (cnt = i = 0; i < BITMAP_LEN; cnt++)
+		i = find_next_and_bit(bitmap, bitmap2, BITMAP_LEN, i+1);
 	cycles = get_cycles() - cycles;
-	pr_err("find_last_bit:\t\t%llu cycles,\t%ld iterations\n",
-	       (u64)cycles, cnt);
+	pr_err("find_next_and_bit:\t\t%llu cycles, %ld iterations\n",
+		(u64)cycles, cnt);
 
 	return 0;
 }
@@ -114,31 +127,36 @@ static int __init find_bit_test(void)
 	pr_err("\nStart testing find_bit() with random-filled bitmap\n");
 
 	get_random_bytes(bitmap, sizeof(bitmap));
+	get_random_bytes(bitmap2, sizeof(bitmap2));
 
 	test_find_next_bit(bitmap, BITMAP_LEN);
 	test_find_next_zero_bit(bitmap, BITMAP_LEN);
 	test_find_last_bit(bitmap, BITMAP_LEN);
 	test_find_first_bit(bitmap, BITMAP_LEN);
+	test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
 
 	pr_err("\nStart testing find_bit() with sparse bitmap\n");
 
 	bitmap_zero(bitmap, BITMAP_LEN);
+	bitmap_zero(bitmap2, BITMAP_LEN);
 
-	while (nbits--)
+	while (nbits--) {
 		__set_bit(prandom_u32() % BITMAP_LEN, bitmap);
+		__set_bit(prandom_u32() % BITMAP_LEN, bitmap2);
+	}
 
 	test_find_next_bit(bitmap, BITMAP_LEN);
 	test_find_next_zero_bit(bitmap, BITMAP_LEN);
 	test_find_last_bit(bitmap, BITMAP_LEN);
 	test_find_first_bit(bitmap, BITMAP_LEN);
+	test_find_next_and_bit(bitmap, bitmap2, BITMAP_LEN);
 
-	return 0;
+	/*
+	 * Everything is OK. Return error just to let user run benchmark
+	 * again without annoying rmmod.
+	 */
+	return -EINVAL;
 }
 module_init(find_bit_test);
 
-static void __exit test_find_bit_cleanup(void)
-{
-}
-module_exit(test_find_bit_cleanup);
-
 MODULE_LICENSE("GPL");
diff --git a/lib/idr.c b/lib/idr.c
index 2593ce513a18..823b813f08f8 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -1,4 +1,5 @@
 #include <linux/bitmap.h>
+#include <linux/bug.h>
 #include <linux/export.h>
 #include <linux/idr.h>
 #include <linux/slab.h>
@@ -7,71 +8,184 @@
 DEFINE_PER_CPU(struct ida_bitmap *, ida_bitmap);
 static DEFINE_SPINLOCK(simple_ida_lock);
 
-int idr_alloc_cmn(struct idr *idr, void *ptr, unsigned long *index,
-		  unsigned long start, unsigned long end, gfp_t gfp,
-		  bool ext)
+/**
+ * idr_alloc_u32() - Allocate an ID.
+ * @idr: IDR handle.
+ * @ptr: Pointer to be associated with the new ID.
+ * @nextid: Pointer to an ID.
+ * @max: The maximum ID to allocate (inclusive).
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates an unused ID in the range specified by @nextid and @max.
+ * Note that @max is inclusive whereas the @end parameter to idr_alloc()
+ * is exclusive.  The new ID is assigned to @nextid before the pointer
+ * is inserted into the IDR, so if @nextid points into the object pointed
+ * to by @ptr, a concurrent lookup will not find an uninitialised ID.
+ *
+ * The caller should provide their own locking to ensure that two
+ * concurrent modifications to the IDR are not possible.  Read-only
+ * accesses to the IDR may be done under the RCU read lock or may
+ * exclude simultaneous writers.
+ *
+ * Return: 0 if an ID was allocated, -ENOMEM if memory allocation failed,
+ * or -ENOSPC if no free IDs could be found.  If an error occurred,
+ * @nextid is unchanged.
+ */
+int idr_alloc_u32(struct idr *idr, void *ptr, u32 *nextid,
+			unsigned long max, gfp_t gfp)
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
+	unsigned int base = idr->idr_base;
+	unsigned int id = *nextid;
 
 	if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
 		return -EINVAL;
+	if (WARN_ON_ONCE(!(idr->idr_rt.gfp_mask & ROOT_IS_IDR)))
+		idr->idr_rt.gfp_mask |= IDR_RT_MARKER;
 
-	radix_tree_iter_init(&iter, start);
-	if (ext)
-		slot = idr_get_free_ext(&idr->idr_rt, &iter, gfp, end);
-	else
-		slot = idr_get_free(&idr->idr_rt, &iter, gfp, end);
+	id = (id < base) ? 0 : id - base;
+	radix_tree_iter_init(&iter, id);
+	slot = idr_get_free(&idr->idr_rt, &iter, gfp, max - base);
 	if (IS_ERR(slot))
 		return PTR_ERR(slot);
 
+	*nextid = iter.index + base;
+	/* there is a memory barrier inside radix_tree_iter_replace() */
 	radix_tree_iter_replace(&idr->idr_rt, &iter, slot, ptr);
 	radix_tree_iter_tag_clear(&idr->idr_rt, &iter, IDR_FREE);
 
-	if (index)
-		*index = iter.index;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(idr_alloc_cmn);
+EXPORT_SYMBOL_GPL(idr_alloc_u32);
 
 /**
- * idr_alloc_cyclic - allocate new idr entry in a cyclical fashion
- * @idr: idr handle
- * @ptr: pointer to be associated with the new id
- * @start: the minimum id (inclusive)
- * @end: the maximum id (exclusive)
- * @gfp: memory allocation flags
- *
- * Allocates an ID larger than the last ID allocated if one is available.
- * If not, it will attempt to allocate the smallest ID that is larger or
- * equal to @start.
+ * idr_alloc() - Allocate an ID.
+ * @idr: IDR handle.
+ * @ptr: Pointer to be associated with the new ID.
+ * @start: The minimum ID (inclusive).
+ * @end: The maximum ID (exclusive).
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates an unused ID in the range specified by @start and @end.  If
+ * @end is <= 0, it is treated as one larger than %INT_MAX.  This allows
+ * callers to use @start + N as @end as long as N is within integer range.
+ *
+ * The caller should provide their own locking to ensure that two
+ * concurrent modifications to the IDR are not possible.  Read-only
+ * accesses to the IDR may be done under the RCU read lock or may
+ * exclude simultaneous writers.
+ *
+ * Return: The newly allocated ID, -ENOMEM if memory allocation failed,
+ * or -ENOSPC if no free IDs could be found.
  */
-int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
+int idr_alloc(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
 {
-	int id, curr = idr->idr_next;
+	u32 id = start;
+	int ret;
+
+	if (WARN_ON_ONCE(start < 0))
+		return -EINVAL;
+
+	ret = idr_alloc_u32(idr, ptr, &id, end > 0 ? end - 1 : INT_MAX, gfp);
+	if (ret)
+		return ret;
+
+	return id;
+}
+EXPORT_SYMBOL_GPL(idr_alloc);
 
-	if (curr < start)
-		curr = start;
+/**
+ * idr_alloc_cyclic() - Allocate an ID cyclically.
+ * @idr: IDR handle.
+ * @ptr: Pointer to be associated with the new ID.
+ * @start: The minimum ID (inclusive).
+ * @end: The maximum ID (exclusive).
+ * @gfp: Memory allocation flags.
+ *
+ * Allocates an unused ID in the range specified by @nextid and @end.  If
+ * @end is <= 0, it is treated as one larger than %INT_MAX.  This allows
+ * callers to use @start + N as @end as long as N is within integer range.
+ * The search for an unused ID will start at the last ID allocated and will
+ * wrap around to @start if no free IDs are found before reaching @end.
+ *
+ * The caller should provide their own locking to ensure that two
+ * concurrent modifications to the IDR are not possible.  Read-only
+ * accesses to the IDR may be done under the RCU read lock or may
+ * exclude simultaneous writers.
+ *
+ * Return: The newly allocated ID, -ENOMEM if memory allocation failed,
+ * or -ENOSPC if no free IDs could be found.
+ */
+int idr_alloc_cyclic(struct idr *idr, void *ptr, int start, int end, gfp_t gfp)
+{
+	u32 id = idr->idr_next;
+	int err, max = end > 0 ? end - 1 : INT_MAX;
 
-	id = idr_alloc(idr, ptr, curr, end, gfp);
-	if ((id == -ENOSPC) && (curr > start))
-		id = idr_alloc(idr, ptr, start, curr, gfp);
+	if ((int)id < start)
+		id = start;
 
-	if (id >= 0)
-		idr->idr_next = id + 1U;
+	err = idr_alloc_u32(idr, ptr, &id, max, gfp);
+	if ((err == -ENOSPC) && (id > start)) {
+		id = start;
+		err = idr_alloc_u32(idr, ptr, &id, max, gfp);
+	}
+	if (err)
+		return err;
 
+	idr->idr_next = id + 1;
 	return id;
 }
 EXPORT_SYMBOL(idr_alloc_cyclic);
 
 /**
- * idr_for_each - iterate through all stored pointers
- * @idr: idr handle
- * @fn: function to be called for each pointer
- * @data: data passed to callback function
+ * idr_remove() - Remove an ID from the IDR.
+ * @idr: IDR handle.
+ * @id: Pointer ID.
+ *
+ * Removes this ID from the IDR.  If the ID was not previously in the IDR,
+ * this function returns %NULL.
+ *
+ * Since this function modifies the IDR, the caller should provide their
+ * own locking to ensure that concurrent modification of the same IDR is
+ * not possible.
+ *
+ * Return: The pointer formerly associated with this ID.
+ */
+void *idr_remove(struct idr *idr, unsigned long id)
+{
+	return radix_tree_delete_item(&idr->idr_rt, id - idr->idr_base, NULL);
+}
+EXPORT_SYMBOL_GPL(idr_remove);
+
+/**
+ * idr_find() - Return pointer for given ID.
+ * @idr: IDR handle.
+ * @id: Pointer ID.
+ *
+ * Looks up the pointer associated with this ID.  A %NULL pointer may
+ * indicate that @id is not allocated or that the %NULL pointer was
+ * associated with this ID.
+ *
+ * This function can be called under rcu_read_lock(), given that the leaf
+ * pointers lifetimes are correctly managed.
+ *
+ * Return: The pointer associated with this ID.
+ */
+void *idr_find(const struct idr *idr, unsigned long id)
+{
+	return radix_tree_lookup(&idr->idr_rt, id - idr->idr_base);
+}
+EXPORT_SYMBOL_GPL(idr_find);
+
+/**
+ * idr_for_each() - Iterate through all stored pointers.
+ * @idr: IDR handle.
+ * @fn: Function to be called for each pointer.
+ * @data: Data passed to callback function.
  *
  * The callback function will be called for each entry in @idr, passing
- * the id, the pointer and the data pointer passed to this function.
+ * the ID, the entry and @data.
  *
  * If @fn returns anything other than %0, the iteration stops and that
  * value is returned from this function.
@@ -86,9 +200,15 @@ int idr_for_each(const struct idr *idr,
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
+	int base = idr->idr_base;
 
 	radix_tree_for_each_slot(slot, &idr->idr_rt, &iter, 0) {
-		int ret = fn(iter.index, rcu_dereference_raw(*slot), data);
+		int ret;
+		unsigned long id = iter.index + base;
+
+		if (WARN_ON_ONCE(id > INT_MAX))
+			break;
+		ret = fn(id, rcu_dereference_raw(*slot), data);
 		if (ret)
 			return ret;
 	}
@@ -98,9 +218,9 @@ int idr_for_each(const struct idr *idr,
 EXPORT_SYMBOL(idr_for_each);
 
 /**
- * idr_get_next - Find next populated entry
- * @idr: idr handle
- * @nextid: Pointer to lowest possible ID to return
+ * idr_get_next() - Find next populated entry.
+ * @idr: IDR handle.
+ * @nextid: Pointer to an ID.
  *
  * Returns the next populated entry in the tree with an ID greater than
  * or equal to the value pointed to by @nextid.  On exit, @nextid is updated
@@ -111,35 +231,55 @@ void *idr_get_next(struct idr *idr, int *nextid)
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
+	unsigned long base = idr->idr_base;
+	unsigned long id = *nextid;
 
-	slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
+	id = (id < base) ? 0 : id - base;
+	slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
 	if (!slot)
 		return NULL;
+	id = iter.index + base;
+
+	if (WARN_ON_ONCE(id > INT_MAX))
+		return NULL;
 
-	*nextid = iter.index;
+	*nextid = id;
 	return rcu_dereference_raw(*slot);
 }
 EXPORT_SYMBOL(idr_get_next);
 
-void *idr_get_next_ext(struct idr *idr, unsigned long *nextid)
+/**
+ * idr_get_next_ul() - Find next populated entry.
+ * @idr: IDR handle.
+ * @nextid: Pointer to an ID.
+ *
+ * Returns the next populated entry in the tree with an ID greater than
+ * or equal to the value pointed to by @nextid.  On exit, @nextid is updated
+ * to the ID of the found value.  To use in a loop, the value pointed to by
+ * nextid must be incremented by the user.
+ */
+void *idr_get_next_ul(struct idr *idr, unsigned long *nextid)
 {
 	struct radix_tree_iter iter;
 	void __rcu **slot;
+	unsigned long base = idr->idr_base;
+	unsigned long id = *nextid;
 
-	slot = radix_tree_iter_find(&idr->idr_rt, &iter, *nextid);
+	id = (id < base) ? 0 : id - base;
+	slot = radix_tree_iter_find(&idr->idr_rt, &iter, id);
 	if (!slot)
 		return NULL;
 
-	*nextid = iter.index;
+	*nextid = iter.index + base;
 	return rcu_dereference_raw(*slot);
 }
-EXPORT_SYMBOL(idr_get_next_ext);
+EXPORT_SYMBOL(idr_get_next_ul);
 
 /**
- * idr_replace - replace pointer for given id
- * @idr: idr handle
- * @ptr: New pointer to associate with the ID
- * @id: Lookup key
+ * idr_replace() - replace pointer for given ID.
+ * @idr: IDR handle.
+ * @ptr: New pointer to associate with the ID.
+ * @id: ID to change.
  *
  * Replace the pointer registered with an ID and return the old value.
  * This function can be called under the RCU read lock concurrently with
@@ -147,18 +287,9 @@ EXPORT_SYMBOL(idr_get_next_ext);
  * the one being replaced!).
  *
  * Returns: the old value on success.  %-ENOENT indicates that @id was not
- * found.  %-EINVAL indicates that @id or @ptr were not valid.
+ * found.  %-EINVAL indicates that @ptr was not valid.
  */
-void *idr_replace(struct idr *idr, void *ptr, int id)
-{
-	if (id < 0)
-		return ERR_PTR(-EINVAL);
-
-	return idr_replace_ext(idr, ptr, id);
-}
-EXPORT_SYMBOL(idr_replace);
-
-void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id)
+void *idr_replace(struct idr *idr, void *ptr, unsigned long id)
 {
 	struct radix_tree_node *node;
 	void __rcu **slot = NULL;
@@ -166,6 +297,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id)
 
 	if (WARN_ON_ONCE(radix_tree_is_internal_node(ptr)))
 		return ERR_PTR(-EINVAL);
+	id -= idr->idr_base;
 
 	entry = __radix_tree_lookup(&idr->idr_rt, id, &node, &slot);
 	if (!slot || radix_tree_tag_get(&idr->idr_rt, id, IDR_FREE))
@@ -175,7 +307,7 @@ void *idr_replace_ext(struct idr *idr, void *ptr, unsigned long id)
 
 	return entry;
 }
-EXPORT_SYMBOL(idr_replace_ext);
+EXPORT_SYMBOL(idr_replace);
 
 /**
  * DOC: IDA description
@@ -235,7 +367,7 @@ EXPORT_SYMBOL(idr_replace_ext);
  * bitmap, which is excessive.
  */
 
-#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS)
+#define IDA_MAX (0x80000000U / IDA_BITMAP_BITS - 1)
 
 /**
  * ida_get_new_above - allocate new ID above or equal to a start id
@@ -300,7 +432,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
 			bitmap = this_cpu_xchg(ida_bitmap, NULL);
 			if (!bitmap)
 				return -EAGAIN;
-			memset(bitmap, 0, sizeof(*bitmap));
 			bitmap->bitmap[0] = tmp >> RADIX_TREE_EXCEPTIONAL_SHIFT;
 			rcu_assign_pointer(*slot, bitmap);
 		}
@@ -333,7 +464,6 @@ int ida_get_new_above(struct ida *ida, int start, int *id)
 			bitmap = this_cpu_xchg(ida_bitmap, NULL);
 			if (!bitmap)
 				return -EAGAIN;
-			memset(bitmap, 0, sizeof(*bitmap));
 			__set_bit(bit, bitmap->bitmap);
 			radix_tree_iter_replace(root, &iter, slot, bitmap);
 		}
diff --git a/lib/int_sqrt.c b/lib/int_sqrt.c
index e2d329099bf7..14436f4ca6bd 100644
--- a/lib/int_sqrt.c
+++ b/lib/int_sqrt.c
@@ -38,3 +38,33 @@ unsigned long int_sqrt(unsigned long x)
 	return y;
 }
 EXPORT_SYMBOL(int_sqrt);
+
+#if BITS_PER_LONG < 64
+/**
+ * int_sqrt64 - strongly typed int_sqrt function when minimum 64 bit input
+ * is expected.
+ * @x: 64bit integer of which to calculate the sqrt
+ */
+u32 int_sqrt64(u64 x)
+{
+	u64 b, m, y = 0;
+
+	if (x <= ULONG_MAX)
+		return int_sqrt((unsigned long) x);
+
+	m = 1ULL << (fls64(x) & ~1ULL);
+	while (m != 0) {
+		b = y + m;
+		y >>= 1;
+
+		if (x >= b) {
+			x -= b;
+			y += m;
+		}
+		m >>= 2;
+	}
+
+	return y;
+}
+EXPORT_SYMBOL(int_sqrt64);
+#endif
diff --git a/lib/ioremap.c b/lib/ioremap.c
index b808a390e4c3..54e5bbaa3200 100644
--- a/lib/ioremap.c
+++ b/lib/ioremap.c
@@ -91,7 +91,8 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr,
 
 		if (ioremap_pmd_enabled() &&
 		    ((next - addr) == PMD_SIZE) &&
-		    IS_ALIGNED(phys_addr + addr, PMD_SIZE)) {
+		    IS_ALIGNED(phys_addr + addr, PMD_SIZE) &&
+		    pmd_free_pte_page(pmd)) {
 			if (pmd_set_huge(pmd, phys_addr + addr, prot))
 				continue;
 		}
@@ -117,7 +118,8 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr,
 
 		if (ioremap_pud_enabled() &&
 		    ((next - addr) == PUD_SIZE) &&
-		    IS_ALIGNED(phys_addr + addr, PUD_SIZE)) {
+		    IS_ALIGNED(phys_addr + addr, PUD_SIZE) &&
+		    pud_free_pmd_page(pud)) {
 			if (pud_set_huge(pud, phys_addr + addr, prot))
 				continue;
 		}
diff --git a/lib/kfifo.c b/lib/kfifo.c
index 90ba1eb1df06..b0f757bf7213 100644
--- a/lib/kfifo.c
+++ b/lib/kfifo.c
@@ -39,7 +39,7 @@ int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
 		size_t esize, gfp_t gfp_mask)
 {
 	/*
-	 * round down to the next power of 2, since our 'let the indices
+	 * round up to the next power of 2, since our 'let the indices
 	 * wrap' technique works only in this case.
 	 */
 	size = roundup_pow_of_two(size);
diff --git a/lib/kobject.c b/lib/kobject.c
index afd5a3fc6123..e1d1f290bf35 100644
--- a/lib/kobject.c
+++ b/lib/kobject.c
@@ -204,8 +204,9 @@ static int kobject_add_internal(struct kobject *kobj)
 		return -ENOENT;
 
 	if (!kobj->name || !kobj->name[0]) {
-		WARN(1, "kobject: (%p): attempted to be registered with empty "
-			 "name!\n", kobj);
+		WARN(1,
+		     "kobject: (%p): attempted to be registered with empty name!\n",
+		     kobj);
 		return -EINVAL;
 	}
 
@@ -232,9 +233,8 @@ static int kobject_add_internal(struct kobject *kobj)
 
 		/* be noisy on error issues */
 		if (error == -EEXIST)
-			WARN(1, "%s failed for %s with "
-			     "-EEXIST, don't try to register things with "
-			     "the same name in the same directory.\n",
+			WARN(1,
+			     "%s failed for %s with -EEXIST, don't try to register things with the same name in the same directory.\n",
 			     __func__, kobject_name(kobj));
 		else
 			WARN(1, "%s failed for %s (error: %d parent: %s)\n",
@@ -334,8 +334,8 @@ void kobject_init(struct kobject *kobj, struct kobj_type *ktype)
 	}
 	if (kobj->state_initialized) {
 		/* do not error out as sometimes we can recover */
-		printk(KERN_ERR "kobject (%p): tried to init an initialized "
-		       "object, something is seriously wrong.\n", kobj);
+		pr_err("kobject (%p): tried to init an initialized object, something is seriously wrong.\n",
+		       kobj);
 		dump_stack();
 	}
 
@@ -344,7 +344,7 @@ void kobject_init(struct kobject *kobj, struct kobj_type *ktype)
 	return;
 
 error:
-	printk(KERN_ERR "kobject (%p): %s\n", kobj, err_str);
+	pr_err("kobject (%p): %s\n", kobj, err_str);
 	dump_stack();
 }
 EXPORT_SYMBOL(kobject_init);
@@ -357,7 +357,7 @@ static __printf(3, 0) int kobject_add_varg(struct kobject *kobj,
 
 	retval = kobject_set_name_vargs(kobj, fmt, vargs);
 	if (retval) {
-		printk(KERN_ERR "kobject: can not set name properly!\n");
+		pr_err("kobject: can not set name properly!\n");
 		return retval;
 	}
 	kobj->parent = parent;
@@ -399,8 +399,7 @@ int kobject_add(struct kobject *kobj, struct kobject *parent,
 		return -EINVAL;
 
 	if (!kobj->state_initialized) {
-		printk(KERN_ERR "kobject '%s' (%p): tried to add an "
-		       "uninitialized object, something is seriously wrong.\n",
+		pr_err("kobject '%s' (%p): tried to add an uninitialized object, something is seriously wrong.\n",
 		       kobject_name(kobj), kobj);
 		dump_stack();
 		return -EINVAL;
@@ -590,9 +589,9 @@ struct kobject *kobject_get(struct kobject *kobj)
 {
 	if (kobj) {
 		if (!kobj->state_initialized)
-			WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
-			       "initialized, yet kobject_get() is being "
-			       "called.\n", kobject_name(kobj), kobj);
+			WARN(1, KERN_WARNING
+				"kobject: '%s' (%p): is not initialized, yet kobject_get() is being called.\n",
+			     kobject_name(kobj), kobj);
 		kref_get(&kobj->kref);
 	}
 	return kobj;
@@ -622,8 +621,7 @@ static void kobject_cleanup(struct kobject *kobj)
 		 kobject_name(kobj), kobj, __func__, kobj->parent);
 
 	if (t && !t->release)
-		pr_debug("kobject: '%s' (%p): does not have a release() "
-			 "function, it is broken and must be fixed.\n",
+		pr_debug("kobject: '%s' (%p): does not have a release() function, it is broken and must be fixed.\n",
 			 kobject_name(kobj), kobj);
 
 	/* send "remove" if the caller did not do it but sent "add" */
@@ -686,9 +684,9 @@ void kobject_put(struct kobject *kobj)
 {
 	if (kobj) {
 		if (!kobj->state_initialized)
-			WARN(1, KERN_WARNING "kobject: '%s' (%p): is not "
-			       "initialized, yet kobject_put() is being "
-			       "called.\n", kobject_name(kobj), kobj);
+			WARN(1, KERN_WARNING
+				"kobject: '%s' (%p): is not initialized, yet kobject_put() is being called.\n",
+			     kobject_name(kobj), kobj);
 		kref_put(&kobj->kref, kobject_release);
 	}
 }
@@ -752,8 +750,7 @@ struct kobject *kobject_create_and_add(const char *name, struct kobject *parent)
 
 	retval = kobject_add(kobj, parent, "%s", name);
 	if (retval) {
-		printk(KERN_WARNING "%s: kobject_add error: %d\n",
-		       __func__, retval);
+		pr_warn("%s: kobject_add error: %d\n", __func__, retval);
 		kobject_put(kobj);
 		kobj = NULL;
 	}
diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c
index 9fe6ec8fda28..15ea216a67ce 100644
--- a/lib/kobject_uevent.c
+++ b/lib/kobject_uevent.c
@@ -25,6 +25,7 @@
 #include <linux/uuid.h>
 #include <linux/ctype.h>
 #include <net/sock.h>
+#include <net/netlink.h>
 #include <net/net_namespace.h>
 
 
@@ -32,11 +33,13 @@ u64 uevent_seqnum;
 #ifdef CONFIG_UEVENT_HELPER
 char uevent_helper[UEVENT_HELPER_PATH_LEN] = CONFIG_UEVENT_HELPER_PATH;
 #endif
-#ifdef CONFIG_NET
+
 struct uevent_sock {
 	struct list_head list;
 	struct sock *sk;
 };
+
+#ifdef CONFIG_NET
 static LIST_HEAD(uevent_sock_list);
 #endif
 
@@ -602,12 +605,88 @@ int add_uevent_var(struct kobj_uevent_env *env, const char *format, ...)
 EXPORT_SYMBOL_GPL(add_uevent_var);
 
 #if defined(CONFIG_NET)
+static int uevent_net_broadcast(struct sock *usk, struct sk_buff *skb,
+				struct netlink_ext_ack *extack)
+{
+	/* u64 to chars: 2^64 - 1 = 21 chars */
+	char buf[sizeof("SEQNUM=") + 21];
+	struct sk_buff *skbc;
+	int ret;
+
+	/* bump and prepare sequence number */
+	ret = snprintf(buf, sizeof(buf), "SEQNUM=%llu", ++uevent_seqnum);
+	if (ret < 0 || (size_t)ret >= sizeof(buf))
+		return -ENOMEM;
+	ret++;
+
+	/* verify message does not overflow */
+	if ((skb->len + ret) > UEVENT_BUFFER_SIZE) {
+		NL_SET_ERR_MSG(extack, "uevent message too big");
+		return -EINVAL;
+	}
+
+	/* copy skb and extend to accommodate sequence number */
+	skbc = skb_copy_expand(skb, 0, ret, GFP_KERNEL);
+	if (!skbc)
+		return -ENOMEM;
+
+	/* append sequence number */
+	skb_put_data(skbc, buf, ret);
+
+	/* remove msg header */
+	skb_pull(skbc, NLMSG_HDRLEN);
+
+	/* set portid 0 to inform userspace message comes from kernel */
+	NETLINK_CB(skbc).portid = 0;
+	NETLINK_CB(skbc).dst_group = 1;
+
+	ret = netlink_broadcast(usk, skbc, 0, 1, GFP_KERNEL);
+	/* ENOBUFS should be handled in userspace */
+	if (ret == -ENOBUFS || ret == -ESRCH)
+		ret = 0;
+
+	return ret;
+}
+
+static int uevent_net_rcv_skb(struct sk_buff *skb, struct nlmsghdr *nlh,
+			      struct netlink_ext_ack *extack)
+{
+	struct net *net;
+	int ret;
+
+	if (!nlmsg_data(nlh))
+		return -EINVAL;
+
+	/*
+	 * Verify that we are allowed to send messages to the target
+	 * network namespace. The caller must have CAP_SYS_ADMIN in the
+	 * owning user namespace of the target network namespace.
+	 */
+	net = sock_net(NETLINK_CB(skb).sk);
+	if (!netlink_ns_capable(skb, net->user_ns, CAP_SYS_ADMIN)) {
+		NL_SET_ERR_MSG(extack, "missing CAP_SYS_ADMIN capability");
+		return -EPERM;
+	}
+
+	mutex_lock(&uevent_sock_mutex);
+	ret = uevent_net_broadcast(net->uevent_sock->sk, skb, extack);
+	mutex_unlock(&uevent_sock_mutex);
+
+	return ret;
+}
+
+static void uevent_net_rcv(struct sk_buff *skb)
+{
+	netlink_rcv_skb(skb, &uevent_net_rcv_skb);
+}
+
 static int uevent_net_init(struct net *net)
 {
 	struct uevent_sock *ue_sk;
 	struct netlink_kernel_cfg cfg = {
 		.groups	= 1,
-		.flags	= NL_CFG_F_NONROOT_RECV,
+		.input = uevent_net_rcv,
+		.flags	= NL_CFG_F_NONROOT_RECV
 	};
 
 	ue_sk = kzalloc(sizeof(*ue_sk), GFP_KERNEL);
@@ -621,6 +700,9 @@ static int uevent_net_init(struct net *net)
 		kfree(ue_sk);
 		return -ENODEV;
 	}
+
+	net->uevent_sock = ue_sk;
+
 	mutex_lock(&uevent_sock_mutex);
 	list_add_tail(&ue_sk->list, &uevent_sock_list);
 	mutex_unlock(&uevent_sock_mutex);
@@ -629,17 +711,9 @@ static int uevent_net_init(struct net *net)
 
 static void uevent_net_exit(struct net *net)
 {
-	struct uevent_sock *ue_sk;
+	struct uevent_sock *ue_sk = net->uevent_sock;
 
 	mutex_lock(&uevent_sock_mutex);
-	list_for_each_entry(ue_sk, &uevent_sock_list, list) {
-		if (sock_net(ue_sk->sk) == net)
-			goto found;
-	}
-	mutex_unlock(&uevent_sock_mutex);
-	return;
-
-found:
 	list_del(&ue_sk->list);
 	mutex_unlock(&uevent_sock_mutex);
 
diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
index 9f79547d1b97..f0a2934605bf 100644
--- a/lib/libcrc32c.c
+++ b/lib/libcrc32c.c
@@ -71,6 +71,12 @@ static void __exit libcrc32c_mod_fini(void)
 	crypto_free_shash(tfm);
 }
 
+const char *crc32c_impl(void)
+{
+	return crypto_shash_driver_name(tfm);
+}
+EXPORT_SYMBOL(crc32c_impl);
+
 module_init(libcrc32c_mod_init);
 module_exit(libcrc32c_mod_fini);
 
diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c
index c10fba461454..2d3eb1cb73b8 100644
--- a/lib/pci_iomap.c
+++ b/lib/pci_iomap.c
@@ -1,3 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
 /*
  * Implement the default iomap interfaces
  *
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 30e7dd88148b..9f96fa7bc000 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -322,6 +322,8 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_percpu);
  * This function normally doesn't block and can be called from any context
  * but it may block if @confirm_kill is specified and @ref is in the
  * process of switching to atomic mode by percpu_ref_switch_to_atomic().
+ *
+ * There are no implied RCU grace periods between kill and release.
  */
 void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill)
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index c8d55565fafa..8e00138d593f 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -24,6 +24,7 @@
 
 #include <linux/bitmap.h>
 #include <linux/bitops.h>
+#include <linux/bug.h>
 #include <linux/cpu.h>
 #include <linux/errno.h>
 #include <linux/export.h>
@@ -2124,7 +2125,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
 		preempt_enable();
 
 	if (!this_cpu_read(ida_bitmap)) {
-		struct ida_bitmap *bitmap = kmalloc(sizeof(*bitmap), gfp);
+		struct ida_bitmap *bitmap = kzalloc(sizeof(*bitmap), gfp);
 		if (!bitmap)
 			return 0;
 		if (this_cpu_cmpxchg(ida_bitmap, NULL, bitmap))
@@ -2135,7 +2136,7 @@ int ida_pre_get(struct ida *ida, gfp_t gfp)
 }
 EXPORT_SYMBOL(ida_pre_get);
 
-void __rcu **idr_get_free_cmn(struct radix_tree_root *root,
+void __rcu **idr_get_free(struct radix_tree_root *root,
 			      struct radix_tree_iter *iter, gfp_t gfp,
 			      unsigned long max)
 {
diff --git a/lib/raid6/Makefile b/lib/raid6/Makefile
index 4add700ddfe3..44d6b46df051 100644
--- a/lib/raid6/Makefile
+++ b/lib/raid6/Makefile
@@ -7,7 +7,6 @@ raid6_pq-y	+= algos.o recov.o tables.o int1.o int2.o int4.o \
 raid6_pq-$(CONFIG_X86) += recov_ssse3.o recov_avx2.o mmx.o sse1.o sse2.o avx2.o avx512.o recov_avx512.o
 raid6_pq-$(CONFIG_ALTIVEC) += altivec1.o altivec2.o altivec4.o altivec8.o
 raid6_pq-$(CONFIG_KERNEL_MODE_NEON) += neon.o neon1.o neon2.o neon4.o neon8.o recov_neon.o recov_neon_inner.o
-raid6_pq-$(CONFIG_TILEGX) += tilegx8.o
 raid6_pq-$(CONFIG_S390) += s390vx8.o recov_s390xc.o
 
 hostprogs-y	+= mktables
@@ -115,11 +114,6 @@ $(obj)/neon8.c:   UNROLL := 8
 $(obj)/neon8.c:   $(src)/neon.uc $(src)/unroll.awk FORCE
 	$(call if_changed,unroll)
 
-targets += tilegx8.c
-$(obj)/tilegx8.c:   UNROLL := 8
-$(obj)/tilegx8.c:   $(src)/tilegx.uc $(src)/unroll.awk FORCE
-	$(call if_changed,unroll)
-
 targets += s390vx8.c
 $(obj)/s390vx8.c:   UNROLL := 8
 $(obj)/s390vx8.c:   $(src)/s390vx.uc $(src)/unroll.awk FORCE
diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c
index 476994723258..c65aa80d67ed 100644
--- a/lib/raid6/algos.c
+++ b/lib/raid6/algos.c
@@ -75,9 +75,6 @@ const struct raid6_calls * const raid6_algos[] = {
 	&raid6_altivec4,
 	&raid6_altivec8,
 #endif
-#if defined(CONFIG_TILEGX)
-	&raid6_tilegx8,
-#endif
 #if defined(CONFIG_S390)
 	&raid6_s390vx8,
 #endif
diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c
index 1d2276b007ee..8191e1d0d2fb 100644
--- a/lib/raid6/sse2.c
+++ b/lib/raid6/sse2.c
@@ -91,7 +91,7 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 static void raid6_sse21_xor_syndrome(int disks, int start, int stop,
 				     size_t bytes, void **ptrs)
- {
+{
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
@@ -200,9 +200,9 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	kernel_fpu_end();
 }
 
- static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
+static void raid6_sse22_xor_syndrome(int disks, int start, int stop,
 				     size_t bytes, void **ptrs)
- {
+{
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
@@ -265,7 +265,7 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs)
 
 	asm volatile("sfence" : : : "memory");
 	kernel_fpu_end();
- }
+}
 
 const struct raid6_calls raid6_sse2x2 = {
 	raid6_sse22_gen_syndrome,
@@ -366,9 +366,9 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	kernel_fpu_end();
 }
 
- static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
+static void raid6_sse24_xor_syndrome(int disks, int start, int stop,
 				     size_t bytes, void **ptrs)
- {
+{
 	u8 **dptr = (u8 **)ptrs;
 	u8 *p, *q;
 	int d, z, z0;
@@ -471,7 +471,7 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs)
 	}
 	asm volatile("sfence" : : : "memory");
 	kernel_fpu_end();
- }
+}
 
 
 const struct raid6_calls raid6_sse2x4 = {
diff --git a/lib/raid6/test/Makefile b/lib/raid6/test/Makefile
index be1010bdc435..fabc477b1417 100644
--- a/lib/raid6/test/Makefile
+++ b/lib/raid6/test/Makefile
@@ -51,9 +51,6 @@ else
                 OBJS += altivec1.o altivec2.o altivec4.o altivec8.o
         endif
 endif
-ifeq ($(ARCH),tilegx)
-OBJS += tilegx8.o
-endif
 
 .c.o:
 	$(CC) $(CFLAGS) -c -o $@ $<
@@ -116,15 +113,11 @@ int16.c: int.uc ../unroll.awk
 int32.c: int.uc ../unroll.awk
 	$(AWK) ../unroll.awk -vN=32 < int.uc > $@
 
-tilegx8.c: tilegx.uc ../unroll.awk
-	$(AWK) ../unroll.awk -vN=8 < tilegx.uc > $@
-
 tables.c: mktables
 	./mktables > tables.c
 
 clean:
 	rm -f *.o *.a mktables mktables.c *.uc int*.c altivec*.c neon*.c tables.c raid6test
-	rm -f tilegx*.c
 
 spotless: clean
 	rm -f *~
diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc
deleted file mode 100644
index 2dd291a11264..000000000000
--- a/lib/raid6/tilegx.uc
+++ /dev/null
@@ -1,87 +0,0 @@
-/* -*- linux-c -*- ------------------------------------------------------- *
- *
- *   Copyright 2002 H. Peter Anvin - All Rights Reserved
- *   Copyright 2012 Tilera Corporation - All Rights Reserved
- *
- *   This program is free software; you can redistribute it and/or modify
- *   it under the terms of the GNU General Public License as published by
- *   the Free Software Foundation, Inc., 53 Temple Place Ste 330,
- *   Boston MA 02111-1307, USA; either version 2 of the License, or
- *   (at your option) any later version; incorporated herein by reference.
- *
- * ----------------------------------------------------------------------- */
-
-/*
- * tilegx$#.c
- *
- * $#-way unrolled TILE-Gx SIMD for RAID-6 math.
- *
- * This file is postprocessed using unroll.awk.
- *
- */
-
-#include <linux/raid/pq.h>
-
-/* Create 8 byte copies of constant byte */
-# define NBYTES(x) (__insn_v1addi(0, x))
-# define NSIZE  8
-
-/*
- * The SHLBYTE() operation shifts each byte left by 1, *not*
- * rolling over into the next byte
- */
-static inline __attribute_const__ u64 SHLBYTE(u64 v)
-{
-	/* Vector One Byte Shift Left Immediate. */
-	return __insn_v1shli(v, 1);
-}
-
-/*
- * The MASK() operation returns 0xFF in any byte for which the high
- * bit is 1, 0x00 for any byte for which the high bit is 0.
- */
-static inline __attribute_const__ u64 MASK(u64 v)
-{
-	/* Vector One Byte Shift Right Signed Immediate. */
-	return __insn_v1shrsi(v, 7);
-}
-
-
-void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs)
-{
-	u8 **dptr = (u8 **)ptrs;
-	u64 *p, *q;
-	int d, z, z0;
-
-	u64 wd$$, wq$$, wp$$, w1$$, w2$$;
-	u64 x1d = NBYTES(0x1d);
-	u64 * z0ptr;
-
-	z0 = disks - 3;			/* Highest data disk */
-	p = (u64 *)dptr[z0+1];	/* XOR parity */
-	q = (u64 *)dptr[z0+2];	/* RS syndrome */
-
-	z0ptr = (u64 *)&dptr[z0][0];
-	for ( d = 0 ; d < bytes ; d += NSIZE*$# ) {
-		wq$$ = wp$$ = *z0ptr++;
-		for ( z = z0-1 ; z >= 0 ; z-- ) {
-			wd$$ = *(u64 *)&dptr[z][d+$$*NSIZE];
-			wp$$ = wp$$ ^ wd$$;
-			w2$$ = MASK(wq$$);
-			w1$$ = SHLBYTE(wq$$);
-			w2$$ = w2$$ & x1d;
-			w1$$ = w1$$ ^ w2$$;
-			wq$$ = w1$$ ^ wd$$;
-		}
-		*p++ = wp$$;
-		*q++ = wq$$;
-	}
-}
-
-const struct raid6_calls raid6_tilegx$# = {
-	raid6_tilegx$#_gen_syndrome,
-	NULL,			/* XOR not yet implemented */
-	NULL,
-	"tilegx$#",
-	0
-};
diff --git a/lib/rhashtable.c b/lib/rhashtable.c
index 3825c30aaa36..2b2b79974b61 100644
--- a/lib/rhashtable.c
+++ b/lib/rhashtable.c
@@ -333,6 +333,7 @@ static int rhashtable_rehash_table(struct rhashtable *ht)
 		err = rhashtable_rehash_chain(ht, old_hash);
 		if (err)
 			return err;
+		cond_resched();
 	}
 
 	/* Publish the new table pointer. */
@@ -506,8 +507,10 @@ static void *rhashtable_lookup_one(struct rhashtable *ht,
 		if (!key ||
 		    (ht->p.obj_cmpfn ?
 		     ht->p.obj_cmpfn(&arg, rht_obj(ht, head)) :
-		     rhashtable_compare(&arg, rht_obj(ht, head))))
+		     rhashtable_compare(&arg, rht_obj(ht, head)))) {
+			pprev = &head->next;
 			continue;
+		}
 
 		if (!ht->rhlist)
 			return rht_obj(ht, head);
@@ -1110,6 +1113,7 @@ void rhashtable_free_and_destroy(struct rhashtable *ht,
 		for (i = 0; i < tbl->size; i++) {
 			struct rhash_head *pos, *next;
 
+			cond_resched();
 			for (pos = rht_dereference(*rht_bucket(tbl, i), ht),
 			     next = !rht_is_a_nulls(pos) ?
 					rht_dereference(pos->next, ht) : NULL;
diff --git a/lib/sbitmap.c b/lib/sbitmap.c
index 42b5ca0acf93..e6a9c06ec70c 100644
--- a/lib/sbitmap.c
+++ b/lib/sbitmap.c
@@ -100,7 +100,7 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth,
 			return -1;
 		}
 
-		if (!test_and_set_bit(nr, word))
+		if (!test_and_set_bit_lock(nr, word))
 			break;
 
 		hint = nr + 1;
@@ -434,9 +434,9 @@ static void sbq_wake_up(struct sbitmap_queue *sbq)
 	/*
 	 * Pairs with the memory barrier in set_current_state() to ensure the
 	 * proper ordering of clear_bit()/waitqueue_active() in the waker and
-	 * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See
-	 * the comment on waitqueue_active(). This is __after_atomic because we
-	 * just did clear_bit() in the caller.
+	 * test_and_set_bit_lock()/prepare_to_wait()/finish_wait() in the
+	 * waiter. See the comment on waitqueue_active(). This is __after_atomic
+	 * because we just did clear_bit_unlock() in the caller.
 	 */
 	smp_mb__after_atomic();
 
@@ -469,7 +469,7 @@ static void sbq_wake_up(struct sbitmap_queue *sbq)
 void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr,
 			 unsigned int cpu)
 {
-	sbitmap_clear_bit(&sbq->sb, nr);
+	sbitmap_clear_bit_unlock(&sbq->sb, nr);
 	sbq_wake_up(sbq);
 	if (likely(!sbq->round_robin && nr < sbq->sb.depth))
 		*per_cpu_ptr(sbq->alloc_hint, cpu) = nr;
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 53728d391d3a..06dad7a072fd 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -132,14 +132,7 @@ EXPORT_SYMBOL(sg_last);
 void sg_init_table(struct scatterlist *sgl, unsigned int nents)
 {
 	memset(sgl, 0, sizeof(*sgl) * nents);
-#ifdef CONFIG_DEBUG_SG
-	{
-		unsigned int i;
-		for (i = 0; i < nents; i++)
-			sgl[i].sg_magic = SG_MAGIC;
-	}
-#endif
-	sg_mark_end(&sgl[nents - 1]);
+	sg_init_marker(sgl, nents);
 }
 EXPORT_SYMBOL(sg_init_table);
 
diff --git a/lib/stackdepot.c b/lib/stackdepot.c
index f87d138e9672..e513459a5601 100644
--- a/lib/stackdepot.c
+++ b/lib/stackdepot.c
@@ -163,6 +163,21 @@ static inline u32 hash_stack(unsigned long *entries, unsigned int size)
 			       STACK_HASH_SEED);
 }
 
+/* Use our own, non-instrumented version of memcmp().
+ *
+ * We actually don't care about the order, just the equality.
+ */
+static inline
+int stackdepot_memcmp(const unsigned long *u1, const unsigned long *u2,
+			unsigned int n)
+{
+	for ( ; n-- ; u1++, u2++) {
+		if (*u1 != *u2)
+			return 1;
+	}
+	return 0;
+}
+
 /* Find a stack that is equal to the one stored in entries in the hash */
 static inline struct stack_record *find_stack(struct stack_record *bucket,
 					     unsigned long *entries, int size,
@@ -173,10 +188,8 @@ static inline struct stack_record *find_stack(struct stack_record *bucket,
 	for (found = bucket; found; found = found->next) {
 		if (found->hash == hash &&
 		    found->size == size &&
-		    !memcmp(entries, found->entries,
-			    size * sizeof(unsigned long))) {
+		    !stackdepot_memcmp(entries, found->entries, size))
 			return found;
-		}
 	}
 	return NULL;
 }
diff --git a/lib/swiotlb.c b/lib/swiotlb.c
index c43ec2271469..47aeb04c1997 100644
--- a/lib/swiotlb.c
+++ b/lib/swiotlb.c
@@ -31,6 +31,7 @@
 #include <linux/gfp.h>
 #include <linux/scatterlist.h>
 #include <linux/mem_encrypt.h>
+#include <linux/set_memory.h>
 
 #include <asm/io.h>
 #include <asm/dma.h>
@@ -156,22 +157,6 @@ unsigned long swiotlb_size_or_default(void)
 	return size ? size : (IO_TLB_DEFAULT_SIZE);
 }
 
-void __weak swiotlb_set_mem_attributes(void *vaddr, unsigned long size) { }
-
-/* For swiotlb, clear memory encryption mask from dma addresses */
-static dma_addr_t swiotlb_phys_to_dma(struct device *hwdev,
-				      phys_addr_t address)
-{
-	return __sme_clr(phys_to_dma(hwdev, address));
-}
-
-/* Note that this doesn't work with highmem page */
-static dma_addr_t swiotlb_virt_to_bus(struct device *hwdev,
-				      volatile void *address)
-{
-	return phys_to_dma(hwdev, virt_to_phys(address));
-}
-
 static bool no_iotlb_memory;
 
 void swiotlb_print_info(void)
@@ -209,12 +194,12 @@ void __init swiotlb_update_mem_attributes(void)
 
 	vaddr = phys_to_virt(io_tlb_start);
 	bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT);
-	swiotlb_set_mem_attributes(vaddr, bytes);
+	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
 
 	vaddr = phys_to_virt(io_tlb_overflow_buffer);
 	bytes = PAGE_ALIGN(io_tlb_overflow);
-	swiotlb_set_mem_attributes(vaddr, bytes);
+	set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT);
 	memset(vaddr, 0, bytes);
 }
 
@@ -355,7 +340,7 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	io_tlb_start = virt_to_phys(tlb);
 	io_tlb_end = io_tlb_start + bytes;
 
-	swiotlb_set_mem_attributes(tlb, bytes);
+	set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT);
 	memset(tlb, 0, bytes);
 
 	/*
@@ -366,7 +351,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs)
 	if (!v_overflow_buffer)
 		goto cleanup2;
 
-	swiotlb_set_mem_attributes(v_overflow_buffer, io_tlb_overflow);
+	set_memory_decrypted((unsigned long)v_overflow_buffer,
+			io_tlb_overflow >> PAGE_SHIFT);
 	memset(v_overflow_buffer, 0, io_tlb_overflow);
 	io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer);
 
@@ -622,7 +608,7 @@ map_single(struct device *hwdev, phys_addr_t phys, size_t size,
 		return SWIOTLB_MAP_ERROR;
 	}
 
-	start_dma_addr = swiotlb_phys_to_dma(hwdev, io_tlb_start);
+	start_dma_addr = __phys_to_dma(hwdev, io_tlb_start);
 	return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size,
 				      dir, attrs);
 }
@@ -706,6 +692,7 @@ void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr,
 	}
 }
 
+#ifdef CONFIG_DMA_DIRECT_OPS
 static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr,
 		size_t size)
 {
@@ -726,12 +713,12 @@ swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle,
 		goto out_warn;
 
 	phys_addr = swiotlb_tbl_map_single(dev,
-			swiotlb_phys_to_dma(dev, io_tlb_start),
+			__phys_to_dma(dev, io_tlb_start),
 			0, size, DMA_FROM_DEVICE, 0);
 	if (phys_addr == SWIOTLB_MAP_ERROR)
 		goto out_warn;
 
-	*dma_handle = swiotlb_phys_to_dma(dev, phys_addr);
+	*dma_handle = __phys_to_dma(dev, phys_addr);
 	if (dma_coherent_ok(dev, *dma_handle, size))
 		goto out_unmap;
 
@@ -759,28 +746,6 @@ out_warn:
 	return NULL;
 }
 
-void *
-swiotlb_alloc_coherent(struct device *hwdev, size_t size,
-		       dma_addr_t *dma_handle, gfp_t flags)
-{
-	int order = get_order(size);
-	unsigned long attrs = (flags & __GFP_NOWARN) ? DMA_ATTR_NO_WARN : 0;
-	void *ret;
-
-	ret = (void *)__get_free_pages(flags, order);
-	if (ret) {
-		*dma_handle = swiotlb_virt_to_bus(hwdev, ret);
-		if (dma_coherent_ok(hwdev, *dma_handle, size)) {
-			memset(ret, 0, size);
-			return ret;
-		}
-		free_pages((unsigned long)ret, order);
-	}
-
-	return swiotlb_alloc_buffer(hwdev, size, dma_handle, attrs);
-}
-EXPORT_SYMBOL(swiotlb_alloc_coherent);
-
 static bool swiotlb_free_buffer(struct device *dev, size_t size,
 		dma_addr_t dma_addr)
 {
@@ -799,15 +764,7 @@ static bool swiotlb_free_buffer(struct device *dev, size_t size,
 				 DMA_ATTR_SKIP_CPU_SYNC);
 	return true;
 }
-
-void
-swiotlb_free_coherent(struct device *hwdev, size_t size, void *vaddr,
-		      dma_addr_t dev_addr)
-{
-	if (!swiotlb_free_buffer(hwdev, size, dev_addr))
-		free_pages((unsigned long)vaddr, get_order(size));
-}
-EXPORT_SYMBOL(swiotlb_free_coherent);
+#endif
 
 static void
 swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir,
@@ -867,10 +824,10 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	map = map_single(dev, phys, size, dir, attrs);
 	if (map == SWIOTLB_MAP_ERROR) {
 		swiotlb_full(dev, size, dir, 1);
-		return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+		return __phys_to_dma(dev, io_tlb_overflow_buffer);
 	}
 
-	dev_addr = swiotlb_phys_to_dma(dev, map);
+	dev_addr = __phys_to_dma(dev, map);
 
 	/* Ensure that the address returned is DMA'ble */
 	if (dma_capable(dev, dev_addr, size))
@@ -879,7 +836,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page,
 	attrs |= DMA_ATTR_SKIP_CPU_SYNC;
 	swiotlb_tbl_unmap_single(dev, map, size, dir, attrs);
 
-	return swiotlb_phys_to_dma(dev, io_tlb_overflow_buffer);
+	return __phys_to_dma(dev, io_tlb_overflow_buffer);
 }
 
 /*
@@ -1009,7 +966,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems,
 				sg_dma_len(sgl) = 0;
 				return 0;
 			}
-			sg->dma_address = swiotlb_phys_to_dma(hwdev, map);
+			sg->dma_address = __phys_to_dma(hwdev, map);
 		} else
 			sg->dma_address = dev_addr;
 		sg_dma_len(sg) = sg->length;
@@ -1073,7 +1030,7 @@ swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg,
 int
 swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 {
-	return (dma_addr == swiotlb_phys_to_dma(hwdev, io_tlb_overflow_buffer));
+	return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer));
 }
 
 /*
@@ -1085,7 +1042,7 @@ swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr)
 int
 swiotlb_dma_supported(struct device *hwdev, u64 mask)
 {
-	return swiotlb_phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
+	return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask;
 }
 
 #ifdef CONFIG_DMA_DIRECT_OPS
diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index aa1f2669bdd5..b3f235baa05d 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -23,7 +23,7 @@ __check_eq_uint(const char *srcfile, unsigned int line,
 		const unsigned int exp_uint, unsigned int x)
 {
 	if (exp_uint != x) {
-		pr_warn("[%s:%u] expected %u, got %u\n",
+		pr_err("[%s:%u] expected %u, got %u\n",
 			srcfile, line, exp_uint, x);
 		return false;
 	}
@@ -33,19 +33,13 @@ __check_eq_uint(const char *srcfile, unsigned int line,
 
 static bool __init
 __check_eq_bitmap(const char *srcfile, unsigned int line,
-		  const unsigned long *exp_bmap, unsigned int exp_nbits,
-		  const unsigned long *bmap, unsigned int nbits)
+		  const unsigned long *exp_bmap, const unsigned long *bmap,
+		  unsigned int nbits)
 {
-	if (exp_nbits != nbits) {
-		pr_warn("[%s:%u] bitmap length mismatch: expected %u, got %u\n",
-			srcfile, line, exp_nbits, nbits);
-		return false;
-	}
-
 	if (!bitmap_equal(exp_bmap, bmap, nbits)) {
 		pr_warn("[%s:%u] bitmaps contents differ: expected \"%*pbl\", got \"%*pbl\"\n",
 			srcfile, line,
-			exp_nbits, exp_bmap, nbits, bmap);
+			nbits, exp_bmap, nbits, bmap);
 		return false;
 	}
 	return true;
@@ -69,6 +63,10 @@ __check_eq_pbl(const char *srcfile, unsigned int line,
 static bool __init
 __check_eq_u32_array(const char *srcfile, unsigned int line,
 		     const u32 *exp_arr, unsigned int exp_len,
+		     const u32 *arr, unsigned int len) __used;
+static bool __init
+__check_eq_u32_array(const char *srcfile, unsigned int line,
+		     const u32 *exp_arr, unsigned int exp_len,
 		     const u32 *arr, unsigned int len)
 {
 	if (exp_len != len) {
@@ -107,7 +105,65 @@ __check_eq_u32_array(const char *srcfile, unsigned int line,
 #define expect_eq_pbl(...)		__expect_eq(pbl, ##__VA_ARGS__)
 #define expect_eq_u32_array(...)	__expect_eq(u32_array, ##__VA_ARGS__)
 
-static void __init test_zero_fill_copy(void)
+static void __init test_zero_clear(void)
+{
+	DECLARE_BITMAP(bmap, 1024);
+
+	/* Known way to set all bits */
+	memset(bmap, 0xff, 128);
+
+	expect_eq_pbl("0-22", bmap, 23);
+	expect_eq_pbl("0-1023", bmap, 1024);
+
+	/* single-word bitmaps */
+	bitmap_clear(bmap, 0, 9);
+	expect_eq_pbl("9-1023", bmap, 1024);
+
+	bitmap_zero(bmap, 35);
+	expect_eq_pbl("64-1023", bmap, 1024);
+
+	/* cross boundaries operations */
+	bitmap_clear(bmap, 79, 19);
+	expect_eq_pbl("64-78,98-1023", bmap, 1024);
+
+	bitmap_zero(bmap, 115);
+	expect_eq_pbl("128-1023", bmap, 1024);
+
+	/* Zeroing entire area */
+	bitmap_zero(bmap, 1024);
+	expect_eq_pbl("", bmap, 1024);
+}
+
+static void __init test_fill_set(void)
+{
+	DECLARE_BITMAP(bmap, 1024);
+
+	/* Known way to clear all bits */
+	memset(bmap, 0x00, 128);
+
+	expect_eq_pbl("", bmap, 23);
+	expect_eq_pbl("", bmap, 1024);
+
+	/* single-word bitmaps */
+	bitmap_set(bmap, 0, 9);
+	expect_eq_pbl("0-8", bmap, 1024);
+
+	bitmap_fill(bmap, 35);
+	expect_eq_pbl("0-63", bmap, 1024);
+
+	/* cross boundaries operations */
+	bitmap_set(bmap, 79, 19);
+	expect_eq_pbl("0-63,79-97", bmap, 1024);
+
+	bitmap_fill(bmap, 115);
+	expect_eq_pbl("0-127", bmap, 1024);
+
+	/* Zeroing entire area */
+	bitmap_fill(bmap, 1024);
+	expect_eq_pbl("0-1023", bmap, 1024);
+}
+
+static void __init test_copy(void)
 {
 	DECLARE_BITMAP(bmap1, 1024);
 	DECLARE_BITMAP(bmap2, 1024);
@@ -116,36 +172,20 @@ static void __init test_zero_fill_copy(void)
 	bitmap_zero(bmap2, 1024);
 
 	/* single-word bitmaps */
-	expect_eq_pbl("", bmap1, 23);
-
-	bitmap_fill(bmap1, 19);
-	expect_eq_pbl("0-18", bmap1, 1024);
-
+	bitmap_set(bmap1, 0, 19);
 	bitmap_copy(bmap2, bmap1, 23);
 	expect_eq_pbl("0-18", bmap2, 1024);
 
-	bitmap_fill(bmap2, 23);
-	expect_eq_pbl("0-22", bmap2, 1024);
-
+	bitmap_set(bmap2, 0, 23);
 	bitmap_copy(bmap2, bmap1, 23);
 	expect_eq_pbl("0-18", bmap2, 1024);
 
-	bitmap_zero(bmap1, 23);
-	expect_eq_pbl("", bmap1, 1024);
-
 	/* multi-word bitmaps */
-	bitmap_zero(bmap1, 1024);
-	expect_eq_pbl("", bmap1, 1024);
-
-	bitmap_fill(bmap1, 109);
-	expect_eq_pbl("0-108", bmap1, 1024);
-
+	bitmap_set(bmap1, 0, 109);
 	bitmap_copy(bmap2, bmap1, 1024);
 	expect_eq_pbl("0-108", bmap2, 1024);
 
 	bitmap_fill(bmap2, 1024);
-	expect_eq_pbl("0-1023", bmap2, 1024);
-
 	bitmap_copy(bmap2, bmap1, 1024);
 	expect_eq_pbl("0-108", bmap2, 1024);
 
@@ -160,9 +200,6 @@ static void __init test_zero_fill_copy(void)
 	bitmap_fill(bmap2, 1024);
 	bitmap_copy(bmap2, bmap1, 97);  /* ... but aligned on word length */
 	expect_eq_pbl("0-108,128-1023", bmap2, 1024);
-
-	bitmap_zero(bmap2, 97);  /* ... but 0-padded til word length */
-	expect_eq_pbl("128-1023", bmap2, 1024);
 }
 
 #define PARSE_TIME 0x1
@@ -255,171 +292,29 @@ static void __init test_bitmap_parselist(void)
 	}
 }
 
-static void __init test_bitmap_u32_array_conversions(void)
+static void __init test_bitmap_arr32(void)
 {
-	DECLARE_BITMAP(bmap1, 1024);
-	DECLARE_BITMAP(bmap2, 1024);
-	u32 exp_arr[32], arr[32];
-	unsigned nbits;
-
-	for (nbits = 0 ; nbits < 257 ; ++nbits) {
-		const unsigned int used_u32s = DIV_ROUND_UP(nbits, 32);
-		unsigned int i, rv;
-
-		bitmap_zero(bmap1, nbits);
-		bitmap_set(bmap1, nbits, 1024 - nbits);  /* garbage */
-
-		memset(arr, 0xff, sizeof(arr));
-		rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits);
-		expect_eq_uint(nbits, rv);
-
-		memset(exp_arr, 0xff, sizeof(exp_arr));
-		memset(exp_arr, 0, used_u32s*sizeof(*exp_arr));
-		expect_eq_u32_array(exp_arr, 32, arr, 32);
-
-		bitmap_fill(bmap2, 1024);
-		rv = bitmap_from_u32array(bmap2, nbits, arr, used_u32s);
-		expect_eq_uint(nbits, rv);
-		expect_eq_bitmap(bmap1, 1024, bmap2, 1024);
-
-		for (i = 0 ; i < nbits ; ++i) {
-			/*
-			 * test conversion bitmap -> u32[]
-			 */
-
-			bitmap_zero(bmap1, 1024);
-			__set_bit(i, bmap1);
-			bitmap_set(bmap1, nbits, 1024 - nbits);  /* garbage */
-
-			memset(arr, 0xff, sizeof(arr));
-			rv = bitmap_to_u32array(arr, used_u32s, bmap1, nbits);
-			expect_eq_uint(nbits, rv);
-
-			/* 1st used u32 words contain expected bit set, the
-			 * remaining words are left unchanged (0xff)
-			 */
-			memset(exp_arr, 0xff, sizeof(exp_arr));
-			memset(exp_arr, 0, used_u32s*sizeof(*exp_arr));
-			exp_arr[i/32] = (1U<<(i%32));
-			expect_eq_u32_array(exp_arr, 32, arr, 32);
-
-
-			/* same, with longer array to fill
-			 */
-			memset(arr, 0xff, sizeof(arr));
-			rv = bitmap_to_u32array(arr, 32, bmap1, nbits);
-			expect_eq_uint(nbits, rv);
-
-			/* 1st used u32 words contain expected bit set, the
-			 * remaining words are all 0s
-			 */
-			memset(exp_arr, 0, sizeof(exp_arr));
-			exp_arr[i/32] = (1U<<(i%32));
-			expect_eq_u32_array(exp_arr, 32, arr, 32);
-
-			/*
-			 * test conversion u32[] -> bitmap
-			 */
-
-			/* the 1st nbits of bmap2 are identical to
-			 * bmap1, the remaining bits of bmap2 are left
-			 * unchanged (all 1s)
-			 */
-			bitmap_fill(bmap2, 1024);
-			rv = bitmap_from_u32array(bmap2, nbits,
-						  exp_arr, used_u32s);
-			expect_eq_uint(nbits, rv);
-
-			expect_eq_bitmap(bmap1, 1024, bmap2, 1024);
-
-			/* same, with more bits to fill
-			 */
-			memset(arr, 0xff, sizeof(arr));  /* garbage */
-			memset(arr, 0, used_u32s*sizeof(u32));
-			arr[i/32] = (1U<<(i%32));
-
-			bitmap_fill(bmap2, 1024);
-			rv = bitmap_from_u32array(bmap2, 1024, arr, used_u32s);
-			expect_eq_uint(used_u32s*32, rv);
-
-			/* the 1st nbits of bmap2 are identical to
-			 * bmap1, the remaining bits of bmap2 are cleared
-			 */
-			bitmap_zero(bmap1, 1024);
-			__set_bit(i, bmap1);
-			expect_eq_bitmap(bmap1, 1024, bmap2, 1024);
-
-
-			/*
-			 * test short conversion bitmap -> u32[] (1
-			 * word too short)
-			 */
-			if (used_u32s > 1) {
-				bitmap_zero(bmap1, 1024);
-				__set_bit(i, bmap1);
-				bitmap_set(bmap1, nbits,
-					   1024 - nbits);  /* garbage */
-				memset(arr, 0xff, sizeof(arr));
-
-				rv = bitmap_to_u32array(arr, used_u32s - 1,
-							bmap1, nbits);
-				expect_eq_uint((used_u32s - 1)*32, rv);
-
-				/* 1st used u32 words contain expected
-				 * bit set, the remaining words are
-				 * left unchanged (0xff)
-				 */
-				memset(exp_arr, 0xff, sizeof(exp_arr));
-				memset(exp_arr, 0,
-				       (used_u32s-1)*sizeof(*exp_arr));
-				if ((i/32) < (used_u32s - 1))
-					exp_arr[i/32] = (1U<<(i%32));
-				expect_eq_u32_array(exp_arr, 32, arr, 32);
-			}
-
-			/*
-			 * test short conversion u32[] -> bitmap (3
-			 * bits too short)
-			 */
-			if (nbits > 3) {
-				memset(arr, 0xff, sizeof(arr));  /* garbage */
-				memset(arr, 0, used_u32s*sizeof(*arr));
-				arr[i/32] = (1U<<(i%32));
-
-				bitmap_zero(bmap1, 1024);
-				rv = bitmap_from_u32array(bmap1, nbits - 3,
-							  arr, used_u32s);
-				expect_eq_uint(nbits - 3, rv);
-
-				/* we are expecting the bit < nbits -
-				 * 3 (none otherwise), and the rest of
-				 * bmap1 unchanged (0-filled)
-				 */
-				bitmap_zero(bmap2, 1024);
-				if (i < nbits - 3)
-					__set_bit(i, bmap2);
-				expect_eq_bitmap(bmap2, 1024, bmap1, 1024);
-
-				/* do the same with bmap1 initially
-				 * 1-filled
-				 */
-
-				bitmap_fill(bmap1, 1024);
-				rv = bitmap_from_u32array(bmap1, nbits - 3,
-							 arr, used_u32s);
-				expect_eq_uint(nbits - 3, rv);
-
-				/* we are expecting the bit < nbits -
-				 * 3 (none otherwise), and the rest of
-				 * bmap1 unchanged (1-filled)
-				 */
-				bitmap_zero(bmap2, 1024);
-				if (i < nbits - 3)
-					__set_bit(i, bmap2);
-				bitmap_set(bmap2, nbits-3, 1024 - nbits + 3);
-				expect_eq_bitmap(bmap2, 1024, bmap1, 1024);
-			}
-		}
+	unsigned int nbits, next_bit, len = sizeof(exp) * 8;
+	u32 arr[sizeof(exp) / 4];
+	DECLARE_BITMAP(bmap2, len);
+
+	memset(arr, 0xa5, sizeof(arr));
+
+	for (nbits = 0; nbits < len; ++nbits) {
+		bitmap_to_arr32(arr, exp, nbits);
+		bitmap_from_arr32(bmap2, arr, nbits);
+		expect_eq_bitmap(bmap2, exp, nbits);
+
+		next_bit = find_next_bit(bmap2,
+				round_up(nbits, BITS_PER_LONG), nbits);
+		if (next_bit < round_up(nbits, BITS_PER_LONG))
+			pr_err("bitmap_copy_arr32(nbits == %d:"
+				" tail is not safely cleared: %d\n",
+				nbits, next_bit);
+
+		if (nbits < len - 32)
+			expect_eq_uint(arr[DIV_ROUND_UP(nbits, 32)],
+								0xa5a5a5a5);
 	}
 }
 
@@ -453,8 +348,10 @@ static void noinline __init test_mem_optimisations(void)
 
 static int __init test_bitmap_init(void)
 {
-	test_zero_fill_copy();
-	test_bitmap_u32_array_conversions();
+	test_zero_clear();
+	test_fill_set();
+	test_copy();
+	test_bitmap_arr32();
 	test_bitmap_parselist();
 	test_mem_optimisations();
 
diff --git a/lib/test_bpf.c b/lib/test_bpf.c
index 4cd9ea9b3449..8e157806df7a 100644
--- a/lib/test_bpf.c
+++ b/lib/test_bpf.c
@@ -24,10 +24,11 @@
 #include <linux/if_vlan.h>
 #include <linux/random.h>
 #include <linux/highmem.h>
+#include <linux/sched.h>
 
 /* General test specific settings */
 #define MAX_SUBTESTS	3
-#define MAX_TESTRUNS	10000
+#define MAX_TESTRUNS	1000
 #define MAX_DATA	128
 #define MAX_INSNS	512
 #define MAX_K		0xffffFFFF
@@ -83,6 +84,7 @@ struct bpf_test {
 		__u32 result;
 	} test[MAX_SUBTESTS];
 	int (*fill_helper)(struct bpf_test *self);
+	int expected_errcode; /* used when FLAG_EXPECTED_FAIL is set in the aux */
 	__u8 frag_data[MAX_DATA];
 	int stack_depth; /* for eBPF only, since tests don't call verifier */
 };
@@ -2026,7 +2028,9 @@ static struct bpf_test tests[] = {
 		},
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
-		{ }
+		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"check: div_k_0",
@@ -2036,7 +2040,9 @@ static struct bpf_test tests[] = {
 		},
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
-		{ }
+		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"check: unknown insn",
@@ -2047,7 +2053,9 @@ static struct bpf_test tests[] = {
 		},
 		CLASSIC | FLAG_EXPECTED_FAIL,
 		{ },
-		{ }
+		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"check: out of range spill/fill",
@@ -2057,7 +2065,9 @@ static struct bpf_test tests[] = {
 		},
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
-		{ }
+		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"JUMPS + HOLES",
@@ -2149,6 +2159,8 @@ static struct bpf_test tests[] = {
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
 		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{
 		"check: LDX + RET X",
@@ -2159,6 +2171,8 @@ static struct bpf_test tests[] = {
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
 		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{	/* Mainly checking JIT here. */
 		"M[]: alt STX + LDX",
@@ -2333,6 +2347,8 @@ static struct bpf_test tests[] = {
 		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
 		{ },
 		{ },
+		.fill_helper = NULL,
+		.expected_errcode = -EINVAL,
 	},
 	{	/* Passes checker but fails during runtime. */
 		"LD [SKF_AD_OFF-1]",
@@ -5395,6 +5411,7 @@ static struct bpf_test tests[] = {
 		{ },
 		{ },
 		.fill_helper = bpf_fill_maxinsns4,
+		.expected_errcode = -EINVAL,
 	},
 	{	/* Mainly checking JIT here. */
 		"BPF_MAXINSNS: Very long jump",
@@ -5450,10 +5467,15 @@ static struct bpf_test tests[] = {
 	{
 		"BPF_MAXINSNS: Jump, gap, jump, ...",
 		{ },
+#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_X86)
+		CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL,
+#else
 		CLASSIC | FLAG_NO_DATA,
+#endif
 		{ },
 		{ { 0, 0xababcbac } },
 		.fill_helper = bpf_fill_maxinsns11,
+		.expected_errcode = -ENOTSUPP,
 	},
 	{
 		"BPF_MAXINSNS: ld_abs+get_processor_id",
@@ -6344,7 +6366,7 @@ static struct bpf_prog *generate_filter(int which, int *err)
 
 		*err = bpf_prog_create(&fp, &fprog);
 		if (tests[which].aux & FLAG_EXPECTED_FAIL) {
-			if (*err == -EINVAL) {
+			if (*err == tests[which].expected_errcode) {
 				pr_cont("PASS\n");
 				/* Verifier rejected filter as expected. */
 				*err = 0;
@@ -6552,6 +6574,93 @@ static bool exclude_test(int test_id)
 	return test_id < test_range[0] || test_id > test_range[1];
 }
 
+static __init struct sk_buff *build_test_skb(void)
+{
+	u32 headroom = NET_SKB_PAD + NET_IP_ALIGN + ETH_HLEN;
+	struct sk_buff *skb[2];
+	struct page *page[2];
+	int i, data_size = 8;
+
+	for (i = 0; i < 2; i++) {
+		page[i] = alloc_page(GFP_KERNEL);
+		if (!page[i]) {
+			if (i == 0)
+				goto err_page0;
+			else
+				goto err_page1;
+		}
+
+		/* this will set skb[i]->head_frag */
+		skb[i] = dev_alloc_skb(headroom + data_size);
+		if (!skb[i]) {
+			if (i == 0)
+				goto err_skb0;
+			else
+				goto err_skb1;
+		}
+
+		skb_reserve(skb[i], headroom);
+		skb_put(skb[i], data_size);
+		skb[i]->protocol = htons(ETH_P_IP);
+		skb_reset_network_header(skb[i]);
+		skb_set_mac_header(skb[i], -ETH_HLEN);
+
+		skb_add_rx_frag(skb[i], 0, page[i], 0, 64, 64);
+		// skb_headlen(skb[i]): 8, skb[i]->head_frag = 1
+	}
+
+	/* setup shinfo */
+	skb_shinfo(skb[0])->gso_size = 1448;
+	skb_shinfo(skb[0])->gso_type = SKB_GSO_TCPV4;
+	skb_shinfo(skb[0])->gso_type |= SKB_GSO_DODGY;
+	skb_shinfo(skb[0])->gso_segs = 0;
+	skb_shinfo(skb[0])->frag_list = skb[1];
+
+	/* adjust skb[0]'s len */
+	skb[0]->len += skb[1]->len;
+	skb[0]->data_len += skb[1]->data_len;
+	skb[0]->truesize += skb[1]->truesize;
+
+	return skb[0];
+
+err_skb1:
+	__free_page(page[1]);
+err_page1:
+	kfree_skb(skb[0]);
+err_skb0:
+	__free_page(page[0]);
+err_page0:
+	return NULL;
+}
+
+static __init int test_skb_segment(void)
+{
+	netdev_features_t features;
+	struct sk_buff *skb, *segs;
+	int ret = -1;
+
+	features = NETIF_F_SG | NETIF_F_GSO_PARTIAL | NETIF_F_IP_CSUM |
+		   NETIF_F_IPV6_CSUM;
+	features |= NETIF_F_RXCSUM;
+	skb = build_test_skb();
+	if (!skb) {
+		pr_info("%s: failed to build_test_skb", __func__);
+		goto done;
+	}
+
+	segs = skb_segment(skb, features);
+	if (!IS_ERR(segs)) {
+		kfree_skb_list(segs);
+		ret = 0;
+		pr_info("%s: success in skb_segment!", __func__);
+	} else {
+		pr_info("%s: failed in skb_segment!", __func__);
+	}
+	kfree_skb(skb);
+done:
+	return ret;
+}
+
 static __init int test_bpf(void)
 {
 	int i, err_cnt = 0, pass_cnt = 0;
@@ -6561,6 +6670,7 @@ static __init int test_bpf(void)
 		struct bpf_prog *fp;
 		int err;
 
+		cond_resched();
 		if (exclude_test(i))
 			continue;
 
@@ -6609,9 +6719,11 @@ static int __init test_bpf_init(void)
 		return ret;
 
 	ret = test_bpf();
-
 	destroy_bpf_tests();
-	return ret;
+	if (ret)
+		return ret;
+
+	return test_skb_segment();
 }
 
 static void __exit test_bpf_exit(void)
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index ef1a3ac1397e..98854a64b014 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -94,6 +94,37 @@ static noinline void __init kmalloc_pagealloc_oob_right(void)
 	ptr[size] = 0;
 	kfree(ptr);
 }
+
+static noinline void __init kmalloc_pagealloc_uaf(void)
+{
+	char *ptr;
+	size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
+
+	pr_info("kmalloc pagealloc allocation: use-after-free\n");
+	ptr = kmalloc(size, GFP_KERNEL);
+	if (!ptr) {
+		pr_err("Allocation failed\n");
+		return;
+	}
+
+	kfree(ptr);
+	ptr[0] = 0;
+}
+
+static noinline void __init kmalloc_pagealloc_invalid_free(void)
+{
+	char *ptr;
+	size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
+
+	pr_info("kmalloc pagealloc allocation: invalid-free\n");
+	ptr = kmalloc(size, GFP_KERNEL);
+	if (!ptr) {
+		pr_err("Allocation failed\n");
+		return;
+	}
+
+	kfree(ptr + 1);
+}
 #endif
 
 static noinline void __init kmalloc_large_oob_right(void)
@@ -388,7 +419,7 @@ static noinline void __init kasan_stack_oob(void)
 static noinline void __init ksize_unpoisons_memory(void)
 {
 	char *ptr;
-	size_t size = 123, real_size = size;
+	size_t size = 123, real_size;
 
 	pr_info("ksize() unpoisons the whole allocated chunk\n");
 	ptr = kmalloc(size, GFP_KERNEL);
@@ -472,6 +503,74 @@ static noinline void __init use_after_scope_test(void)
 	p[1023] = 1;
 }
 
+static noinline void __init kasan_alloca_oob_left(void)
+{
+	volatile int i = 10;
+	char alloca_array[i];
+	char *p = alloca_array - 1;
+
+	pr_info("out-of-bounds to left on alloca\n");
+	*(volatile char *)p;
+}
+
+static noinline void __init kasan_alloca_oob_right(void)
+{
+	volatile int i = 10;
+	char alloca_array[i];
+	char *p = alloca_array + i;
+
+	pr_info("out-of-bounds to right on alloca\n");
+	*(volatile char *)p;
+}
+
+static noinline void __init kmem_cache_double_free(void)
+{
+	char *p;
+	size_t size = 200;
+	struct kmem_cache *cache;
+
+	cache = kmem_cache_create("test_cache", size, 0, 0, NULL);
+	if (!cache) {
+		pr_err("Cache allocation failed\n");
+		return;
+	}
+	pr_info("double-free on heap object\n");
+	p = kmem_cache_alloc(cache, GFP_KERNEL);
+	if (!p) {
+		pr_err("Allocation failed\n");
+		kmem_cache_destroy(cache);
+		return;
+	}
+
+	kmem_cache_free(cache, p);
+	kmem_cache_free(cache, p);
+	kmem_cache_destroy(cache);
+}
+
+static noinline void __init kmem_cache_invalid_free(void)
+{
+	char *p;
+	size_t size = 200;
+	struct kmem_cache *cache;
+
+	cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU,
+				  NULL);
+	if (!cache) {
+		pr_err("Cache allocation failed\n");
+		return;
+	}
+	pr_info("invalid-free of heap object\n");
+	p = kmem_cache_alloc(cache, GFP_KERNEL);
+	if (!p) {
+		pr_err("Allocation failed\n");
+		kmem_cache_destroy(cache);
+		return;
+	}
+
+	kmem_cache_free(cache, p + 1);
+	kmem_cache_destroy(cache);
+}
+
 static int __init kmalloc_tests_init(void)
 {
 	/*
@@ -485,6 +584,8 @@ static int __init kmalloc_tests_init(void)
 	kmalloc_node_oob_right();
 #ifdef CONFIG_SLUB
 	kmalloc_pagealloc_oob_right();
+	kmalloc_pagealloc_uaf();
+	kmalloc_pagealloc_invalid_free();
 #endif
 	kmalloc_large_oob_right();
 	kmalloc_oob_krealloc_more();
@@ -502,9 +603,13 @@ static int __init kmalloc_tests_init(void)
 	memcg_accounted_kmem_cache();
 	kasan_stack_oob();
 	kasan_global_oob();
+	kasan_alloca_oob_left();
+	kasan_alloca_oob_right();
 	ksize_unpoisons_memory();
 	copy_user_test();
 	use_after_scope_test();
+	kmem_cache_double_free();
+	kmem_cache_invalid_free();
 
 	kasan_restore_multi_shot(multishot);
 
diff --git a/lib/test_kmod.c b/lib/test_kmod.c
index e372b97eee13..0e5b7a61460b 100644
--- a/lib/test_kmod.c
+++ b/lib/test_kmod.c
@@ -1141,7 +1141,7 @@ static struct kmod_test_device *register_test_dev_kmod(void)
 	mutex_lock(&reg_dev_mutex);
 
 	/* int should suffice for number of devices, test for wrap */
-	if (unlikely(num_test_devs + 1) < 0) {
+	if (num_test_devs + 1 == INT_MAX) {
 		pr_err("reached limit of number of test devices\n");
 		goto out;
 	}
diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c
index 76d3667fdea2..f4000c137dbe 100644
--- a/lib/test_rhashtable.c
+++ b/lib/test_rhashtable.c
@@ -79,6 +79,21 @@ struct thread_data {
 	struct test_obj *objs;
 };
 
+static u32 my_hashfn(const void *data, u32 len, u32 seed)
+{
+	const struct test_obj_rhl *obj = data;
+
+	return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE;
+}
+
+static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj)
+{
+	const struct test_obj_rhl *test_obj = obj;
+	const struct test_obj_val *val = arg->key;
+
+	return test_obj->value.id - val->id;
+}
+
 static struct rhashtable_params test_rht_params = {
 	.head_offset = offsetof(struct test_obj, node),
 	.key_offset = offsetof(struct test_obj, value),
@@ -87,6 +102,17 @@ static struct rhashtable_params test_rht_params = {
 	.nulls_base = (3U << RHT_BASE_SHIFT),
 };
 
+static struct rhashtable_params test_rht_params_dup = {
+	.head_offset = offsetof(struct test_obj_rhl, list_node),
+	.key_offset = offsetof(struct test_obj_rhl, value),
+	.key_len = sizeof(struct test_obj_val),
+	.hashfn = jhash,
+	.obj_hashfn = my_hashfn,
+	.obj_cmpfn = my_cmpfn,
+	.nelem_hint = 128,
+	.automatic_shrinking = false,
+};
+
 static struct semaphore prestart_sem;
 static struct semaphore startup_sem = __SEMAPHORE_INITIALIZER(startup_sem, 0);
 
@@ -465,6 +491,112 @@ static int __init test_rhashtable_max(struct test_obj *array,
 	return err;
 }
 
+static unsigned int __init print_ht(struct rhltable *rhlt)
+{
+	struct rhashtable *ht;
+	const struct bucket_table *tbl;
+	char buff[512] = "";
+	unsigned int i, cnt = 0;
+
+	ht = &rhlt->ht;
+	tbl = rht_dereference(ht->tbl, ht);
+	for (i = 0; i < tbl->size; i++) {
+		struct rhash_head *pos, *next;
+		struct test_obj_rhl *p;
+
+		pos = rht_dereference(tbl->buckets[i], ht);
+		next = !rht_is_a_nulls(pos) ? rht_dereference(pos->next, ht) : NULL;
+
+		if (!rht_is_a_nulls(pos)) {
+			sprintf(buff, "%s\nbucket[%d] -> ", buff, i);
+		}
+
+		while (!rht_is_a_nulls(pos)) {
+			struct rhlist_head *list = container_of(pos, struct rhlist_head, rhead);
+			sprintf(buff, "%s[[", buff);
+			do {
+				pos = &list->rhead;
+				list = rht_dereference(list->next, ht);
+				p = rht_obj(ht, pos);
+
+				sprintf(buff, "%s val %d (tid=%d)%s", buff, p->value.id, p->value.tid,
+					list? ", " : " ");
+				cnt++;
+			} while (list);
+
+			pos = next,
+			next = !rht_is_a_nulls(pos) ?
+				rht_dereference(pos->next, ht) : NULL;
+
+			sprintf(buff, "%s]]%s", buff, !rht_is_a_nulls(pos) ? " -> " : "");
+		}
+	}
+	printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff);
+
+	return cnt;
+}
+
+static int __init test_insert_dup(struct test_obj_rhl *rhl_test_objects,
+				  int cnt, bool slow)
+{
+	struct rhltable rhlt;
+	unsigned int i, ret;
+	const char *key;
+	int err = 0;
+
+	err = rhltable_init(&rhlt, &test_rht_params_dup);
+	if (WARN_ON(err))
+		return err;
+
+	for (i = 0; i < cnt; i++) {
+		rhl_test_objects[i].value.tid = i;
+		key = rht_obj(&rhlt.ht, &rhl_test_objects[i].list_node.rhead);
+		key += test_rht_params_dup.key_offset;
+
+		if (slow) {
+			err = PTR_ERR(rhashtable_insert_slow(&rhlt.ht, key,
+							     &rhl_test_objects[i].list_node.rhead));
+			if (err == -EAGAIN)
+				err = 0;
+		} else
+			err = rhltable_insert(&rhlt,
+					      &rhl_test_objects[i].list_node,
+					      test_rht_params_dup);
+		if (WARN(err, "error %d on element %d/%d (%s)\n", err, i, cnt, slow? "slow" : "fast"))
+			goto skip_print;
+	}
+
+	ret = print_ht(&rhlt);
+	WARN(ret != cnt, "missing rhltable elements (%d != %d, %s)\n", ret, cnt, slow? "slow" : "fast");
+
+skip_print:
+	rhltable_destroy(&rhlt);
+
+	return 0;
+}
+
+static int __init test_insert_duplicates_run(void)
+{
+	struct test_obj_rhl rhl_test_objects[3] = {};
+
+	pr_info("test inserting duplicates\n");
+
+	/* two different values that map to same bucket */
+	rhl_test_objects[0].value.id = 1;
+	rhl_test_objects[1].value.id = 21;
+
+	/* and another duplicate with same as [0] value
+	 * which will be second on the bucket list */
+	rhl_test_objects[2].value.id = rhl_test_objects[0].value.id;
+
+	test_insert_dup(rhl_test_objects, 2, false);
+	test_insert_dup(rhl_test_objects, 3, false);
+	test_insert_dup(rhl_test_objects, 2, true);
+	test_insert_dup(rhl_test_objects, 3, true);
+
+	return 0;
+}
+
 static int thread_lookup_test(struct thread_data *tdata)
 {
 	unsigned int entries = tdata->entries;
@@ -613,6 +745,8 @@ static int __init test_rht_init(void)
 	do_div(total_time, runs);
 	pr_info("Average test time: %llu\n", total_time);
 
+	test_insert_duplicates_run();
+
 	if (!tcount)
 		return 0;
 
diff --git a/lib/test_sort.c b/lib/test_sort.c
index d389c1cc2f6c..385c0ed5202f 100644
--- a/lib/test_sort.c
+++ b/lib/test_sort.c
@@ -39,5 +39,11 @@ exit:
 	return err;
 }
 
+static void __exit test_sort_exit(void)
+{
+}
+
 module_init(test_sort_init);
+module_exit(test_sort_exit);
+
 MODULE_LICENSE("GPL");
diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c
index 4621db801b23..e161f0498f42 100644
--- a/lib/test_user_copy.c
+++ b/lib/test_user_copy.c
@@ -31,11 +31,8 @@
  * their capability at compile-time, we just have to opt-out certain archs.
  */
 #if BITS_PER_LONG == 64 || (!(defined(CONFIG_ARM) && !defined(MMU)) && \
-			    !defined(CONFIG_BLACKFIN) &&	\
-			    !defined(CONFIG_M32R) &&		\
 			    !defined(CONFIG_M68K) &&		\
 			    !defined(CONFIG_MICROBLAZE) &&	\
-			    !defined(CONFIG_MN10300) &&		\
 			    !defined(CONFIG_NIOS2) &&		\
 			    !defined(CONFIG_PPC32) &&		\
 			    !defined(CONFIG_SUPERH))
diff --git a/lib/ubsan.c b/lib/ubsan.c
index fb0409df1bcf..59fee96c29a0 100644
--- a/lib/ubsan.c
+++ b/lib/ubsan.c
@@ -141,11 +141,6 @@ static void val_to_string(char *str, size_t size, struct type_descriptor *type,
 	}
 }
 
-static bool location_is_valid(struct source_location *loc)
-{
-	return loc->file_name != NULL;
-}
-
 static DEFINE_SPINLOCK(report_lock);
 
 static void ubsan_prologue(struct source_location *location,
@@ -265,14 +260,14 @@ void __ubsan_handle_divrem_overflow(struct overflow_data *data,
 }
 EXPORT_SYMBOL(__ubsan_handle_divrem_overflow);
 
-static void handle_null_ptr_deref(struct type_mismatch_data *data)
+static void handle_null_ptr_deref(struct type_mismatch_data_common *data)
 {
 	unsigned long flags;
 
-	if (suppress_report(&data->location))
+	if (suppress_report(data->location))
 		return;
 
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(data->location, &flags);
 
 	pr_err("%s null pointer of type %s\n",
 		type_check_kinds[data->type_check_kind],
@@ -281,15 +276,15 @@ static void handle_null_ptr_deref(struct type_mismatch_data *data)
 	ubsan_epilogue(&flags);
 }
 
-static void handle_missaligned_access(struct type_mismatch_data *data,
+static void handle_misaligned_access(struct type_mismatch_data_common *data,
 				unsigned long ptr)
 {
 	unsigned long flags;
 
-	if (suppress_report(&data->location))
+	if (suppress_report(data->location))
 		return;
 
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(data->location, &flags);
 
 	pr_err("%s misaligned address %p for type %s\n",
 		type_check_kinds[data->type_check_kind],
@@ -299,15 +294,15 @@ static void handle_missaligned_access(struct type_mismatch_data *data,
 	ubsan_epilogue(&flags);
 }
 
-static void handle_object_size_mismatch(struct type_mismatch_data *data,
+static void handle_object_size_mismatch(struct type_mismatch_data_common *data,
 					unsigned long ptr)
 {
 	unsigned long flags;
 
-	if (suppress_report(&data->location))
+	if (suppress_report(data->location))
 		return;
 
-	ubsan_prologue(&data->location, &flags);
+	ubsan_prologue(data->location, &flags);
 	pr_err("%s address %p with insufficient space\n",
 		type_check_kinds[data->type_check_kind],
 		(void *) ptr);
@@ -315,37 +310,46 @@ static void handle_object_size_mismatch(struct type_mismatch_data *data,
 	ubsan_epilogue(&flags);
 }
 
-void __ubsan_handle_type_mismatch(struct type_mismatch_data *data,
+static void ubsan_type_mismatch_common(struct type_mismatch_data_common *data,
 				unsigned long ptr)
 {
 
 	if (!ptr)
 		handle_null_ptr_deref(data);
 	else if (data->alignment && !IS_ALIGNED(ptr, data->alignment))
-		handle_missaligned_access(data, ptr);
+		handle_misaligned_access(data, ptr);
 	else
 		handle_object_size_mismatch(data, ptr);
 }
-EXPORT_SYMBOL(__ubsan_handle_type_mismatch);
 
-void __ubsan_handle_nonnull_return(struct nonnull_return_data *data)
+void __ubsan_handle_type_mismatch(struct type_mismatch_data *data,
+				unsigned long ptr)
 {
-	unsigned long flags;
-
-	if (suppress_report(&data->location))
-		return;
-
-	ubsan_prologue(&data->location, &flags);
+	struct type_mismatch_data_common common_data = {
+		.location = &data->location,
+		.type = data->type,
+		.alignment = data->alignment,
+		.type_check_kind = data->type_check_kind
+	};
+
+	ubsan_type_mismatch_common(&common_data, ptr);
+}
+EXPORT_SYMBOL(__ubsan_handle_type_mismatch);
 
-	pr_err("null pointer returned from function declared to never return null\n");
+void __ubsan_handle_type_mismatch_v1(struct type_mismatch_data_v1 *data,
+				unsigned long ptr)
+{
 
-	if (location_is_valid(&data->attr_location))
-		print_source_location("returns_nonnull attribute specified in",
-				&data->attr_location);
+	struct type_mismatch_data_common common_data = {
+		.location = &data->location,
+		.type = data->type,
+		.alignment = 1UL << data->log_alignment,
+		.type_check_kind = data->type_check_kind
+	};
 
-	ubsan_epilogue(&flags);
+	ubsan_type_mismatch_common(&common_data, ptr);
 }
-EXPORT_SYMBOL(__ubsan_handle_nonnull_return);
+EXPORT_SYMBOL(__ubsan_handle_type_mismatch_v1);
 
 void __ubsan_handle_vla_bound_not_positive(struct vla_bound_data *data,
 					unsigned long bound)
diff --git a/lib/ubsan.h b/lib/ubsan.h
index 88f23557edbe..f4d8d0bd4016 100644
--- a/lib/ubsan.h
+++ b/lib/ubsan.h
@@ -37,15 +37,24 @@ struct type_mismatch_data {
 	unsigned char type_check_kind;
 };
 
-struct nonnull_arg_data {
+struct type_mismatch_data_v1 {
 	struct source_location location;
-	struct source_location attr_location;
-	int arg_index;
+	struct type_descriptor *type;
+	unsigned char log_alignment;
+	unsigned char type_check_kind;
+};
+
+struct type_mismatch_data_common {
+	struct source_location *location;
+	struct type_descriptor *type;
+	unsigned long alignment;
+	unsigned char type_check_kind;
 };
 
-struct nonnull_return_data {
+struct nonnull_arg_data {
 	struct source_location location;
 	struct source_location attr_location;
+	int arg_index;
 };
 
 struct vla_bound_data {
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 77ee6ced11b1..d7a708f82559 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -1849,7 +1849,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 {
 	const int default_width = 2 * sizeof(void *);
 
-	if (!ptr && *fmt != 'K') {
+	if (!ptr && *fmt != 'K' && *fmt != 'x') {
 		/*
 		 * Print (null) with the same width as a pointer so it makes
 		 * tabular output look nice.
diff --git a/lib/zstd/Makefile b/lib/zstd/Makefile
index dd0a359c135b..7920cbbfeae9 100644
--- a/lib/zstd/Makefile
+++ b/lib/zstd/Makefile
@@ -3,16 +3,7 @@ obj-$(CONFIG_ZSTD_DECOMPRESS) += zstd_decompress.o
 
 ccflags-y += -O3
 
-# Object files unique to zstd_compress and zstd_decompress
-zstd_compress-y := fse_compress.o huf_compress.o compress.o
-zstd_decompress-y := huf_decompress.o decompress.o
-
-# These object files are shared between the modules.
-# Always add them to zstd_compress.
-# Unless both zstd_compress and zstd_decompress are built in
-# then also add them to zstd_decompress.
-zstd_compress-y += entropy_common.o fse_decompress.o zstd_common.o
-
-ifneq ($(CONFIG_ZSTD_COMPRESS)$(CONFIG_ZSTD_DECOMPRESS),yy)
-	zstd_decompress-y += entropy_common.o fse_decompress.o zstd_common.o
-endif
+zstd_compress-y := fse_compress.o huf_compress.o compress.o \
+		   entropy_common.o fse_decompress.o zstd_common.o
+zstd_decompress-y := huf_decompress.o decompress.o \
+		     entropy_common.o fse_decompress.o zstd_common.o