From 20607434113b8f7d74cfc98e27a4199535c1d4fa Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Mon, 30 Mar 2020 17:22:11 -0700
Subject: lib/bitmap.c: fix spello

Fix typo/spello for whitespaces.

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Jiri Kosina <jkosina@suse.cz>
---
 lib/bitmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/bitmap.c b/lib/bitmap.c
index 89260aa342d6..a801379e9354 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -551,7 +551,7 @@ static inline bool end_of_region(char c)
 }
 
 /*
- * The format allows commas and whitespases at the beginning
+ * The format allows commas and whitespaces at the beginning
  * of the region.
  */
 static const char *bitmap_find_region(const char *str)
-- 
cgit v1.2.3


From 8aa26c575fb343ebde810b30dad0cba7d8121efb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 18 Aug 2020 10:17:33 +0200
Subject: netlink: make NLA_BINARY validation more flexible

Add range validation for NLA_BINARY, allowing validation of any
combination of combination minimum or maximum lengths, using the
existing NLA_POLICY_RANGE()/NLA_POLICY_FULL_RANGE() macros, just
like for integers where the value is checked.

Also make NLA_POLICY_EXACT_LEN(), NLA_POLICY_EXACT_LEN_WARN()
and NLA_POLICY_MIN_LEN() special cases of this, removing the old
types NLA_EXACT_LEN and NLA_MIN_LEN.

This allows us to save some code where both minimum and maximum
lengths are requires, currently the policy only allows maximum
(NLA_BINARY), minimum (NLA_MIN_LEN) or exact (NLA_EXACT_LEN), so
a range of lengths cannot be accepted and must be checked by the
code that consumes the attributes later.

Also, this allows advertising the correct ranges in the policy
export to userspace. Here, NLA_MIN_LEN and NLA_EXACT_LEN already
were special cases of NLA_BINARY with min and min/max length
respectively.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 58 ++++++++++++++++++++++++++-----------------------
 lib/nlattr.c          | 60 +++++++++++++++++++++++++++++++++------------------
 net/netlink/policy.c  | 32 ++++++++++++++++-----------
 3 files changed, 89 insertions(+), 61 deletions(-)

(limited to 'lib')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index c0411f14fb53..fdd317f8fde4 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -181,8 +181,6 @@ enum {
 	NLA_S64,
 	NLA_BITFIELD32,
 	NLA_REJECT,
-	NLA_EXACT_LEN,
-	NLA_MIN_LEN,
 	__NLA_TYPE_MAX,
 };
 
@@ -199,11 +197,11 @@ struct netlink_range_validation_signed {
 enum nla_policy_validation {
 	NLA_VALIDATE_NONE,
 	NLA_VALIDATE_RANGE,
+	NLA_VALIDATE_RANGE_WARN_TOO_LONG,
 	NLA_VALIDATE_MIN,
 	NLA_VALIDATE_MAX,
 	NLA_VALIDATE_RANGE_PTR,
 	NLA_VALIDATE_FUNCTION,
-	NLA_VALIDATE_WARN_TOO_LONG,
 };
 
 /**
@@ -222,7 +220,7 @@ enum nla_policy_validation {
  *    NLA_NUL_STRING       Maximum length of string (excluding NUL)
  *    NLA_FLAG             Unused
  *    NLA_BINARY           Maximum length of attribute payload
- *    NLA_MIN_LEN          Minimum length of attribute payload
+ *                         (but see also below with the validation type)
  *    NLA_NESTED,
  *    NLA_NESTED_ARRAY     Length verification is done by checking len of
  *                         nested header (or empty); len field is used if
@@ -237,11 +235,6 @@ enum nla_policy_validation {
  *                         just like "All other"
  *    NLA_BITFIELD32       Unused
  *    NLA_REJECT           Unused
- *    NLA_EXACT_LEN        Attribute should have exactly this length, otherwise
- *                         it is rejected or warned about, the latter happening
- *                         if and only if the `validation_type' is set to
- *                         NLA_VALIDATE_WARN_TOO_LONG.
- *    NLA_MIN_LEN          Minimum length of attribute payload
  *    All other            Minimum length of attribute payload
  *
  * Meaning of validation union:
@@ -296,6 +289,11 @@ enum nla_policy_validation {
  *                         pointer to a struct netlink_range_validation_signed
  *                         that indicates the min/max values.
  *                         Use NLA_POLICY_FULL_RANGE_SIGNED().
+ *
+ *    NLA_BINARY           If the validation type is like the ones for integers
+ *                         above, then the min/max length (not value like for
+ *                         integers) of the attribute is enforced.
+ *
  *    All other            Unused - but note that it's a union
  *
  * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
@@ -309,7 +307,7 @@ enum nla_policy_validation {
  * static const struct nla_policy my_policy[ATTR_MAX+1] = {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
  *	[ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
- *	[ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) },
+ *	[ATTR_BAZ] = NLA_POLICY_EXACT_LEN(sizeof(struct mystruct)),
  *	[ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags),
  * };
  */
@@ -335,9 +333,10 @@ struct nla_policy {
 		 * nesting validation starts here.
 		 *
 		 * Additionally, it means that NLA_UNSPEC is actually NLA_REJECT
-		 * for any types >= this, so need to use NLA_MIN_LEN to get the
-		 * previous pure { .len = xyz } behaviour. The advantage of this
-		 * is that types not specified in the policy will be rejected.
+		 * for any types >= this, so need to use NLA_POLICY_MIN_LEN() to
+		 * get the previous pure { .len = xyz } behaviour. The advantage
+		 * of this is that types not specified in the policy will be
+		 * rejected.
 		 *
 		 * For completely new families it should be set to 1 so that the
 		 * validation is enforced for all attributes. For existing ones
@@ -349,12 +348,6 @@ struct nla_policy {
 	};
 };
 
-#define NLA_POLICY_EXACT_LEN(_len)	{ .type = NLA_EXACT_LEN, .len = _len }
-#define NLA_POLICY_EXACT_LEN_WARN(_len) \
-	{ .type = NLA_EXACT_LEN, .len = _len, \
-	  .validation_type = NLA_VALIDATE_WARN_TOO_LONG, }
-#define NLA_POLICY_MIN_LEN(_len)	{ .type = NLA_MIN_LEN, .len = _len }
-
 #define NLA_POLICY_ETH_ADDR		NLA_POLICY_EXACT_LEN(ETH_ALEN)
 #define NLA_POLICY_ETH_ADDR_COMPAT	NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
 
@@ -370,19 +363,21 @@ struct nla_policy {
 	{ .type = NLA_BITFIELD32, .bitfield32_valid = valid }
 
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
-#define NLA_ENSURE_UINT_TYPE(tp)			\
+#define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp)		\
 	(__NLA_ENSURE(tp == NLA_U8 || tp == NLA_U16 ||	\
 		      tp == NLA_U32 || tp == NLA_U64 ||	\
-		      tp == NLA_MSECS) + tp)
+		      tp == NLA_MSECS ||		\
+		      tp == NLA_BINARY) + tp)
 #define NLA_ENSURE_SINT_TYPE(tp)			\
 	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_S16  ||	\
 		      tp == NLA_S32 || tp == NLA_S64) + tp)
-#define NLA_ENSURE_INT_TYPE(tp)				\
+#define NLA_ENSURE_INT_OR_BINARY_TYPE(tp)		\
 	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 ||	\
 		      tp == NLA_S16 || tp == NLA_U16 ||	\
 		      tp == NLA_S32 || tp == NLA_U32 ||	\
 		      tp == NLA_S64 || tp == NLA_U64 ||	\
-		      tp == NLA_MSECS) + tp)
+		      tp == NLA_MSECS ||		\
+		      tp == NLA_BINARY) + tp)
 #define NLA_ENSURE_NO_VALIDATION_PTR(tp)		\
 	(__NLA_ENSURE(tp != NLA_BITFIELD32 &&		\
 		      tp != NLA_REJECT &&		\
@@ -390,14 +385,14 @@ struct nla_policy {
 		      tp != NLA_NESTED_ARRAY) + tp)
 
 #define NLA_POLICY_RANGE(tp, _min, _max) {		\
-	.type = NLA_ENSURE_INT_TYPE(tp),		\
+	.type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp),	\
 	.validation_type = NLA_VALIDATE_RANGE,		\
 	.min = _min,					\
 	.max = _max					\
 }
 
 #define NLA_POLICY_FULL_RANGE(tp, _range) {		\
-	.type = NLA_ENSURE_UINT_TYPE(tp),		\
+	.type = NLA_ENSURE_UINT_OR_BINARY_TYPE(tp),	\
 	.validation_type = NLA_VALIDATE_RANGE_PTR,	\
 	.range = _range,				\
 }
@@ -409,13 +404,13 @@ struct nla_policy {
 }
 
 #define NLA_POLICY_MIN(tp, _min) {			\
-	.type = NLA_ENSURE_INT_TYPE(tp),		\
+	.type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp),	\
 	.validation_type = NLA_VALIDATE_MIN,		\
 	.min = _min,					\
 }
 
 #define NLA_POLICY_MAX(tp, _max) {			\
-	.type = NLA_ENSURE_INT_TYPE(tp),		\
+	.type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp),	\
 	.validation_type = NLA_VALIDATE_MAX,		\
 	.max = _max,					\
 }
@@ -427,6 +422,15 @@ struct nla_policy {
 	.len = __VA_ARGS__ + 0,				\
 }
 
+#define NLA_POLICY_EXACT_LEN(_len)	NLA_POLICY_RANGE(NLA_BINARY, _len, _len)
+#define NLA_POLICY_EXACT_LEN_WARN(_len) {			\
+	.type = NLA_BINARY,					\
+	.validation_type = NLA_VALIDATE_RANGE_WARN_TOO_LONG,	\
+	.min = _len,						\
+	.max = _len						\
+}
+#define NLA_POLICY_MIN_LEN(_len)	NLA_POLICY_MIN(NLA_BINARY, _len)
+
 /**
  * struct nl_info - netlink source information
  * @nlh: Netlink message header of original request
diff --git a/lib/nlattr.c b/lib/nlattr.c
index bc5b5cf608c4..665bdaff02c8 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -124,6 +124,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
 		range->max = U8_MAX;
 		break;
 	case NLA_U16:
+	case NLA_BINARY:
 		range->max = U16_MAX;
 		break;
 	case NLA_U32:
@@ -140,6 +141,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
 
 	switch (pt->validation_type) {
 	case NLA_VALIDATE_RANGE:
+	case NLA_VALIDATE_RANGE_WARN_TOO_LONG:
 		range->min = pt->min;
 		range->max = pt->max;
 		break;
@@ -157,9 +159,10 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
 	}
 }
 
-static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
-					   const struct nlattr *nla,
-					   struct netlink_ext_ack *extack)
+static int nla_validate_range_unsigned(const struct nla_policy *pt,
+				       const struct nlattr *nla,
+				       struct netlink_ext_ack *extack,
+				       unsigned int validate)
 {
 	struct netlink_range_validation range;
 	u64 value;
@@ -178,15 +181,39 @@ static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
 	case NLA_MSECS:
 		value = nla_get_u64(nla);
 		break;
+	case NLA_BINARY:
+		value = nla_len(nla);
+		break;
 	default:
 		return -EINVAL;
 	}
 
 	nla_get_range_unsigned(pt, &range);
 
+	if (pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG &&
+	    pt->type == NLA_BINARY && value > range.max) {
+		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
+				    current->comm, pt->type);
+		if (validate & NL_VALIDATE_STRICT_ATTRS) {
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "invalid attribute length");
+			return -EINVAL;
+		}
+
+		/* this assumes min <= max (don't validate against min) */
+		return 0;
+	}
+
 	if (value < range.min || value > range.max) {
-		NL_SET_ERR_MSG_ATTR(extack, nla,
-				    "integer out of range");
+		bool binary = pt->type == NLA_BINARY;
+
+		if (binary)
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "binary attribute size out of range");
+		else
+			NL_SET_ERR_MSG_ATTR(extack, nla,
+					    "integer out of range");
+
 		return -ERANGE;
 	}
 
@@ -274,7 +301,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt,
 
 static int nla_validate_int_range(const struct nla_policy *pt,
 				  const struct nlattr *nla,
-				  struct netlink_ext_ack *extack)
+				  struct netlink_ext_ack *extack,
+				  unsigned int validate)
 {
 	switch (pt->type) {
 	case NLA_U8:
@@ -282,7 +310,8 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 	case NLA_U32:
 	case NLA_U64:
 	case NLA_MSECS:
-		return nla_validate_int_range_unsigned(pt, nla, extack);
+	case NLA_BINARY:
+		return nla_validate_range_unsigned(pt, nla, extack, validate);
 	case NLA_S8:
 	case NLA_S16:
 	case NLA_S32:
@@ -313,10 +342,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 
 	BUG_ON(pt->type > NLA_TYPE_MAX);
 
-	if ((nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) ||
-	    (pt->type == NLA_EXACT_LEN &&
-	     pt->validation_type == NLA_VALIDATE_WARN_TOO_LONG &&
-	     attrlen != pt->len)) {
+	if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) {
 		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
 				    current->comm, type);
 		if (validate & NL_VALIDATE_STRICT_ATTRS) {
@@ -449,19 +475,10 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 					    "Unsupported attribute");
 			return -EINVAL;
 		}
-		/* fall through */
-	case NLA_MIN_LEN:
 		if (attrlen < pt->len)
 			goto out_err;
 		break;
 
-	case NLA_EXACT_LEN:
-		if (pt->validation_type != NLA_VALIDATE_WARN_TOO_LONG) {
-			if (attrlen != pt->len)
-				goto out_err;
-			break;
-		}
-		/* fall through */
 	default:
 		if (pt->len)
 			minlen = pt->len;
@@ -479,9 +496,10 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		break;
 	case NLA_VALIDATE_RANGE_PTR:
 	case NLA_VALIDATE_RANGE:
+	case NLA_VALIDATE_RANGE_WARN_TOO_LONG:
 	case NLA_VALIDATE_MIN:
 	case NLA_VALIDATE_MAX:
-		err = nla_validate_int_range(pt, nla, extack);
+		err = nla_validate_int_range(pt, nla, extack, validate);
 		if (err)
 			return err;
 		break;
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index f6491853c797..46c11c9c212a 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -251,12 +251,6 @@ send_attribute:
 				pt->bitfield32_valid))
 			goto nla_put_failure;
 		break;
-	case NLA_EXACT_LEN:
-		type = NL_ATTR_TYPE_BINARY;
-		if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len) ||
-		    nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH, pt->len))
-			goto nla_put_failure;
-		break;
 	case NLA_STRING:
 	case NLA_NUL_STRING:
 	case NLA_BINARY:
@@ -266,14 +260,26 @@ send_attribute:
 			type = NL_ATTR_TYPE_NUL_STRING;
 		else
 			type = NL_ATTR_TYPE_BINARY;
-		if (pt->len && nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
-					   pt->len))
-			goto nla_put_failure;
-		break;
-	case NLA_MIN_LEN:
-		type = NL_ATTR_TYPE_BINARY;
-		if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len))
+
+		if (pt->validation_type != NLA_VALIDATE_NONE) {
+			struct netlink_range_validation range;
+
+			nla_get_range_unsigned(pt, &range);
+
+			if (range.min &&
+			    nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH,
+					range.min))
+				goto nla_put_failure;
+
+			if (range.max < U16_MAX &&
+			    nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
+					range.max))
+				goto nla_put_failure;
+		} else if (pt->len &&
+			   nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
+				       pt->len)) {
 			goto nla_put_failure;
+		}
 		break;
 	case NLA_FLAG:
 		type = NL_ATTR_TYPE_FLAG;
-- 
cgit v1.2.3


From 6e41c585e38ff696de3a11509a0ad0a11150b0c3 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Wed, 22 Jul 2020 22:14:36 -0400
Subject: unify generic instances of csum_partial_copy_nocheck()

quite a few architectures have the same csum_partial_copy_nocheck() -
simply memcpy() the data and then return the csum of the copy.

hexagon, parisc, ia64, s390, um: explicitly spelled out that way.

arc, arm64, csky, h8300, m68k/nommu, microblaze, mips/GENERIC_CSUM, nds32,
nios2, openrisc, riscv, unicore32: end up picking the same thing spelled
out in lib/checksum.h (with varying amounts of perversions along the way).

everybody else (alpha, arm, c6x, m68k/mmu, mips/!GENERIC_CSUM, powerpc,
sh, sparc, x86, xtensa) have non-generic variants.  For all except c6x
the declaration is in their asm/checksum.h.  c6x uses the wrapper
from asm-generic/checksum.h that would normally lead to the lib/checksum.h
instance, but in case of c6x we end up using an asm function from arch/c6x
instead.

Screw that mess - have architectures with private instances define
_HAVE_ARCH_CSUM_AND_COPY in their asm/checksum.h and have the default
one right in net/checksum.h conditional on _HAVE_ARCH_CSUM_AND_COPY
*not* defined.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/checksum.h   |  1 +
 arch/arm/include/asm/checksum.h     |  1 +
 arch/c6x/include/asm/checksum.h     |  3 +++
 arch/hexagon/include/asm/checksum.h | 11 -----------
 arch/hexagon/lib/checksum.c         | 11 -----------
 arch/ia64/include/asm/checksum.h    |  3 ---
 arch/ia64/lib/csum_partial_copy.c   | 15 ---------------
 arch/m68k/include/asm/checksum.h    |  1 +
 arch/mips/include/asm/checksum.h    |  2 +-
 arch/nios2/include/asm/checksum.h   |  4 ----
 arch/parisc/include/asm/checksum.h  |  8 --------
 arch/parisc/lib/checksum.c          | 17 -----------------
 arch/powerpc/include/asm/checksum.h |  1 +
 arch/s390/include/asm/checksum.h    |  7 -------
 arch/sh/include/asm/checksum_32.h   |  1 +
 arch/sparc/include/asm/checksum.h   |  1 +
 arch/x86/include/asm/checksum.h     |  1 +
 arch/x86/um/asm/checksum.h          | 16 ----------------
 arch/xtensa/include/asm/checksum.h  |  1 +
 include/asm-generic/checksum.h      | 12 ------------
 include/net/checksum.h              |  9 +++++++++
 lib/checksum.c                      | 11 -----------
 22 files changed, 21 insertions(+), 116 deletions(-)

(limited to 'lib')

diff --git a/arch/alpha/include/asm/checksum.h b/arch/alpha/include/asm/checksum.h
index 0eac81624d01..7e8e4fa4362d 100644
--- a/arch/alpha/include/asm/checksum.h
+++ b/arch/alpha/include/asm/checksum.h
@@ -42,6 +42,7 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum);
  * better 64-bit) boundary
  */
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
+#define _HAVE_ARCH_CSUM_AND_COPY
 __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *errp);
 
 __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
diff --git a/arch/arm/include/asm/checksum.h b/arch/arm/include/asm/checksum.h
index ed6073fee338..53f769508540 100644
--- a/arch/arm/include/asm/checksum.h
+++ b/arch/arm/include/asm/checksum.h
@@ -41,6 +41,7 @@ __wsum
 csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);
 
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
+#define _HAVE_ARCH_CSUM_AND_COPY
 static inline
 __wsum csum_and_copy_from_user (const void __user *src, void *dst,
 				      int len, __wsum sum, int *err_ptr)
diff --git a/arch/c6x/include/asm/checksum.h b/arch/c6x/include/asm/checksum.h
index 36770b8308d9..fa31bdc186b2 100644
--- a/arch/c6x/include/asm/checksum.h
+++ b/arch/c6x/include/asm/checksum.h
@@ -26,6 +26,9 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
 }
 #define csum_tcpudp_nofold csum_tcpudp_nofold
 
+#define _HAVE_ARCH_CSUM_AND_COPY
+extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
+
 #include <asm-generic/checksum.h>
 
 #endif /* _ASM_C6X_CHECKSUM_H */
diff --git a/arch/hexagon/include/asm/checksum.h b/arch/hexagon/include/asm/checksum.h
index a5c42f4614c1..4bc6ad96c4c5 100644
--- a/arch/hexagon/include/asm/checksum.h
+++ b/arch/hexagon/include/asm/checksum.h
@@ -9,17 +9,6 @@
 #define do_csum	do_csum
 unsigned int do_csum(const void *voidptr, int len);
 
-/*
- * the same as csum_partial, but copies from src while it
- * checksums
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-#define csum_partial_copy_nocheck csum_partial_copy_nocheck
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-					int len, __wsum sum);
-
 /*
  * computes the checksum of the TCP/UDP pseudo-header
  * returns a 16-bit checksum, already complemented
diff --git a/arch/hexagon/lib/checksum.c b/arch/hexagon/lib/checksum.c
index c4a6b72d97de..ba50822a0800 100644
--- a/arch/hexagon/lib/checksum.c
+++ b/arch/hexagon/lib/checksum.c
@@ -176,14 +176,3 @@ unsigned int do_csum(const void *voidptr, int len)
 
 	return 0xFFFF & sum0;
 }
-
-/*
- * copy from ds while checksumming, otherwise like csum_partial
- */
-__wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
-{
-	memcpy(dst, src, len);
-	return csum_partial(dst, len, sum);
-}
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/ia64/include/asm/checksum.h b/arch/ia64/include/asm/checksum.h
index 2a1c64629cdc..f3026213aa32 100644
--- a/arch/ia64/include/asm/checksum.h
+++ b/arch/ia64/include/asm/checksum.h
@@ -37,9 +37,6 @@ extern __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr,
  */
 extern __wsum csum_partial(const void *buff, int len, __wsum sum);
 
-extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
-					       int len, __wsum sum);
-
 /*
  * This routine is used for miscellaneous IP-like checksums, mainly in
  * icmp.c
diff --git a/arch/ia64/lib/csum_partial_copy.c b/arch/ia64/lib/csum_partial_copy.c
index 6e82e0be8040..917e3138b277 100644
--- a/arch/ia64/lib/csum_partial_copy.c
+++ b/arch/ia64/lib/csum_partial_copy.c
@@ -96,18 +96,3 @@ unsigned long do_csum_c(const unsigned char * buff, int len, unsigned int psum)
 out:
 	return result;
 }
-
-/*
- * XXX Fixme
- *
- * This is very ugly but temporary. THIS NEEDS SERIOUS ENHANCEMENTS.
- * But it's very tricky to get right even in C.
- */
-__wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
-{
-	memcpy(dst, src, len);
-	return csum_partial(dst, len, sum);
-}
-
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/m68k/include/asm/checksum.h b/arch/m68k/include/asm/checksum.h
index 3f2c15d6f18c..ab16881d84cb 100644
--- a/arch/m68k/include/asm/checksum.h
+++ b/arch/m68k/include/asm/checksum.h
@@ -31,6 +31,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum);
  */
 
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
+#define _HAVE_ARCH_CSUM_AND_COPY
 extern __wsum csum_and_copy_from_user(const void __user *src,
 						void *dst,
 						int len, __wsum sum,
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index 181f7d14efb9..abdf1086873d 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -101,9 +101,9 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
  * the same as csum_partial, but copies from user space (but on MIPS
  * we have just one address space, so this is identical to the above)
  */
+#define _HAVE_ARCH_CSUM_AND_COPY
 __wsum csum_partial_copy_nocheck(const void *src, void *dst,
 				       int len, __wsum sum);
-#define csum_partial_copy_nocheck csum_partial_copy_nocheck
 
 /*
  *	Fold a partial checksum without adding pseudo headers
diff --git a/arch/nios2/include/asm/checksum.h b/arch/nios2/include/asm/checksum.h
index b4316c361729..69004e07a1ba 100644
--- a/arch/nios2/include/asm/checksum.h
+++ b/arch/nios2/include/asm/checksum.h
@@ -12,10 +12,6 @@
 
 /* Take these from lib/checksum.c */
 extern __wsum csum_partial(const void *buff, int len, __wsum sum);
-__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len,
-				__wsum sum);
-#define csum_partial_copy_nocheck csum_partial_copy_nocheck
-
 extern __sum16 ip_fast_csum(const void *iph, unsigned int ihl);
 extern __sum16 ip_compute_csum(const void *buff, int len);
 
diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h
index fe8c63b2d2c3..522cd574c068 100644
--- a/arch/parisc/include/asm/checksum.h
+++ b/arch/parisc/include/asm/checksum.h
@@ -18,14 +18,6 @@
  */
 extern __wsum csum_partial(const void *, int, __wsum);
 
-/*
- * The same as csum_partial, but copies from src while it checksums.
- *
- * Here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-extern __wsum csum_partial_copy_nocheck(const void *, void *, int, __wsum);
-
 /*
  *	Optimized for IP headers, which always checksum on 4 octet boundaries.
  *
diff --git a/arch/parisc/lib/checksum.c b/arch/parisc/lib/checksum.c
index c6f161583549..4818f3db84a5 100644
--- a/arch/parisc/lib/checksum.c
+++ b/arch/parisc/lib/checksum.c
@@ -106,20 +106,3 @@ __wsum csum_partial(const void *buff, int len, __wsum sum)
 }
 
 EXPORT_SYMBOL(csum_partial);
-
-/*
- * copy while checksumming, otherwise like csum_partial
- */
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-				       int len, __wsum sum)
-{
-	/*
-	 * It's 2:30 am and I don't feel like doing it real ...
-	 * This is lots slower than the real thing (tm)
-	 */
-	sum = csum_partial(src, len, sum);
-	memcpy(dst, src, len);
-
-	return sum;
-}
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 9cce06194dcc..d75fc5bf8f37 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -29,6 +29,7 @@ extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
 extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
 				    int len, __wsum sum, int *err_ptr);
 
+#define _HAVE_ARCH_CSUM_AND_COPY
 #define csum_partial_copy_nocheck(src, dst, len, sum)   \
         csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL)
 
diff --git a/arch/s390/include/asm/checksum.h b/arch/s390/include/asm/checksum.h
index 6d01c96aeb5c..6813bfa1eeb7 100644
--- a/arch/s390/include/asm/checksum.h
+++ b/arch/s390/include/asm/checksum.h
@@ -39,13 +39,6 @@ csum_partial(const void *buff, int len, __wsum sum)
 	return sum;
 }
 
-static inline __wsum
-csum_partial_copy_nocheck (const void *src, void *dst, int len, __wsum sum)
-{
-        memcpy(dst,src,len);
-	return csum_partial(dst, len, sum);
-}
-
 /*
  *      Fold a partial checksum without adding pseudo headers
  */
diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h
index 91571a42e44e..87269642d78d 100644
--- a/arch/sh/include/asm/checksum_32.h
+++ b/arch/sh/include/asm/checksum_32.h
@@ -34,6 +34,7 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
 					    int len, __wsum sum,
 					    int *src_err_ptr, int *dst_err_ptr);
 
+#define _HAVE_ARCH_CSUM_AND_COPY
 /*
  *	Note: when you get a NULL pointer exception here this means someone
  *	passed in an incorrect kernel address to one of these functions.
diff --git a/arch/sparc/include/asm/checksum.h b/arch/sparc/include/asm/checksum.h
index a6256cb6fc5c..deb4fe5aeafd 100644
--- a/arch/sparc/include/asm/checksum.h
+++ b/arch/sparc/include/asm/checksum.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef ___ASM_SPARC_CHECKSUM_H
 #define ___ASM_SPARC_CHECKSUM_H
+#define _HAVE_ARCH_CSUM_AND_COPY
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 #if defined(__sparc__) && defined(__arch64__)
 #include <asm/checksum_64.h>
diff --git a/arch/x86/include/asm/checksum.h b/arch/x86/include/asm/checksum.h
index 0ada98d5d09f..bca625a60186 100644
--- a/arch/x86/include/asm/checksum.h
+++ b/arch/x86/include/asm/checksum.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #define  _HAVE_ARCH_COPY_AND_CSUM_FROM_USER 1
 #define HAVE_CSUM_COPY_USER
+#define _HAVE_ARCH_CSUM_AND_COPY
 #ifdef CONFIG_X86_32
 # include <asm/checksum_32.h>
 #else
diff --git a/arch/x86/um/asm/checksum.h b/arch/x86/um/asm/checksum.h
index ff6bba2c8ab6..b07824500363 100644
--- a/arch/x86/um/asm/checksum.h
+++ b/arch/x86/um/asm/checksum.h
@@ -20,22 +20,6 @@
  */
 extern __wsum csum_partial(const void *buff, int len, __wsum sum);
 
-/*
- *	Note: when you get a NULL pointer exception here this means someone
- *	passed in an incorrect kernel address to one of these functions.
- *
- *	If you use these functions directly please don't forget the
- *	access_ok().
- */
-
-static __inline__
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-				       int len, __wsum sum)
-{
-	memcpy(dst, src, len);
-	return csum_partial(dst, len, sum);
-}
-
 /**
  * csum_fold - Fold and invert a 32bit checksum.
  * sum: 32bit unfolded sum
diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h
index 243a5fe79d3c..0c879099977f 100644
--- a/arch/xtensa/include/asm/checksum.h
+++ b/arch/xtensa/include/asm/checksum.h
@@ -41,6 +41,7 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
 					    int len, __wsum sum,
 					    int *src_err_ptr, int *dst_err_ptr);
 
+#define _HAVE_ARCH_CSUM_AND_COPY
 /*
  *	Note: when you get a NULL pointer exception here this means someone
  *	passed in an incorrect kernel address to one of these functions.
diff --git a/include/asm-generic/checksum.h b/include/asm-generic/checksum.h
index cd8b75aa770d..43e18db89c14 100644
--- a/include/asm-generic/checksum.h
+++ b/include/asm-generic/checksum.h
@@ -16,18 +16,6 @@
  */
 extern __wsum csum_partial(const void *buff, int len, __wsum sum);
 
-/*
- * the same as csum_partial, but copies from src while it
- * checksums
- *
- * here even more important to align src and dst on a 32-bit (or even
- * better 64-bit) boundary
- */
-#ifndef csum_partial_copy_nocheck
-__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len,
-		__wsum sum);
-#endif
-
 #ifndef ip_fast_csum
 /*
  * This is a version of ip_compute_csum() optimized for IP headers,
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 46754ba9d7b7..db9d02b5f88a 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -47,6 +47,15 @@ static __inline__ __wsum csum_and_copy_to_user
 }
 #endif
 
+#ifndef _HAVE_ARCH_CSUM_AND_COPY
+static inline __wsum
+csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+{
+	memcpy(dst, src, len);
+	return csum_partial(dst, len, sum);
+}
+#endif
+
 #ifndef HAVE_ARCH_CSUM_ADD
 static inline __wsum csum_add(__wsum csum, __wsum addend)
 {
diff --git a/lib/checksum.c b/lib/checksum.c
index c7861e84c526..6860d6b05a17 100644
--- a/lib/checksum.c
+++ b/lib/checksum.c
@@ -145,17 +145,6 @@ __sum16 ip_compute_csum(const void *buff, int len)
 }
 EXPORT_SYMBOL(ip_compute_csum);
 
-/*
- * copy from ds while checksumming, otherwise like csum_partial
- */
-__wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
-{
-	memcpy(dst, src, len);
-	return csum_partial(dst, len, sum);
-}
-EXPORT_SYMBOL(csum_partial_copy_nocheck);
-
 #ifndef csum_tcpudp_nofold
 static inline u32 from64to32(u64 x)
 {
-- 
cgit v1.2.3


From cc44c17baf7f3f833d36b2f2a1edb1cc0b6f2cc4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 11 Jul 2020 00:12:07 -0400
Subject: csum_partial_copy_nocheck(): drop the last argument

It's always 0.  Note that we theoretically could use ~0U as well -
result will be the same modulo 0xffff, _if_ the damn thing did the
right thing for any value of initial sum; later we'll make use of
that when convenient.

However, unlike csum_and_copy_..._user(), there are instances that
did not work for arbitrary initial sums; c6x is one such.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/checksum.h    | 2 +-
 arch/alpha/lib/csum_partial_copy.c   | 4 ++--
 arch/arm/include/asm/checksum.h      | 2 +-
 arch/arm/lib/csumpartialcopy.S       | 5 +++--
 arch/c6x/include/asm/checksum.h      | 2 +-
 arch/c6x/lib/csum_64plus.S           | 4 +---
 arch/m68k/include/asm/checksum.h     | 3 +--
 arch/m68k/lib/checksum.c             | 3 ++-
 arch/mips/include/asm/checksum.h     | 7 +++++--
 arch/mips/lib/csum_partial.S         | 4 ++--
 arch/powerpc/include/asm/checksum.h  | 4 ++--
 arch/sh/include/asm/checksum_32.h    | 5 ++---
 arch/sparc/include/asm/checksum_32.h | 4 ++--
 arch/sparc/include/asm/checksum_64.h | 8 ++++++--
 arch/sparc/lib/csum_copy.S           | 2 +-
 arch/x86/include/asm/checksum_32.h   | 5 ++---
 arch/x86/include/asm/checksum_64.h   | 3 +--
 arch/x86/lib/csum-wrappers_64.c      | 4 ++--
 arch/xtensa/include/asm/checksum.h   | 5 ++---
 drivers/net/ethernet/3com/typhoon.c  | 3 +--
 include/net/checksum.h               | 4 ++--
 lib/iov_iter.c                       | 2 +-
 net/core/skbuff.c                    | 4 ++--
 net/ipv4/icmp.c                      | 2 +-
 net/ipv4/ip_output.c                 | 2 +-
 net/ipv4/raw.c                       | 2 +-
 net/ipv6/raw.c                       | 2 +-
 27 files changed, 49 insertions(+), 48 deletions(-)

(limited to 'lib')

diff --git a/arch/alpha/include/asm/checksum.h b/arch/alpha/include/asm/checksum.h
index 7e8e4fa4362d..84f9faea864a 100644
--- a/arch/alpha/include/asm/checksum.h
+++ b/arch/alpha/include/asm/checksum.h
@@ -45,7 +45,7 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum);
 #define _HAVE_ARCH_CSUM_AND_COPY
 __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *errp);
 
-__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
 
 
 /*
diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c
index af1dad74e933..f363dc89fcbe 100644
--- a/arch/alpha/lib/csum_partial_copy.c
+++ b/arch/alpha/lib/csum_partial_copy.c
@@ -372,13 +372,13 @@ csum_and_copy_from_user(const void __user *src, void *dst, int len,
 EXPORT_SYMBOL(csum_and_copy_from_user);
 
 __wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
 	__wsum checksum;
 	mm_segment_t oldfs = get_fs();
 	set_fs(KERNEL_DS);
 	checksum = csum_and_copy_from_user((__force const void __user *)src,
-						dst, len, sum, NULL);
+						dst, len, 0, NULL);
 	set_fs(oldfs);
 	return checksum;
 }
diff --git a/arch/arm/include/asm/checksum.h b/arch/arm/include/asm/checksum.h
index 53f769508540..7612b2bd4e9b 100644
--- a/arch/arm/include/asm/checksum.h
+++ b/arch/arm/include/asm/checksum.h
@@ -35,7 +35,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum);
  */
 
 __wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
+csum_partial_copy_nocheck(const void *src, void *dst, int len);
 
 __wsum
 csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *err_ptr);
diff --git a/arch/arm/lib/csumpartialcopy.S b/arch/arm/lib/csumpartialcopy.S
index 184d97254a7a..aab914fbc86b 100644
--- a/arch/arm/lib/csumpartialcopy.S
+++ b/arch/arm/lib/csumpartialcopy.S
@@ -9,13 +9,14 @@
 
 		.text
 
-/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len, __u32 sum)
- * Params  : r0 = src, r1 = dst, r2 = len, r3 = checksum
+/* Function: __u32 csum_partial_copy_nocheck(const char *src, char *dst, int len)
+ * Params  : r0 = src, r1 = dst, r2 = len
  * Returns : r0 = new checksum
  */
 
 		.macro	save_regs
 		stmfd	sp!, {r1, r4 - r8, lr}
+		mov	r3, #0
 		.endm
 
 		.macro	load_regs
diff --git a/arch/c6x/include/asm/checksum.h b/arch/c6x/include/asm/checksum.h
index fa31bdc186b2..934918def632 100644
--- a/arch/c6x/include/asm/checksum.h
+++ b/arch/c6x/include/asm/checksum.h
@@ -27,7 +27,7 @@ csum_tcpudp_nofold(__be32 saddr, __be32 daddr, __u32 len,
 #define csum_tcpudp_nofold csum_tcpudp_nofold
 
 #define _HAVE_ARCH_CSUM_AND_COPY
-extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
+extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
 
 #include <asm-generic/checksum.h>
 
diff --git a/arch/c6x/lib/csum_64plus.S b/arch/c6x/lib/csum_64plus.S
index 9c07127485d1..57148866d8d3 100644
--- a/arch/c6x/lib/csum_64plus.S
+++ b/arch/c6x/lib/csum_64plus.S
@@ -24,7 +24,6 @@
 ENTRY(csum_partial_copy_nocheck)
 	MVC	.S2	ILC,B30
 
-	MV	.D1X	B6,A31		; given csum
 	ZERO	.D1	A9		; csum (a side)
 ||	ZERO	.D2	B9		; csum (b side)
 ||	SHRU	.S2X	A6,2,B5		; len / 4
@@ -144,8 +143,7 @@ L91:	SHRU	.S2X	A9,16,B4
 	SHRU	.S1	A9,16,A0
    [A0]	BNOP	.S1	L91,5
 
-L10:	ADD	.D1	A31,A9,A9
-	MV	.D1	A9,A4
+L10:	MV	.D1	A9,A4
 
 	BNOP	.S2	B3,4
 	MVC	.S2	B30,ILC
diff --git a/arch/m68k/include/asm/checksum.h b/arch/m68k/include/asm/checksum.h
index ab16881d84cb..d5e74c64b6cd 100644
--- a/arch/m68k/include/asm/checksum.h
+++ b/arch/m68k/include/asm/checksum.h
@@ -38,8 +38,7 @@ extern __wsum csum_and_copy_from_user(const void __user *src,
 						int *csum_err);
 
 extern __wsum csum_partial_copy_nocheck(const void *src,
-					      void *dst, int len,
-					      __wsum sum);
+					      void *dst, int len);
 
 /*
  *	This is a version of ip_fast_csum() optimized for IP headers,
diff --git a/arch/m68k/lib/checksum.c b/arch/m68k/lib/checksum.c
index 31797be9a3dc..86ddd2ee187d 100644
--- a/arch/m68k/lib/checksum.c
+++ b/arch/m68k/lib/checksum.c
@@ -324,9 +324,10 @@ EXPORT_SYMBOL(csum_and_copy_from_user);
  */
 
 __wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
 	unsigned long tmp1, tmp2;
+	__wsum sum = 0;
 	__asm__("movel %2,%4\n\t"
 		"btst #1,%4\n\t"	/* Check alignment */
 		"jeq 2f\n\t"
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index abdf1086873d..ec3159dbc1b3 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -102,8 +102,11 @@ __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
  * we have just one address space, so this is identical to the above)
  */
 #define _HAVE_ARCH_CSUM_AND_COPY
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-				       int len, __wsum sum);
+__wsum __csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
+static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
+{
+	return __csum_partial_copy_nocheck(src, dst, len, 0);
+}
 
 /*
  *	Fold a partial checksum without adding pseudo headers
diff --git a/arch/mips/lib/csum_partial.S b/arch/mips/lib/csum_partial.S
index 87fda0713b84..8d70855b0914 100644
--- a/arch/mips/lib/csum_partial.S
+++ b/arch/mips/lib/csum_partial.S
@@ -462,8 +462,8 @@ EXPORT_SYMBOL(csum_partial)
 	lw	errptr, 16(sp)
 #endif
 	.if \__nocheck == 1
-	FEXPORT(csum_partial_copy_nocheck)
-	EXPORT_SYMBOL(csum_partial_copy_nocheck)
+	FEXPORT(__csum_partial_copy_nocheck)
+	EXPORT_SYMBOL(__csum_partial_copy_nocheck)
 	.endif
 	move	sum, zero
 	move	odd, zero
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index d75fc5bf8f37..64299785f639 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -30,8 +30,8 @@ extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
 				    int len, __wsum sum, int *err_ptr);
 
 #define _HAVE_ARCH_CSUM_AND_COPY
-#define csum_partial_copy_nocheck(src, dst, len, sum)   \
-        csum_partial_copy_generic((src), (dst), (len), (sum), NULL, NULL)
+#define csum_partial_copy_nocheck(src, dst, len)   \
+        csum_partial_copy_generic((src), (dst), (len), 0, NULL, NULL)
 
 
 /*
diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h
index 87269642d78d..e8bf84d3b843 100644
--- a/arch/sh/include/asm/checksum_32.h
+++ b/arch/sh/include/asm/checksum_32.h
@@ -43,10 +43,9 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
  *	access_ok().
  */
 static inline
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-				 int len, __wsum sum)
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
-	return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+	return csum_partial_copy_generic(src, dst, len, 0, NULL, NULL);
 }
 
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h
index 479a0b812af5..d21d114436ba 100644
--- a/arch/sparc/include/asm/checksum_32.h
+++ b/arch/sparc/include/asm/checksum_32.h
@@ -42,7 +42,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum);
 unsigned int __csum_partial_copy_sparc_generic (const unsigned char *, unsigned char *);
 
 static inline __wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
 	register unsigned int ret asm("o0") = (unsigned int)src;
 	register char *d asm("o1") = dst;
@@ -52,7 +52,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
 		"call __csum_partial_copy_sparc_generic\n\t"
 		" mov %6, %%g7\n"
 	: "=&r" (ret), "=&r" (d), "=&r" (l)
-	: "0" (ret), "1" (d), "2" (l), "r" (sum)
+	: "0" (ret), "1" (d), "2" (l), "r" (0)
 	: "o2", "o3", "o4", "o5", "o7",
 	  "g2", "g3", "g4", "g5", "g7",
 	  "memory", "cc");
diff --git a/arch/sparc/include/asm/checksum_64.h b/arch/sparc/include/asm/checksum_64.h
index 0fa4433f5662..7aebdbe3ac96 100644
--- a/arch/sparc/include/asm/checksum_64.h
+++ b/arch/sparc/include/asm/checksum_64.h
@@ -38,8 +38,12 @@ __wsum csum_partial(const void * buff, int len, __wsum sum);
  * here even more important to align src and dst on a 32-bit (or even
  * better 64-bit) boundary
  */
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-				 int len, __wsum sum);
+__wsum __csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum);
+
+static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
+{
+	return __csum_partial_copy_nocheck(src, dst, len, 0);
+}
 
 long __csum_partial_copy_from_user(const void __user *src,
 				   void *dst, int len,
diff --git a/arch/sparc/lib/csum_copy.S b/arch/sparc/lib/csum_copy.S
index 26c644ba3ecb..72c900d21b12 100644
--- a/arch/sparc/lib/csum_copy.S
+++ b/arch/sparc/lib/csum_copy.S
@@ -33,7 +33,7 @@
 #endif
 
 #ifndef FUNC_NAME
-#define FUNC_NAME	csum_partial_copy_nocheck
+#define FUNC_NAME	__csum_partial_copy_nocheck
 #endif
 
 	.register	%g2, #scratch
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 11624c8a9d8d..137a3033edcc 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -38,10 +38,9 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
  *	If you use these functions directly please don't forget the
  *	access_ok().
  */
-static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst,
-					       int len, __wsum sum)
+static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
-	return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+	return csum_partial_copy_generic(src, dst, len, 0, NULL, NULL);
 }
 
 static inline __wsum csum_and_copy_from_user(const void __user *src,
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 0a289b87e872..5339f5dfc776 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -139,8 +139,7 @@ extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
 					  int len, __wsum isum, int *errp);
 extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
 					int len, __wsum isum, int *errp);
-extern __wsum csum_partial_copy_nocheck(const void *src, void *dst,
-					int len, __wsum sum);
+extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
 
 /**
  * ip_compute_csum - Compute an 16bit IP checksum.
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index ee63d7576fd2..245f929a1c2c 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -129,9 +129,9 @@ EXPORT_SYMBOL(csum_and_copy_to_user);
  * Returns an 32bit unfolded checksum of the buffer.
  */
 __wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
-	return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+	return csum_partial_copy_generic(src, dst, len, 0, NULL, NULL);
 }
 EXPORT_SYMBOL(csum_partial_copy_nocheck);
 
diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h
index 0c879099977f..dc09448935bf 100644
--- a/arch/xtensa/include/asm/checksum.h
+++ b/arch/xtensa/include/asm/checksum.h
@@ -47,10 +47,9 @@ asmlinkage __wsum csum_partial_copy_generic(const void *src, void *dst,
  *	passed in an incorrect kernel address to one of these functions.
  */
 static inline
-__wsum csum_partial_copy_nocheck(const void *src, void *dst,
-					int len, __wsum sum)
+__wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
-	return csum_partial_copy_generic(src, dst, len, sum, NULL, NULL);
+	return csum_partial_copy_generic(src, dst, len, 0, NULL, NULL);
 }
 
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
diff --git a/drivers/net/ethernet/3com/typhoon.c b/drivers/net/ethernet/3com/typhoon.c
index d3b30bacc94e..049cc0158a64 100644
--- a/drivers/net/ethernet/3com/typhoon.c
+++ b/drivers/net/ethernet/3com/typhoon.c
@@ -1419,8 +1419,7 @@ typhoon_download_firmware(struct typhoon *tp)
 			 * the checksum, we can do this once, at the end.
 			 */
 			csum = csum_fold(csum_partial_copy_nocheck(image_data,
-								   dpage, len,
-								   0));
+								   dpage, len));
 
 			iowrite32(len, ioaddr + TYPHOON_REG_BOOT_LENGTH);
 			iowrite32(le16_to_cpu((__force __le16)csum),
diff --git a/include/net/checksum.h b/include/net/checksum.h
index db9d02b5f88a..1029191986e3 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -49,10 +49,10 @@ static __inline__ __wsum csum_and_copy_to_user
 
 #ifndef _HAVE_ARCH_CSUM_AND_COPY
 static inline __wsum
-csum_partial_copy_nocheck(const void *src, void *dst, int len, __wsum sum)
+csum_partial_copy_nocheck(const void *src, void *dst, int len)
 {
 	memcpy(dst, src, len);
-	return csum_partial(dst, len, sum);
+	return csum_partial(dst, len, 0);
 }
 #endif
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e40786c8f12..84f3a9156684 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -581,7 +581,7 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes,
 static __wsum csum_and_memcpy(void *to, const void *from, size_t len,
 			      __wsum sum, size_t off)
 {
-	__wsum next = csum_partial_copy_nocheck(from, to, len, 0);
+	__wsum next = csum_partial_copy_nocheck(from, to, len);
 	return csum_block_add(sum, next, off);
 }
 
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 6e806da2913e..48dd4757b898 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -2736,7 +2736,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 		if (copy > len)
 			copy = len;
 		csum = csum_partial_copy_nocheck(skb->data + offset, to,
-						 copy, 0);
+						 copy);
 		if ((len -= copy) == 0)
 			return csum;
 		offset += copy;
@@ -2766,7 +2766,7 @@ __wsum skb_copy_and_csum_bits(const struct sk_buff *skb, int offset,
 				vaddr = kmap_atomic(p);
 				csum2 = csum_partial_copy_nocheck(vaddr + p_off,
 								  to + copied,
-								  p_len, 0);
+								  p_len);
 				kunmap_atomic(vaddr);
 				csum = csum_block_add(csum, csum2, pos);
 				pos += p_len;
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index e23dd5fc2e73..bdaaee52c41b 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -381,7 +381,7 @@ static void icmp_push_reply(struct icmp_bxm *icmp_param,
 
 		csum = csum_partial_copy_nocheck((void *)&icmp_param->data,
 						 (char *)icmph,
-						 icmp_param->head_len, 0);
+						 icmp_param->head_len);
 		skb_queue_walk(&sk->sk_write_queue, skb1) {
 			csum = csum_add(csum, skb1->csum);
 		}
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index 5bd059853376..bd79ea3e8c44 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1648,7 +1648,7 @@ static int ip_reply_glue_bits(void *dptr, char *to, int offset,
 {
 	__wsum csum;
 
-	csum = csum_partial_copy_nocheck(dptr+offset, to, len, 0);
+	csum = csum_partial_copy_nocheck(dptr+offset, to, len);
 	skb->csum = csum_block_add(skb->csum, csum, odd);
 	return 0;
 }
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 6fd4330287c2..79392154c30a 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -478,7 +478,7 @@ static int raw_getfrag(void *from, char *to, int offset, int len, int odd,
 			skb->csum = csum_block_add(
 				skb->csum,
 				csum_partial_copy_nocheck(rfv->hdr.c + offset,
-							  to, copy, 0),
+							  to, copy),
 				odd);
 
 		odd = 0;
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 874f01cd7aec..6e4ab80a3b94 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -746,7 +746,7 @@ static int raw6_getfrag(void *from, char *to, int offset, int len, int odd,
 			skb->csum = csum_block_add(
 				skb->csum,
 				csum_partial_copy_nocheck(rfv->c + offset,
-							  to, copy, 0),
+							  to, copy),
 				odd);
 
 		odd = 0;
-- 
cgit v1.2.3


From 99a2c96d52d312b11a943372964226fa134de3b1 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 11 Jul 2020 23:51:04 -0400
Subject: csum_and_copy_..._user(): pass 0xffffffff instead of 0 as initial sum

Preparation for the change of calling conventions; right now all
callers pass 0 as initial sum.  Passing 0xffffffff instead yields
the values comparable mod 0xffff and guarantees that 0 will not
be returned on success.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 lib/iov_iter.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 84f3a9156684..437fbc14c972 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1452,7 +1452,7 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 		int err = 0;
 		next = csum_and_copy_from_user(v.iov_base,
 					       (to += v.iov_len) - v.iov_len,
-					       v.iov_len, 0, &err);
+					       v.iov_len, ~0U, &err);
 		if (!err) {
 			sum = csum_block_add(sum, next, off);
 			off += v.iov_len;
@@ -1494,7 +1494,7 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
 		int err = 0;
 		next = csum_and_copy_from_user(v.iov_base,
 					       (to += v.iov_len) - v.iov_len,
-					       v.iov_len, 0, &err);
+					       v.iov_len, ~0U, &err);
 		if (err)
 			return false;
 		sum = csum_block_add(sum, next, off);
@@ -1540,7 +1540,7 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
 		int err = 0;
 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
 					     v.iov_base,
-					     v.iov_len, 0, &err);
+					     v.iov_len, ~0U, &err);
 		if (!err) {
 			sum = csum_block_add(sum, next, off);
 			off += v.iov_len;
-- 
cgit v1.2.3


From c693cc4676a055c4126e487b30b0a96ea7ec9936 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 11 Jul 2020 00:27:49 -0400
Subject: saner calling conventions for csum_and_copy_..._user()

All callers of these primitives will
	* discard anything we might've copied in case of error
	* ignore the csum value in case of error
	* always pass 0xffffffff as the initial sum, so the
resulting csum value (in case of success, that is) will never be 0.

That suggest the following calling conventions:
	* don't pass err_ptr - just return 0 on error.
	* don't bother with zeroing destination, etc. in case of error
	* don't pass the initial sum - just use 0xffffffff.

This commit does the minimal conversion in the instances of csum_and_copy_...();
the changes of actual asm code behind them are done later in the series.
Note that this asm code is often shared with csum_partial_copy_nocheck();
the difference is that csum_partial_copy_nocheck() passes 0 for initial
sum while csum_and_copy_..._user() pass 0xffffffff.  Fortunately, we are
free to pass 0xffffffff in all cases and subsequent patches will use that
freedom without any special comments.

A part that could be split off: parisc and uml/i386 claimed to have
csum_and_copy_to_user() instances of their own, but those were identical
to the generic one, so we simply drop them.  Not sure if it's worth
a separate commit...

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/alpha/include/asm/checksum.h    |  2 +-
 arch/alpha/lib/csum_partial_copy.c   | 25 ++++++-------
 arch/arm/include/asm/checksum.h      | 13 ++++---
 arch/m68k/include/asm/checksum.h     |  3 +-
 arch/m68k/lib/checksum.c             |  8 ++---
 arch/mips/include/asm/checksum.h     | 46 ++++++++++++------------
 arch/parisc/include/asm/checksum.h   | 20 -----------
 arch/powerpc/include/asm/checksum.h  |  4 +--
 arch/powerpc/lib/checksum_wrappers.c | 68 +++++++++++-------------------------
 arch/sh/include/asm/checksum_32.h    | 36 +++++++++----------
 arch/sparc/include/asm/checksum_32.h | 65 ++++++++++++++++------------------
 arch/sparc/include/asm/checksum_64.h | 14 ++++----
 arch/x86/include/asm/checksum_32.h   | 35 ++++++++-----------
 arch/x86/include/asm/checksum_64.h   |  6 ++--
 arch/x86/lib/csum-wrappers_64.c      | 38 +++++++++-----------
 arch/x86/um/asm/checksum_32.h        | 23 ------------
 arch/xtensa/include/asm/checksum.h   | 30 ++++++++--------
 include/net/checksum.h               | 15 ++++----
 lib/iov_iter.c                       | 19 +++++-----
 19 files changed, 183 insertions(+), 287 deletions(-)

(limited to 'lib')

diff --git a/arch/alpha/include/asm/checksum.h b/arch/alpha/include/asm/checksum.h
index 84f9faea864a..99d631e146b2 100644
--- a/arch/alpha/include/asm/checksum.h
+++ b/arch/alpha/include/asm/checksum.h
@@ -43,7 +43,7 @@ extern __wsum csum_partial(const void *buff, int len, __wsum sum);
  */
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 #define _HAVE_ARCH_CSUM_AND_COPY
-__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len, __wsum sum, int *errp);
+__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len);
 
 __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
 
diff --git a/arch/alpha/lib/csum_partial_copy.c b/arch/alpha/lib/csum_partial_copy.c
index f363dc89fcbe..3c0e89c39ddb 100644
--- a/arch/alpha/lib/csum_partial_copy.c
+++ b/arch/alpha/lib/csum_partial_copy.c
@@ -325,30 +325,27 @@ csum_partial_cfu_unaligned(const unsigned long __user * src,
 }
 
 __wsum
-csum_and_copy_from_user(const void __user *src, void *dst, int len,
-			       __wsum sum, int *errp)
+csum_and_copy_from_user(const void __user *src, void *dst, int len)
 {
-	unsigned long checksum = (__force u32) sum;
+	unsigned long checksum = ~0U;
 	unsigned long soff = 7 & (unsigned long) src;
 	unsigned long doff = 7 & (unsigned long) dst;
+	int err = 0;
 
 	if (len) {
-		if (!access_ok(src, len)) {
-			if (errp) *errp = -EFAULT;
-			memset(dst, 0, len);
-			return sum;
-		}
+		if (!access_ok(src, len))
+			return 0;
 		if (!doff) {
 			if (!soff)
 				checksum = csum_partial_cfu_aligned(
 					(const unsigned long __user *) src,
 					(unsigned long *) dst,
-					len-8, checksum, errp);
+					len-8, checksum, &err);
 			else
 				checksum = csum_partial_cfu_dest_aligned(
 					(const unsigned long __user *) src,
 					(unsigned long *) dst,
-					soff, len-8, checksum, errp);
+					soff, len-8, checksum, &err);
 		} else {
 			unsigned long partial_dest;
 			ldq_u(partial_dest, dst);
@@ -357,15 +354,15 @@ csum_and_copy_from_user(const void __user *src, void *dst, int len,
 					(const unsigned long __user *) src,
 					(unsigned long *) dst,
 					doff, len-8, checksum,
-					partial_dest, errp);
+					partial_dest, &err);
 			else
 				checksum = csum_partial_cfu_unaligned(
 					(const unsigned long __user *) src,
 					(unsigned long *) dst,
 					soff, doff, len-8, checksum,
-					partial_dest, errp);
+					partial_dest, &err);
 		}
-		checksum = from64to16 (checksum);
+		checksum = err ? 0 : from64to16 (checksum);
 	}
 	return (__force __wsum)checksum;
 }
@@ -378,7 +375,7 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len)
 	mm_segment_t oldfs = get_fs();
 	set_fs(KERNEL_DS);
 	checksum = csum_and_copy_from_user((__force const void __user *)src,
-						dst, len, 0, NULL);
+						dst, len);
 	set_fs(oldfs);
 	return checksum;
 }
diff --git a/arch/arm/include/asm/checksum.h b/arch/arm/include/asm/checksum.h
index 7612b2bd4e9b..1601c132b064 100644
--- a/arch/arm/include/asm/checksum.h
+++ b/arch/arm/include/asm/checksum.h
@@ -43,16 +43,15 @@ csum_partial_copy_from_user(const void __user *src, void *dst, int len, __wsum s
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 #define _HAVE_ARCH_CSUM_AND_COPY
 static inline
-__wsum csum_and_copy_from_user (const void __user *src, void *dst,
-				      int len, __wsum sum, int *err_ptr)
+__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len)
 {
-	if (access_ok(src, len))
-		return csum_partial_copy_from_user(src, dst, len, sum, err_ptr);
+	int err = 0;
 
-	if (len)
-		*err_ptr = -EFAULT;
+	if (!access_ok(src, len))
+		return 0;
 
-	return sum;
+	sum = csum_partial_copy_from_user(src, dst, len, ~0U, &err);
+	return err ? 0 : sum;
 }
 
 /*
diff --git a/arch/m68k/include/asm/checksum.h b/arch/m68k/include/asm/checksum.h
index d5e74c64b6cd..692e7b6cc042 100644
--- a/arch/m68k/include/asm/checksum.h
+++ b/arch/m68k/include/asm/checksum.h
@@ -34,8 +34,7 @@ __wsum csum_partial(const void *buff, int len, __wsum sum);
 #define _HAVE_ARCH_CSUM_AND_COPY
 extern __wsum csum_and_copy_from_user(const void __user *src,
 						void *dst,
-						int len, __wsum sum,
-						int *csum_err);
+						int len);
 
 extern __wsum csum_partial_copy_nocheck(const void *src,
 					      void *dst, int len);
diff --git a/arch/m68k/lib/checksum.c b/arch/m68k/lib/checksum.c
index 86ddd2ee187d..3aeca261f622 100644
--- a/arch/m68k/lib/checksum.c
+++ b/arch/m68k/lib/checksum.c
@@ -129,8 +129,7 @@ EXPORT_SYMBOL(csum_partial);
  */
 
 __wsum
-csum_and_copy_from_user(const void __user *src, void *dst,
-			    int len, __wsum sum, int *csum_err)
+csum_and_copy_from_user(const void __user *src, void *dst, int len)
 {
 	/*
 	 * GCC doesn't like more than 10 operands for the asm
@@ -138,6 +137,7 @@ csum_and_copy_from_user(const void __user *src, void *dst,
 	 * code.
 	 */
 	unsigned long tmp1, tmp2;
+	__wsum sum = ~0U;
 
 	__asm__("movel %2,%4\n\t"
 		"btst #1,%4\n\t"	/* Check alignment */
@@ -311,9 +311,7 @@ csum_and_copy_from_user(const void __user *src, void *dst,
 		: "0" (sum), "1" (len), "2" (src), "3" (dst)
 	    );
 
-	*csum_err = tmp2;
-
-	return(sum);
+	return tmp2 ? 0 : sum;
 }
 
 EXPORT_SYMBOL(csum_and_copy_from_user);
diff --git a/arch/mips/include/asm/checksum.h b/arch/mips/include/asm/checksum.h
index ec3159dbc1b3..9dfce3522760 100644
--- a/arch/mips/include/asm/checksum.h
+++ b/arch/mips/include/asm/checksum.h
@@ -60,16 +60,15 @@ __wsum csum_partial_copy_from_user(const void __user *src, void *dst, int len,
 
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 static inline
-__wsum csum_and_copy_from_user(const void __user *src, void *dst,
-			       int len, __wsum sum, int *err_ptr)
+__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len)
 {
-	if (access_ok(src, len))
-		return csum_partial_copy_from_user(src, dst, len, sum,
-						   err_ptr);
-	if (len)
-		*err_ptr = -EFAULT;
+	__wsum sum = ~0U;
+	int err = 0;
 
-	return sum;
+	if (!access_ok(src, len))
+		return 0;
+	sum = csum_partial_copy_from_user(src, dst, len, sum, &err);
+	return err ? 0 : sum;
 }
 
 /*
@@ -77,24 +76,23 @@ __wsum csum_and_copy_from_user(const void __user *src, void *dst,
  */
 #define HAVE_CSUM_COPY_USER
 static inline
-__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
-			     __wsum sum, int *err_ptr)
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len)
 {
+	int err = 0;
+	__wsum sum = ~0U;
+
 	might_fault();
-	if (access_ok(dst, len)) {
-		if (uaccess_kernel())
-			return __csum_partial_copy_kernel(src,
-							  (__force void *)dst,
-							  len, sum, err_ptr);
-		else
-			return __csum_partial_copy_to_user(src,
-							   (__force void *)dst,
-							   len, sum, err_ptr);
-	}
-	if (len)
-		*err_ptr = -EFAULT;
-
-	return (__force __wsum)-1; /* invalid checksum */
+	if (!access_ok(dst, len))
+		return 0;
+	if (uaccess_kernel())
+		sum = __csum_partial_copy_kernel(src,
+						  (__force void *)dst,
+						  len, sum, &err);
+	else
+		sum = __csum_partial_copy_to_user(src,
+						   (__force void *)dst,
+						   len, sum, &err);
+	return err ? 0 : sum;
 }
 
 /*
diff --git a/arch/parisc/include/asm/checksum.h b/arch/parisc/include/asm/checksum.h
index 522cd574c068..3c43baca7b39 100644
--- a/arch/parisc/include/asm/checksum.h
+++ b/arch/parisc/include/asm/checksum.h
@@ -173,25 +173,5 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 	return csum_fold(sum);
 }
 
-/* 
- *	Copy and checksum to user
- */
-#define HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user(const void *src,
-						      void __user *dst,
-						      int len, __wsum sum,
-						      int *err_ptr)
-{
-	/* code stolen from include/asm-mips64 */
-	sum = csum_partial(src, len, sum);
-	 
-	if (copy_to_user(dst, src, len)) {
-		*err_ptr = -EFAULT;
-		return (__force __wsum)-1;
-	}
-
-	return sum;
-}
-
 #endif
 
diff --git a/arch/powerpc/include/asm/checksum.h b/arch/powerpc/include/asm/checksum.h
index 64299785f639..dba685d984c0 100644
--- a/arch/powerpc/include/asm/checksum.h
+++ b/arch/powerpc/include/asm/checksum.h
@@ -24,10 +24,10 @@ extern __wsum csum_partial_copy_generic(const void *src, void *dst,
 
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
-				      int len, __wsum sum, int *err_ptr);
+				      int len);
 #define HAVE_CSUM_COPY_USER
 extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
-				    int len, __wsum sum, int *err_ptr);
+				    int len);
 
 #define _HAVE_ARCH_CSUM_AND_COPY
 #define csum_partial_copy_nocheck(src, dst, len)   \
diff --git a/arch/powerpc/lib/checksum_wrappers.c b/arch/powerpc/lib/checksum_wrappers.c
index fabe4db28726..b1faa82dd8af 100644
--- a/arch/powerpc/lib/checksum_wrappers.c
+++ b/arch/powerpc/lib/checksum_wrappers.c
@@ -12,82 +12,56 @@
 #include <linux/uaccess.h>
 
 __wsum csum_and_copy_from_user(const void __user *src, void *dst,
-			       int len, __wsum sum, int *err_ptr)
+			       int len)
 {
 	unsigned int csum;
+	int err = 0;
 
 	might_sleep();
-	allow_read_from_user(src, len);
-
-	*err_ptr = 0;
 
-	if (!len) {
-		csum = 0;
-		goto out;
-	}
+	if (unlikely(!access_ok(src, len)))
+		return 0;
 
-	if (unlikely((len < 0) || !access_ok(src, len))) {
-		*err_ptr = -EFAULT;
-		csum = (__force unsigned int)sum;
-		goto out;
-	}
+	allow_read_from_user(src, len);
 
 	csum = csum_partial_copy_generic((void __force *)src, dst,
-					 len, sum, err_ptr, NULL);
+					 len, ~0U, &err, NULL);
 
-	if (unlikely(*err_ptr)) {
+	if (unlikely(err)) {
 		int missing = __copy_from_user(dst, src, len);
 
-		if (missing) {
-			memset(dst + len - missing, 0, missing);
-			*err_ptr = -EFAULT;
-		} else {
-			*err_ptr = 0;
-		}
-
-		csum = csum_partial(dst, len, sum);
+		if (missing)
+			csum = 0;
+		else
+			csum = csum_partial(dst, len, ~0U);
 	}
 
-out:
 	prevent_read_from_user(src, len);
 	return (__force __wsum)csum;
 }
 EXPORT_SYMBOL(csum_and_copy_from_user);
 
-__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len,
-			     __wsum sum, int *err_ptr)
+__wsum csum_and_copy_to_user(const void *src, void __user *dst, int len)
 {
 	unsigned int csum;
+	int err = 0;
 
 	might_sleep();
-	allow_write_to_user(dst, len);
-
-	*err_ptr = 0;
-
-	if (!len) {
-		csum = 0;
-		goto out;
-	}
+	if (unlikely(!access_ok(dst, len)))
+		return 0;
 
-	if (unlikely((len < 0) || !access_ok(dst, len))) {
-		*err_ptr = -EFAULT;
-		csum = -1; /* invalid checksum */
-		goto out;
-	}
+	allow_write_to_user(dst, len);
 
 	csum = csum_partial_copy_generic(src, (void __force *)dst,
-					 len, sum, NULL, err_ptr);
+					 len, ~0U, NULL, &err);
 
-	if (unlikely(*err_ptr)) {
-		csum = csum_partial(src, len, sum);
+	if (unlikely(err)) {
+		csum = csum_partial(src, len, ~0U);
 
-		if (copy_to_user(dst, src, len)) {
-			*err_ptr = -EFAULT;
-			csum = -1; /* invalid checksum */
-		}
+		if (copy_to_user(dst, src, len))
+			csum = 0;
 	}
 
-out:
 	prevent_write_to_user(dst, len);
 	return (__force __wsum)csum;
 }
diff --git a/arch/sh/include/asm/checksum_32.h b/arch/sh/include/asm/checksum_32.h
index e8bf84d3b843..a08e8eb924ed 100644
--- a/arch/sh/include/asm/checksum_32.h
+++ b/arch/sh/include/asm/checksum_32.h
@@ -50,15 +50,16 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
 
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 static inline
-__wsum csum_and_copy_from_user(const void __user *src, void *dst,
-				   int len, __wsum sum, int *err_ptr)
+__wsum csum_and_copy_from_user(const void __user *src, void *dst, int len)
 {
-	if (access_ok(src, len))
-		return csum_partial_copy_generic((__force const void *)src, dst,
-					len, sum, err_ptr, NULL);
-	if (len)
-		*err_ptr = -EFAULT;
-	return sum;
+	int err = 0;
+	__wsum sum = ~0U;
+
+	if (!access_ok(src, len))
+		return 0;
+	sum = csum_partial_copy_generic((__force const void *)src, dst,
+					len, sum, &err, NULL);
+	return err ? 0 : sum;
 }
 
 /*
@@ -199,16 +200,15 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 #define HAVE_CSUM_COPY_USER
 static inline __wsum csum_and_copy_to_user(const void *src,
 					   void __user *dst,
-					   int len, __wsum sum,
-					   int *err_ptr)
+					   int len)
 {
-	if (access_ok(dst, len))
-		return csum_partial_copy_generic((__force const void *)src,
-						dst, len, sum, NULL, err_ptr);
-
-	if (len)
-		*err_ptr = -EFAULT;
-
-	return (__force __wsum)-1; /* invalid checksum */
+	int err = 0;
+	__wsum sum = ~0U;
+
+	if (!access_ok(dst, len))
+		return 0;
+	sum = csum_partial_copy_generic((__force const void *)src,
+						dst, len, sum, NULL, &err);
+	return err ? 0 : sum;
 }
 #endif /* __ASM_SH_CHECKSUM_H */
diff --git a/arch/sparc/include/asm/checksum_32.h b/arch/sparc/include/asm/checksum_32.h
index d21d114436ba..b5873b7b7bf0 100644
--- a/arch/sparc/include/asm/checksum_32.h
+++ b/arch/sparc/include/asm/checksum_32.h
@@ -60,19 +60,16 @@ csum_partial_copy_nocheck(const void *src, void *dst, int len)
 }
 
 static inline __wsum
-csum_and_copy_from_user(const void __user *src, void *dst, int len,
-			    __wsum sum, int *err)
+csum_and_copy_from_user(const void __user *src, void *dst, int len)
   {
 	register unsigned long ret asm("o0") = (unsigned long)src;
 	register char *d asm("o1") = dst;
 	register int l asm("g1") = len;
-	register __wsum s asm("g7") = sum;
+	register __wsum s asm("g7") = ~0U;
+	int err = 0;
 
-	if (unlikely(!access_ok(src, len))) {
-		if (len)
-			*err = -EFAULT;
-		return sum;
-	}
+	if (unlikely(!access_ok(src, len)))
+		return 0;
 
 	__asm__ __volatile__ (
 	".section __ex_table,#alloc\n\t"
@@ -83,42 +80,40 @@ csum_and_copy_from_user(const void __user *src, void *dst, int len,
 	"call __csum_partial_copy_sparc_generic\n\t"
 	" st %8, [%%sp + 64]\n"
 	: "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
-	: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
+	: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (&err)
 	: "o2", "o3", "o4", "o5", "o7", "g2", "g3", "g4", "g5",
 	  "cc", "memory");
-	return (__force __wsum)ret;
+	return err ? 0 : (__force __wsum)ret;
 }
 
 #define HAVE_CSUM_COPY_USER
 
 static inline __wsum
-csum_and_copy_to_user(const void *src, void __user *dst, int len,
-			  __wsum sum, int *err)
+csum_and_copy_to_user(const void *src, void __user *dst, int len)
 {
-	if (!access_ok(dst, len)) {
-		*err = -EFAULT;
-		return sum;
-	} else {
-		register unsigned long ret asm("o0") = (unsigned long)src;
-		register char __user *d asm("o1") = dst;
-		register int l asm("g1") = len;
-		register __wsum s asm("g7") = sum;
+	register unsigned long ret asm("o0") = (unsigned long)src;
+	register char __user *d asm("o1") = dst;
+	register int l asm("g1") = len;
+	register __wsum s asm("g7") = ~0U;
+	int err = 0;
 
-		__asm__ __volatile__ (
-		".section __ex_table,#alloc\n\t"
-		".align 4\n\t"
-		".word 1f,1\n\t"
-		".previous\n"
-		"1:\n\t"
-		"call __csum_partial_copy_sparc_generic\n\t"
-		" st %8, [%%sp + 64]\n"
-		: "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
-		: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (err)
-		: "o2", "o3", "o4", "o5", "o7",
-		  "g2", "g3", "g4", "g5",
-		  "cc", "memory");
-		return (__force __wsum)ret;
-	}
+	if (!access_ok(dst, len))
+		return 0;
+
+	__asm__ __volatile__ (
+	".section __ex_table,#alloc\n\t"
+	".align 4\n\t"
+	".word 1f,1\n\t"
+	".previous\n"
+	"1:\n\t"
+	"call __csum_partial_copy_sparc_generic\n\t"
+	" st %8, [%%sp + 64]\n"
+	: "=&r" (ret), "=&r" (d), "=&r" (l), "=&r" (s)
+	: "0" (ret), "1" (d), "2" (l), "3" (s), "r" (&err)
+	: "o2", "o3", "o4", "o5", "o7",
+	  "g2", "g3", "g4", "g5",
+	  "cc", "memory");
+	return err ? 0 : (__force __wsum)ret;
 }
 
 /* ihl is always 5 or greater, almost always is 5, and iph is word aligned
diff --git a/arch/sparc/include/asm/checksum_64.h b/arch/sparc/include/asm/checksum_64.h
index 7aebdbe3ac96..4d0bbff43e62 100644
--- a/arch/sparc/include/asm/checksum_64.h
+++ b/arch/sparc/include/asm/checksum_64.h
@@ -51,12 +51,11 @@ long __csum_partial_copy_from_user(const void __user *src,
 
 static inline __wsum
 csum_and_copy_from_user(const void __user *src,
-			    void *dst, int len,
-			    __wsum sum, int *err)
+			    void *dst, int len)
 {
-	long ret = __csum_partial_copy_from_user(src, dst, len, sum);
+	long ret = __csum_partial_copy_from_user(src, dst, len, ~0U);
 	if (ret < 0)
-		*err = -EFAULT;
+		return 0;
 	return (__force __wsum) ret;
 }
 
@@ -70,12 +69,11 @@ long __csum_partial_copy_to_user(const void *src,
 
 static inline __wsum
 csum_and_copy_to_user(const void *src,
-		      void __user *dst, int len,
-		      __wsum sum, int *err)
+		      void __user *dst, int len)
 {
-	long ret = __csum_partial_copy_to_user(src, dst, len, sum);
+	long ret = __csum_partial_copy_to_user(src, dst, len, ~0U);
 	if (ret < 0)
-		*err = -EFAULT;
+		return 0;
 	return (__force __wsum) ret;
 }
 
diff --git a/arch/x86/include/asm/checksum_32.h b/arch/x86/include/asm/checksum_32.h
index 137a3033edcc..5948cde9e4ad 100644
--- a/arch/x86/include/asm/checksum_32.h
+++ b/arch/x86/include/asm/checksum_32.h
@@ -44,22 +44,19 @@ static inline __wsum csum_partial_copy_nocheck(const void *src, void *dst, int l
 }
 
 static inline __wsum csum_and_copy_from_user(const void __user *src,
-					     void *dst, int len,
-					     __wsum sum, int *err_ptr)
+					     void *dst, int len)
 {
 	__wsum ret;
+	int err = 0;
 
 	might_sleep();
-	if (!user_access_begin(src, len)) {
-		if (len)
-			*err_ptr = -EFAULT;
-		return sum;
-	}
+	if (!user_access_begin(src, len))
+		return 0;
 	ret = csum_partial_copy_generic((__force void *)src, dst,
-					len, sum, err_ptr, NULL);
+					len, ~0U, &err, NULL);
 	user_access_end();
 
-	return ret;
+	return err ? 0 : ret;
 }
 
 /*
@@ -177,23 +174,19 @@ static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
  */
 static inline __wsum csum_and_copy_to_user(const void *src,
 					   void __user *dst,
-					   int len, __wsum sum,
-					   int *err_ptr)
+					   int len)
 {
 	__wsum ret;
+	int err = 0;
 
 	might_sleep();
-	if (user_access_begin(dst, len)) {
-		ret = csum_partial_copy_generic(src, (__force void *)dst,
-						len, sum, NULL, err_ptr);
-		user_access_end();
-		return ret;
-	}
+	if (!user_access_begin(dst, len))
+		return 0;
 
-	if (len)
-		*err_ptr = -EFAULT;
-
-	return (__force __wsum)-1; /* invalid checksum */
+	ret = csum_partial_copy_generic(src, (__force void *)dst,
+					len, ~0U, NULL, &err);
+	user_access_end();
+	return err ? 0 : ret;
 }
 
 #endif /* _ASM_X86_CHECKSUM_32_H */
diff --git a/arch/x86/include/asm/checksum_64.h b/arch/x86/include/asm/checksum_64.h
index 5339f5dfc776..9af3aed54c6b 100644
--- a/arch/x86/include/asm/checksum_64.h
+++ b/arch/x86/include/asm/checksum_64.h
@@ -135,10 +135,8 @@ extern __visible __wsum csum_partial_copy_generic(const void *src, const void *d
 					int *src_err_ptr, int *dst_err_ptr);
 
 
-extern __wsum csum_and_copy_from_user(const void __user *src, void *dst,
-					  int len, __wsum isum, int *errp);
-extern __wsum csum_and_copy_to_user(const void *src, void __user *dst,
-					int len, __wsum isum, int *errp);
+extern __wsum csum_and_copy_from_user(const void __user *src, void *dst, int len);
+extern __wsum csum_and_copy_to_user(const void *src, void __user *dst, int len);
 extern __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len);
 
 /**
diff --git a/arch/x86/lib/csum-wrappers_64.c b/arch/x86/lib/csum-wrappers_64.c
index 245f929a1c2c..ae2fb87e2274 100644
--- a/arch/x86/lib/csum-wrappers_64.c
+++ b/arch/x86/lib/csum-wrappers_64.c
@@ -22,13 +22,15 @@
  */
 __wsum
 csum_and_copy_from_user(const void __user *src, void *dst,
-			    int len, __wsum isum, int *errp)
+			    int len)
 {
+	int err = 0;
+	__wsum isum = ~0U;
+
 	might_sleep();
-	*errp = 0;
 
 	if (!user_access_begin(src, len))
-		goto out_err;
+		return 0;
 
 	/*
 	 * Why 6, not 7? To handle odd addresses aligned we
@@ -53,20 +55,15 @@ csum_and_copy_from_user(const void __user *src, void *dst,
 		}
 	}
 	isum = csum_partial_copy_generic((__force const void *)src,
-				dst, len, isum, errp, NULL);
+				dst, len, isum, &err, NULL);
 	user_access_end();
-	if (unlikely(*errp))
-		goto out_err;
-
+	if (unlikely(err))
+		isum = 0;
 	return isum;
 
 out:
 	user_access_end();
-out_err:
-	*errp = -EFAULT;
-	memset(dst, 0, len);
-
-	return isum;
+	return 0;
 }
 EXPORT_SYMBOL(csum_and_copy_from_user);
 
@@ -83,16 +80,15 @@ EXPORT_SYMBOL(csum_and_copy_from_user);
  */
 __wsum
 csum_and_copy_to_user(const void *src, void __user *dst,
-			  int len, __wsum isum, int *errp)
+			  int len)
 {
-	__wsum ret;
+	__wsum ret, isum = ~0U;
+	int err = 0;
 
 	might_sleep();
 
-	if (!user_access_begin(dst, len)) {
-		*errp = -EFAULT;
+	if (!user_access_begin(dst, len))
 		return 0;
-	}
 
 	if (unlikely((unsigned long)dst & 6)) {
 		while (((unsigned long)dst & 6) && len >= 2) {
@@ -107,15 +103,13 @@ csum_and_copy_to_user(const void *src, void __user *dst,
 		}
 	}
 
-	*errp = 0;
 	ret = csum_partial_copy_generic(src, (void __force *)dst,
-					len, isum, NULL, errp);
+					len, isum, NULL, &err);
 	user_access_end();
-	return ret;
+	return err ? 0 : ret;
 out:
 	user_access_end();
-	*errp = -EFAULT;
-	return isum;
+	return 0;
 }
 EXPORT_SYMBOL(csum_and_copy_to_user);
 
diff --git a/arch/x86/um/asm/checksum_32.h b/arch/x86/um/asm/checksum_32.h
index b9ac7c9eb72c..0b13c2947ad1 100644
--- a/arch/x86/um/asm/checksum_32.h
+++ b/arch/x86/um/asm/checksum_32.h
@@ -35,27 +35,4 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
 	return csum_fold(sum);
 }
 
-/*
- *	Copy and checksum to user
- */
-#define HAVE_CSUM_COPY_USER
-static __inline__ __wsum csum_and_copy_to_user(const void *src,
-						     void __user *dst,
-						     int len, __wsum sum, int *err_ptr)
-{
-	if (access_ok(dst, len)) {
-		if (copy_to_user(dst, src, len)) {
-			*err_ptr = -EFAULT;
-			return (__force __wsum)-1;
-		}
-
-		return csum_partial(src, len, sum);
-	}
-
-	if (len)
-		*err_ptr = -EFAULT;
-
-	return (__force __wsum)-1; /* invalid checksum */
-}
-
 #endif
diff --git a/arch/xtensa/include/asm/checksum.h b/arch/xtensa/include/asm/checksum.h
index dc09448935bf..fe78fba7bd64 100644
--- a/arch/xtensa/include/asm/checksum.h
+++ b/arch/xtensa/include/asm/checksum.h
@@ -55,14 +55,16 @@ __wsum csum_partial_copy_nocheck(const void *src, void *dst, int len)
 #define _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 static inline
 __wsum csum_and_copy_from_user(const void __user *src, void *dst,
-				   int len, __wsum sum, int *err_ptr)
+				   int len)
 {
-	if (access_ok(src, len))
-		return csum_partial_copy_generic((__force const void *)src, dst,
-					len, sum, err_ptr, NULL);
-	if (len)
-		*err_ptr = -EFAULT;
-	return sum;
+	int err = 0;
+
+	if (!access_ok(src, len))
+		return 0;
+
+	sum = csum_partial_copy_generic((__force const void *)src, dst,
+					len, ~0U, &err, NULL);
+	return err ? 0 : sum;
 }
 
 /*
@@ -243,15 +245,15 @@ static __inline__ __sum16 csum_ipv6_magic(const struct in6_addr *saddr,
  */
 #define HAVE_CSUM_COPY_USER
 static __inline__ __wsum csum_and_copy_to_user(const void *src,
-					       void __user *dst, int len,
-					       __wsum sum, int *err_ptr)
+					       void __user *dst, int len)
 {
-	if (access_ok(dst, len))
-		return csum_partial_copy_generic(src,dst,len,sum,NULL,err_ptr);
+	int err = 0;
+	__wsum sum = ~0U;
 
-	if (len)
-		*err_ptr = -EFAULT;
+	if (!access_ok(dst, len))
+		return 0;
 
-	return (__force __wsum)-1; /* invalid checksum */
+	sum = csum_partial_copy_generic(src,dst,len,sum,NULL,&err);
+	return err ? 0 : sum;
 }
 #endif
diff --git a/include/net/checksum.h b/include/net/checksum.h
index 1029191986e3..0d05b9e8690b 100644
--- a/include/net/checksum.h
+++ b/include/net/checksum.h
@@ -24,26 +24,23 @@
 #ifndef _HAVE_ARCH_COPY_AND_CSUM_FROM_USER
 static inline
 __wsum csum_and_copy_from_user (const void __user *src, void *dst,
-				      int len, __wsum sum, int *err_ptr)
+				      int len)
 {
 	if (copy_from_user(dst, src, len))
-		*err_ptr = -EFAULT;
-	return csum_partial(dst, len, sum);
+		return 0;
+	return csum_partial(dst, len, ~0U);
 }
 #endif
 
 #ifndef HAVE_CSUM_COPY_USER
 static __inline__ __wsum csum_and_copy_to_user
-(const void *src, void __user *dst, int len, __wsum sum, int *err_ptr)
+(const void *src, void __user *dst, int len)
 {
-	sum = csum_partial(src, len, sum);
+	__wsum sum = csum_partial(src, len, ~0U);
 
 	if (copy_to_user(dst, src, len) == 0)
 		return sum;
-	if (len)
-		*err_ptr = -EFAULT;
-
-	return (__force __wsum)-1; /* invalid checksum */
+	return 0;
 }
 #endif
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 437fbc14c972..ccb247faf79b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1449,15 +1449,14 @@ size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum,
 		return 0;
 	}
 	iterate_and_advance(i, bytes, v, ({
-		int err = 0;
 		next = csum_and_copy_from_user(v.iov_base,
 					       (to += v.iov_len) - v.iov_len,
-					       v.iov_len, ~0U, &err);
-		if (!err) {
+					       v.iov_len);
+		if (next) {
 			sum = csum_block_add(sum, next, off);
 			off += v.iov_len;
 		}
-		err ? v.iov_len : 0;
+		next ? 0 : v.iov_len;
 	}), ({
 		char *p = kmap_atomic(v.bv_page);
 		sum = csum_and_memcpy((to += v.bv_len) - v.bv_len,
@@ -1491,11 +1490,10 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum,
 	if (unlikely(i->count < bytes))
 		return false;
 	iterate_all_kinds(i, bytes, v, ({
-		int err = 0;
 		next = csum_and_copy_from_user(v.iov_base,
 					       (to += v.iov_len) - v.iov_len,
-					       v.iov_len, ~0U, &err);
-		if (err)
+					       v.iov_len);
+		if (!next)
 			return false;
 		sum = csum_block_add(sum, next, off);
 		off += v.iov_len;
@@ -1537,15 +1535,14 @@ size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump,
 		return 0;
 	}
 	iterate_and_advance(i, bytes, v, ({
-		int err = 0;
 		next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len,
 					     v.iov_base,
-					     v.iov_len, ~0U, &err);
-		if (!err) {
+					     v.iov_len);
+		if (next) {
 			sum = csum_block_add(sum, next, off);
 			off += v.iov_len;
 		}
-		err ? v.iov_len : 0;
+		next ? 0 : v.iov_len;
 	}), ({
 		char *p = kmap_atomic(v.bv_page);
 		sum = csum_and_memcpy(p + v.bv_offset,
-- 
cgit v1.2.3


From 160c7ba34605d9b59ee406a1b4a61b0f942b1ae9 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 8 Jul 2020 16:25:43 -0700
Subject: lib: Add backtrace_idle parameter to force backtrace of idle CPUs

Currently, the nmi_cpu_backtrace() declines to produce backtraces for
idle CPUs.  This is a good choice in the common case in which problems are
caused only by non-idle CPUs.  However, there are occasionally situations
in which idle CPUs are helping to cause problems.  This commit therefore
adds an nmi_backtrace.backtrace_idle kernel boot parameter that causes
nmi_cpu_backtrace() to dump stacks even of idle CPUs.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: <linux-doc@vger.kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt | 4 ++++
 lib/nmi_backtrace.c                             | 6 +++++-
 2 files changed, 9 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bdc1f33fd3d1..5e6d19182c5f 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -3073,6 +3073,10 @@
 			and gids from such clients.  This is intended to ease
 			migration from NFSv2/v3.
 
+	nmi_backtrace.backtrace_idle [KNL]
+			Dump stacks even of idle CPUs in response to an
+			NMI stack-backtrace request.
+
 	nmi_debug=	[KNL,SH] Specify one or more actions to take
 			when a NMI is triggered.
 			Format: [state][,regs][,debounce][,die]
diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c
index 15ca78e1c7d4..8abe1870dba4 100644
--- a/lib/nmi_backtrace.c
+++ b/lib/nmi_backtrace.c
@@ -85,12 +85,16 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask,
 	put_cpu();
 }
 
+// Dump stacks even for idle CPUs.
+static bool backtrace_idle;
+module_param(backtrace_idle, bool, 0644);
+
 bool nmi_cpu_backtrace(struct pt_regs *regs)
 {
 	int cpu = smp_processor_id();
 
 	if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) {
-		if (regs && cpu_in_idle(instruction_pointer(regs))) {
+		if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) {
 			pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n",
 				cpu, (void *)instruction_pointer(regs));
 		} else {
-- 
cgit v1.2.3


From bec4a2474890a6884eb890c778ea02bccaaae6eb Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Fri, 24 Jul 2020 09:00:05 +0200
Subject: kcsan: Test support for compound instrumentation

Changes kcsan-test module to support checking reports that include
compound instrumentation. Since we should not fail the test if this
support is unavailable, we have to add a config variable that the test
can use to decide what to check for.

Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/kcsan/kcsan-test.c | 65 +++++++++++++++++++++++++++++++++++++----------
 lib/Kconfig.kcsan         |  5 ++++
 2 files changed, 56 insertions(+), 14 deletions(-)

(limited to 'lib')

diff --git a/kernel/kcsan/kcsan-test.c b/kernel/kcsan/kcsan-test.c
index 721180cbbab1..ebe7fd245104 100644
--- a/kernel/kcsan/kcsan-test.c
+++ b/kernel/kcsan/kcsan-test.c
@@ -27,6 +27,12 @@
 #include <linux/types.h>
 #include <trace/events/printk.h>
 
+#ifdef CONFIG_CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE
+#define __KCSAN_ACCESS_RW(alt) (KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE)
+#else
+#define __KCSAN_ACCESS_RW(alt) (alt)
+#endif
+
 /* Points to current test-case memory access "kernels". */
 static void (*access_kernels[2])(void);
 
@@ -186,20 +192,21 @@ static bool report_matches(const struct expect_report *r)
 
 	/* Access 1 & 2 */
 	for (i = 0; i < 2; ++i) {
+		const int ty = r->access[i].type;
 		const char *const access_type =
-			(r->access[i].type & KCSAN_ACCESS_ASSERT) ?
-				((r->access[i].type & KCSAN_ACCESS_WRITE) ?
-					 "assert no accesses" :
-					 "assert no writes") :
-				((r->access[i].type & KCSAN_ACCESS_WRITE) ?
-					 "write" :
-					 "read");
+			(ty & KCSAN_ACCESS_ASSERT) ?
+				      ((ty & KCSAN_ACCESS_WRITE) ?
+					       "assert no accesses" :
+					       "assert no writes") :
+				      ((ty & KCSAN_ACCESS_WRITE) ?
+					       ((ty & KCSAN_ACCESS_COMPOUND) ?
+							"read-write" :
+							"write") :
+					       "read");
 		const char *const access_type_aux =
-			(r->access[i].type & KCSAN_ACCESS_ATOMIC) ?
-				" (marked)" :
-				((r->access[i].type & KCSAN_ACCESS_SCOPED) ?
-					 " (scoped)" :
-					 "");
+			(ty & KCSAN_ACCESS_ATOMIC) ?
+				      " (marked)" :
+				      ((ty & KCSAN_ACCESS_SCOPED) ? " (scoped)" : "");
 
 		if (i == 1) {
 			/* Access 2 */
@@ -277,6 +284,12 @@ static noinline void test_kernel_write_atomic(void)
 	WRITE_ONCE(test_var, READ_ONCE_NOCHECK(test_sink) + 1);
 }
 
+static noinline void test_kernel_atomic_rmw(void)
+{
+	/* Use builtin, so we can set up the "bad" atomic/non-atomic scenario. */
+	__atomic_fetch_add(&test_var, 1, __ATOMIC_RELAXED);
+}
+
 __no_kcsan
 static noinline void test_kernel_write_uninstrumented(void) { test_var++; }
 
@@ -439,8 +452,8 @@ static void test_concurrent_races(struct kunit *test)
 	const struct expect_report expect = {
 		.access = {
 			/* NULL will match any address. */
-			{ test_kernel_rmw_array, NULL, 0, KCSAN_ACCESS_WRITE },
-			{ test_kernel_rmw_array, NULL, 0, 0 },
+			{ test_kernel_rmw_array, NULL, 0, __KCSAN_ACCESS_RW(KCSAN_ACCESS_WRITE) },
+			{ test_kernel_rmw_array, NULL, 0, __KCSAN_ACCESS_RW(0) },
 		},
 	};
 	static const struct expect_report never = {
@@ -629,6 +642,29 @@ static void test_read_plain_atomic_write(struct kunit *test)
 	KUNIT_EXPECT_TRUE(test, match_expect);
 }
 
+/* Test that atomic RMWs generate correct report. */
+__no_kcsan
+static void test_read_plain_atomic_rmw(struct kunit *test)
+{
+	const struct expect_report expect = {
+		.access = {
+			{ test_kernel_read, &test_var, sizeof(test_var), 0 },
+			{ test_kernel_atomic_rmw, &test_var, sizeof(test_var),
+				KCSAN_ACCESS_COMPOUND | KCSAN_ACCESS_WRITE | KCSAN_ACCESS_ATOMIC },
+		},
+	};
+	bool match_expect = false;
+
+	if (IS_ENABLED(CONFIG_KCSAN_IGNORE_ATOMICS))
+		return;
+
+	begin_test_checks(test_kernel_read, test_kernel_atomic_rmw);
+	do {
+		match_expect = report_matches(&expect);
+	} while (!end_test_checks(match_expect));
+	KUNIT_EXPECT_TRUE(test, match_expect);
+}
+
 /* Zero-sized accesses should never cause data race reports. */
 __no_kcsan
 static void test_zero_size_access(struct kunit *test)
@@ -942,6 +978,7 @@ static struct kunit_case kcsan_test_cases[] = {
 	KCSAN_KUNIT_CASE(test_write_write_struct_part),
 	KCSAN_KUNIT_CASE(test_read_atomic_write_atomic),
 	KCSAN_KUNIT_CASE(test_read_plain_atomic_write),
+	KCSAN_KUNIT_CASE(test_read_plain_atomic_rmw),
 	KCSAN_KUNIT_CASE(test_zero_size_access),
 	KCSAN_KUNIT_CASE(test_data_race),
 	KCSAN_KUNIT_CASE(test_assert_exclusive_writer),
diff --git a/lib/Kconfig.kcsan b/lib/Kconfig.kcsan
index 3d282d51849b..f271ff5fbb5a 100644
--- a/lib/Kconfig.kcsan
+++ b/lib/Kconfig.kcsan
@@ -40,6 +40,11 @@ menuconfig KCSAN
 
 if KCSAN
 
+# Compiler capabilities that should not fail the test if they are unavailable.
+config CC_HAS_TSAN_COMPOUND_READ_BEFORE_WRITE
+	def_bool (CC_IS_CLANG && $(cc-option,-fsanitize=thread -mllvm -tsan-compound-read-before-write=1)) || \
+		 (CC_IS_GCC && $(cc-option,-fsanitize=thread --param tsan-compound-read-before-write=1))
+
 config KCSAN_VERBOSE
 	bool "Show verbose reports with more information about system state"
 	depends on PROVE_LOCKING
-- 
cgit v1.2.3


From e9d338a0b1799c988b678e8ccb66a442272e6aa3 Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Wed, 24 Jun 2020 15:59:59 -0700
Subject: scftorture: Add smp_call_function() torture test

This commit adds an smp_call_function() torture test that repeatedly
invokes this function and complains if things go badly awry.

Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 Documentation/admin-guide/kernel-parameters.txt |  92 +++++++
 kernel/Makefile                                 |   2 +
 kernel/scftorture.c                             | 350 ++++++++++++++++++++++++
 lib/Kconfig.debug                               |  10 +
 4 files changed, 454 insertions(+)
 create mode 100644 kernel/scftorture.c

(limited to 'lib')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index bdc1f33fd3d1..91a56382ae56 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4637,6 +4637,98 @@
 			Format: integer between 0 and 10
 			Default is 0.
 
+	scftorture.holdoff= [KNL]
+			Number of seconds to hold off before starting
+			test.  Defaults to zero for module insertion and
+			to 10 seconds for built-in smp_call_function()
+			tests.
+
+	scftorture.longwait= [KNL]
+			Request ridiculously long waits randomly selected
+			up to the chosen limit in seconds.  Zero (the
+			default) disables this feature.  Please note
+			that requesting even small non-zero numbers of
+			seconds can result in RCU CPU stall warnings,
+			softlockup complaints, and so on.
+
+	scftorture.nthreads= [KNL]
+			Number of kthreads to spawn to invoke the
+			smp_call_function() family of functions.
+			The default of -1 specifies a number of kthreads
+			equal to the number of CPUs.
+
+	scftorture.onoff_holdoff= [KNL]
+			Number seconds to wait after the start of the
+			test before initiating CPU-hotplug operations.
+
+	scftorture.onoff_interval= [KNL]
+			Number seconds to wait between successive
+			CPU-hotplug operations.  Specifying zero (which
+			is the default) disables CPU-hotplug operations.
+
+	scftorture.shutdown_secs= [KNL]
+			The number of seconds following the start of the
+			test after which to shut down the system.  The
+			default of zero avoids shutting down the system.
+			Non-zero values are useful for automated tests.
+
+	scftorture.stat_interval= [KNL]
+			The number of seconds between outputting the
+			current test statistics to the console.  A value
+			of zero disables statistics output.
+
+	scftorture.stutter_cpus= [KNL]
+			The number of jiffies to wait between each change
+			to the set of CPUs under test.
+
+	scftorture.use_cpus_read_lock= [KNL]
+			Use use_cpus_read_lock() instead of the default
+			preempt_disable() to disable CPU hotplug
+			while invoking one of the smp_call_function*()
+			functions.
+
+	scftorture.verbose= [KNL]
+			Enable additional printk() statements.
+
+	scftorture.weight_single= [KNL]
+			The probability weighting to use for the
+			smp_call_function_single() function with a zero
+			"wait" parameter.  A value of -1 selects the
+			default if all other weights are -1.  However,
+			if at least one weight has some other value, a
+			value of -1 will instead select a weight of zero.
+
+	scftorture.weight_single_wait= [KNL]
+			The probability weighting to use for the
+			smp_call_function_single() function with a
+			non-zero "wait" parameter.  See weight_single.
+
+	scftorture.weight_many= [KNL]
+			The probability weighting to use for the
+			smp_call_function_many() function with a zero
+			"wait" parameter.  See weight_single.
+			Note well that setting a high probability for
+			this weighting can place serious IPI load
+			on the system.
+
+	scftorture.weight_many_wait= [KNL]
+			The probability weighting to use for the
+			smp_call_function_many() function with a
+			non-zero "wait" parameter.  See weight_single
+			and weight_many.
+
+	scftorture.weight_all= [KNL]
+			The probability weighting to use for the
+			smp_call_function_all() function with a zero
+			"wait" parameter.  See weight_single and
+			weight_many.
+
+	scftorture.weight_all_wait= [KNL]
+			The probability weighting to use for the
+			smp_call_function_all() function with a
+			non-zero "wait" parameter.  See weight_single
+			and weight_many.
+
 	skew_tick=	[KNL] Offset the periodic timer tick per cpu to mitigate
 			xtime_lock contention on larger systems, and/or RCU lock
 			contention on all systems with CONFIG_MAXSMP set.
diff --git a/kernel/Makefile b/kernel/Makefile
index 9a20016d4900..c45f551deaaa 100644
--- a/kernel/Makefile
+++ b/kernel/Makefile
@@ -133,6 +133,8 @@ KASAN_SANITIZE_stackleak.o := n
 KCSAN_SANITIZE_stackleak.o := n
 KCOV_INSTRUMENT_stackleak.o := n
 
+obj-$(CONFIG_SCF_TORTURE_TEST) += scftorture.o
+
 $(obj)/configs.o: $(obj)/config_data.gz
 
 targets += config_data.gz
diff --git a/kernel/scftorture.c b/kernel/scftorture.c
new file mode 100644
index 000000000000..44f1e49ba6e9
--- /dev/null
+++ b/kernel/scftorture.c
@@ -0,0 +1,350 @@
+// SPDX-License-Identifier: GPL-2.0+
+//
+// Torture test for smp_call_function() and friends.
+//
+// Copyright (C) Facebook, 2020.
+//
+// Author: Paul E. McKenney <paulmck@kernel.org>
+
+#define pr_fmt(fmt) fmt
+
+#include <linux/atomic.h>
+#include <linux/bitops.h>
+#include <linux/completion.h>
+#include <linux/cpu.h>
+#include <linux/delay.h>
+#include <linux/err.h>
+#include <linux/init.h>
+#include <linux/interrupt.h>
+#include <linux/kthread.h>
+#include <linux/kernel.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+#include <linux/moduleparam.h>
+#include <linux/notifier.h>
+#include <linux/percpu.h>
+#include <linux/rcupdate.h>
+#include <linux/rcupdate_trace.h>
+#include <linux/reboot.h>
+#include <linux/sched.h>
+#include <linux/spinlock.h>
+#include <linux/smp.h>
+#include <linux/stat.h>
+#include <linux/srcu.h>
+#include <linux/slab.h>
+#include <linux/torture.h>
+#include <linux/types.h>
+
+#define SCFTORT_STRING "scftorture"
+#define SCFTORT_FLAG SCFTORT_STRING ": "
+
+#define SCFTORTOUT(s, x...) \
+	pr_alert(SCFTORT_FLAG s, ## x)
+
+#define VERBOSE_SCFTORTOUT(s, x...) \
+	do { if (verbose) pr_alert(SCFTORT_FLAG s, ## x); } while (0)
+
+#define VERBOSE_SCFTORTOUT_ERRSTRING(s, x...) \
+	do { if (verbose) pr_alert(SCFTORT_FLAG "!!! " s, ## x); } while (0)
+
+MODULE_LICENSE("GPL");
+MODULE_AUTHOR("Paul E. McKenney <paulmck@kernel.org>");
+
+// Wait until there are multiple CPUs before starting test.
+torture_param(int, holdoff, IS_BUILTIN(CONFIG_SCF_TORTURE_TEST) ? 10 : 0,
+	      "Holdoff time before test start (s)");
+torture_param(int, longwait, 0, "Include ridiculously long waits? (seconds)");
+torture_param(int, nthreads, -1, "# threads, defaults to -1 for all CPUs.");
+torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)");
+torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable");
+torture_param(int, shutdown_secs, 0, "Shutdown time (ms), <= zero to disable.");
+torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s.");
+torture_param(int, stutter_cpus, 5, "Number of jiffies to change CPUs under test, 0=disable");
+torture_param(bool, use_cpus_read_lock, 0, "Use cpus_read_lock() to exclude CPU hotplug.");
+torture_param(int, verbose, 0, "Enable verbose debugging printk()s");
+torture_param(int, weight_single, -1, "Testing weight for single-CPU no-wait operations.");
+torture_param(int, weight_single_wait, -1, "Testing weight for single-CPU operations.");
+torture_param(int, weight_mult, -1, "Testing weight for multi-CPU no-wait operations.");
+torture_param(int, weight_mult_wait, -1, "Testing weight for multi-CPU operations.");
+torture_param(int, weight_all, -1, "Testing weight for all-CPU no-wait operations.");
+torture_param(int, weight_all_wait, -1, "Testing weight for all-CPU operations.");
+
+char *torture_type = "";
+
+#ifdef MODULE
+# define SCFTORT_SHUTDOWN 0
+#else
+# define SCFTORT_SHUTDOWN 1
+#endif
+
+torture_param(bool, shutdown, SCFTORT_SHUTDOWN, "Shutdown at end of torture test.");
+
+struct scf_statistics {
+	struct task_struct *task;
+	int cpu;
+	long long n_single;
+	long long n_single_wait;
+	long long n_multi;
+	long long n_multi_wait;
+	long long n_all;
+	long long n_all_wait;
+};
+
+static struct scf_statistics *scf_stats_p;
+static struct task_struct *scf_torture_stats_task;
+static DEFINE_PER_CPU(long long, scf_invoked_count);
+
+// Use to wait for all threads to start.
+static atomic_t n_started;
+static atomic_t n_errs;
+static bool scfdone;
+
+DEFINE_TORTURE_RANDOM_PERCPU(scf_torture_rand);
+
+// Print torture statistics.  Caller must ensure serialization.
+static void scf_torture_stats_print(void)
+{
+	int cpu;
+	long long invoked_count = 0;
+	bool isdone = READ_ONCE(scfdone);
+
+	for_each_possible_cpu(cpu)
+		invoked_count += data_race(per_cpu(scf_invoked_count, cpu));
+	pr_alert("%s scf_invoked_count %s: %lld ",
+		 SCFTORT_FLAG, isdone ? "VER" : "ver", invoked_count);
+	torture_onoff_stats();
+	pr_cont("\n");
+}
+
+// Periodically prints torture statistics, if periodic statistics printing
+// was specified via the stat_interval module parameter.
+static int
+scf_torture_stats(void *arg)
+{
+	VERBOSE_TOROUT_STRING("scf_torture_stats task started");
+	do {
+		schedule_timeout_interruptible(stat_interval * HZ);
+		scf_torture_stats_print();
+		torture_shutdown_absorb("scf_torture_stats");
+	} while (!torture_must_stop());
+	torture_kthread_stopping("scf_torture_stats");
+	return 0;
+}
+
+// Update statistics and occasionally burn up mass quantities of CPU time,
+// if told to do so via scftorture.longwait.  Otherwise, occasionally burn
+// a little bit.
+static void scf_handler(void *unused)
+{
+	int i;
+	int j;
+	unsigned long r = torture_random(this_cpu_ptr(&scf_torture_rand));
+
+	this_cpu_inc(scf_invoked_count);
+	if (longwait <= 0) {
+		if (!(r & 0xffc0))
+			udelay(r & 0x3f);
+		return;
+	}
+	if (r & 0xfff)
+		return;
+	r = (r >> 12);
+	if (longwait <= 0) {
+		udelay((r & 0xff) + 1);
+		return;
+	}
+	r = r % longwait + 1;
+	for (i = 0; i < r; i++) {
+		for (j = 0; j < 1000; j++) {
+			udelay(1000);
+			cpu_relax();
+		}
+	}
+}
+
+// Randomly do an smp_call_function*() invocation.
+static void scftorture_invoke_one(struct scf_statistics *scfp,struct torture_random_state *trsp)
+{
+	if (use_cpus_read_lock)
+		cpus_read_lock();
+	else
+		preempt_disable();
+	scfp->n_all++;
+	smp_call_function(scf_handler, NULL, 0);
+	if (use_cpus_read_lock)
+		cpus_read_unlock();
+	else
+		preempt_enable();
+	if (!(torture_random(trsp) & 0xfff))
+		schedule_timeout_uninterruptible(1);
+}
+
+// SCF test kthread.  Repeatedly does calls to members of the
+// smp_call_function() family of functions.
+static int scftorture_invoker(void *arg)
+{
+	DEFINE_TORTURE_RANDOM(rand);
+	struct scf_statistics *scfp = (struct scf_statistics *)arg;
+
+	VERBOSE_SCFTORTOUT("scftorture_invoker %d: task started", scfp->cpu);
+	set_cpus_allowed_ptr(current, cpumask_of(scfp->cpu % nr_cpu_ids));
+	set_user_nice(current, MAX_NICE);
+	if (holdoff)
+		schedule_timeout_interruptible(holdoff * HZ);
+
+	VERBOSE_SCFTORTOUT("scftorture_invoker %d: Waiting for all SCF torturers from cpu %d", scfp->cpu, smp_processor_id());
+
+	// Make sure that the CPU is affinitized appropriately during testing.
+	WARN_ON_ONCE(smp_processor_id() != scfp->cpu);
+
+	if (!atomic_dec_return(&n_started))
+		while (atomic_read_acquire(&n_started)) {
+			if (torture_must_stop()) {
+				VERBOSE_SCFTORTOUT("scftorture_invoker %d ended before starting", scfp->cpu);
+				goto end;
+			}
+			schedule_timeout_uninterruptible(1);
+		}
+
+	VERBOSE_SCFTORTOUT("scftorture_invoker %d started", scfp->cpu);
+
+	do {
+		scftorture_invoke_one(scfp, &rand);
+	} while (!torture_must_stop());
+
+	VERBOSE_SCFTORTOUT("scftorture_invoker %d ended", scfp->cpu);
+end:
+	torture_kthread_stopping("scftorture_invoker");
+	return 0;
+}
+
+static void
+scftorture_print_module_parms(const char *tag)
+{
+	pr_alert(SCFTORT_FLAG
+		 "--- %s:  verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter_cpus=%d use_cpus_read_lock=%d, weight_single=%d, weight_single_wait=%d, weight_mult=%d, weight_mult_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag,
+		 verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter_cpus, use_cpus_read_lock, weight_single, weight_single_wait, weight_mult, weight_mult_wait, weight_all, weight_all_wait);
+}
+
+static void scf_cleanup_handler(void *unused)
+{
+}
+
+static void scf_torture_cleanup(void)
+{
+	int i;
+
+	if (torture_cleanup_begin())
+		return;
+
+	WRITE_ONCE(scfdone, true);
+	if (nthreads)
+		for (i = 0; i < nthreads; i++)
+			torture_stop_kthread("scftorture_invoker", scf_stats_p[i].task);
+	else
+		goto end;
+	kfree(scf_stats_p);
+	scf_stats_p = NULL;
+	smp_call_function(scf_cleanup_handler, NULL, 0);
+	torture_stop_kthread(scf_torture_stats, scf_torture_stats_task);
+	scf_torture_stats_print();  // -After- the stats thread is stopped!
+
+	if (atomic_read(&n_errs))
+		scftorture_print_module_parms("End of test: FAILURE");
+	else if (torture_onoff_failures())
+		scftorture_print_module_parms("End of test: LOCK_HOTPLUG");
+	else
+		scftorture_print_module_parms("End of test: SUCCESS");
+
+end:
+	torture_cleanup_end();
+}
+
+static int __init scf_torture_init(void)
+{
+	long i;
+	int firsterr = 0;
+
+	if (!torture_init_begin(SCFTORT_STRING, verbose))
+		return -EBUSY;
+
+	scftorture_print_module_parms("Start of test");
+
+	if (weight_single == -1 && weight_single_wait == -1 &&
+	    weight_mult == -1 && weight_mult_wait == -1 &&
+	    weight_all == -1 && weight_all_wait == -1) {
+		weight_single = 1;
+		weight_single_wait = 1;
+		weight_mult = 1;
+		weight_mult_wait = 1;
+		weight_all = 1;
+		weight_all_wait = 1;
+	} else {
+		if (weight_single == -1)
+			weight_single = 0;
+		if (weight_single_wait == -1)
+			weight_single_wait = 0;
+		if (weight_mult == -1)
+			weight_mult = 0;
+		if (weight_mult_wait == -1)
+			weight_mult_wait = 0;
+		if (weight_all == -1)
+			weight_all = 0;
+		if (weight_all_wait == -1)
+			weight_all_wait = 0;
+	}
+	if (weight_single == 0 && weight_single_wait == 0 &&
+	    weight_mult == 0 && weight_mult_wait == 0 &&
+	    weight_all == 0 && weight_all_wait == 0) {
+		firsterr = -EINVAL;
+		goto unwind;
+	}
+
+	if (onoff_interval > 0) {
+		firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, NULL);
+		if (firsterr)
+			goto unwind;
+	}
+	if (shutdown_secs > 0) {
+		firsterr = torture_shutdown_init(shutdown_secs, scf_torture_cleanup);
+		if (firsterr)
+			goto unwind;
+	}
+
+	// Worker tasks invoking smp_call_function().
+	if (nthreads < 0)
+		nthreads = num_online_cpus();
+	scf_stats_p = kcalloc(nthreads, sizeof(scf_stats_p[0]), GFP_KERNEL);
+	if (!scf_stats_p) {
+		VERBOSE_SCFTORTOUT_ERRSTRING("out of memory");
+		firsterr = -ENOMEM;
+		goto unwind;
+	}
+
+	VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads\n", nthreads);
+
+	atomic_set(&n_started, nthreads);
+	for (i = 0; i < nthreads; i++) {
+		scf_stats_p[i].cpu = i;
+		firsterr = torture_create_kthread(scftorture_invoker, (void *)&scf_stats_p[i],
+						  scf_stats_p[i].task);
+		if (firsterr)
+			goto unwind;
+	}
+	if (stat_interval > 0) {
+		firsterr = torture_create_kthread(scf_torture_stats, NULL, scf_torture_stats_task);
+		if (firsterr)
+			goto unwind;
+	}
+
+	torture_init_end();
+	return 0;
+
+unwind:
+	torture_init_end();
+	scf_torture_cleanup();
+	return firsterr;
+}
+
+module_init(scf_torture_init);
+module_exit(scf_torture_cleanup);
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e068c3c7189a..0c3a6c752ede 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1367,6 +1367,16 @@ config WW_MUTEX_SELFTEST
 	  Say M if you want these self tests to build as a module.
 	  Say N if you are unsure.
 
+config SCF_TORTURE_TEST
+	tristate "torture tests for smp_call_function*()"
+	depends on DEBUG_KERNEL
+	select TORTURE_TEST
+	help
+	  This option provides a kernel module that runs torture tests
+	  on the smp_call_function() family of primitives.  The kernel
+	  module may be built after the fact on the running kernel to
+	  be tested, if desired.
+
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
-- 
cgit v1.2.3


From 4718a471f1a7fc5cc943377c09300bb35138daf9 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Mon, 24 Aug 2020 23:25:17 -0400
Subject: netlink: remove duplicated nla_need_padding_for_64bit() check

The need for padding 64bit is implicitly checked by nla_align_64bit(), so
remove this explicit one.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/nlattr.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/nlattr.c b/lib/nlattr.c
index 665bdaff02c8..80ff9fe83696 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -834,8 +834,7 @@ EXPORT_SYMBOL(__nla_reserve);
 struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype,
 				   int attrlen, int padattr)
 {
-	if (nla_need_padding_for_64bit(skb))
-		nla_align_64bit(skb, padattr);
+	nla_align_64bit(skb, padattr);
 
 	return __nla_reserve(skb, attrtype, attrlen);
 }
-- 
cgit v1.2.3


From e918188611f073063415f40fae568fa4d86d9044 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Fri, 7 Aug 2020 15:42:20 +0800
Subject: locking: More accurate annotations for read_lock()

On the archs using QUEUED_RWLOCKS, read_lock() is not always a recursive
read lock, actually it's only recursive if in_interrupt() is true. So
change the annotation accordingly to catch more deadlocks.

Note we used to treat read_lock() as pure recursive read locks in
lib/locking-seftest.c, and this is useful, especially for the lockdep
development selftest, so we keep this via a variable to force switching
lock annotation for read_lock().

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200807074238.1632519-2-boqun.feng@gmail.com
---
 include/linux/lockdep.h  | 23 ++++++++++++++++++++++-
 kernel/locking/lockdep.c | 14 ++++++++++++++
 lib/locking-selftest.c   | 11 +++++++++++
 3 files changed, 47 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h
index 6a584b3e5c74..7cae5ea00d59 100644
--- a/include/linux/lockdep.h
+++ b/include/linux/lockdep.h
@@ -469,6 +469,20 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 }
 #endif
 
+/* Variable used to make lockdep treat read_lock() as recursive in selftests */
+#ifdef CONFIG_DEBUG_LOCKING_API_SELFTESTS
+extern unsigned int force_read_lock_recursive;
+#else /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */
+#define force_read_lock_recursive 0
+#endif /* CONFIG_DEBUG_LOCKING_API_SELFTESTS */
+
+#ifdef CONFIG_LOCKDEP
+extern bool read_lock_is_recursive(void);
+#else /* CONFIG_LOCKDEP */
+/* If !LOCKDEP, the value is meaningless */
+#define read_lock_is_recursive() 0
+#endif
+
 /*
  * For trivial one-depth nesting of a lock-class, the following
  * global define can be used. (Subsystems with multiple levels
@@ -490,7 +504,14 @@ static inline void print_irqtrace_events(struct task_struct *curr)
 #define spin_release(l, i)			lock_release(l, i)
 
 #define rwlock_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
-#define rwlock_acquire_read(l, s, t, i)		lock_acquire_shared_recursive(l, s, t, NULL, i)
+#define rwlock_acquire_read(l, s, t, i)					\
+do {									\
+	if (read_lock_is_recursive())					\
+		lock_acquire_shared_recursive(l, s, t, NULL, i);	\
+	else								\
+		lock_acquire_shared(l, s, t, NULL, i);			\
+} while (0)
+
 #define rwlock_release(l, i)			lock_release(l, i)
 
 #define seqcount_acquire(l, s, t, i)		lock_acquire_exclusive(l, s, t, NULL, i)
diff --git a/kernel/locking/lockdep.c b/kernel/locking/lockdep.c
index 54b74fabf40c..77cd9e6520c4 100644
--- a/kernel/locking/lockdep.c
+++ b/kernel/locking/lockdep.c
@@ -4967,6 +4967,20 @@ static bool lockdep_nmi(void)
 	return true;
 }
 
+/*
+ * read_lock() is recursive if:
+ * 1. We force lockdep think this way in selftests or
+ * 2. The implementation is not queued read/write lock or
+ * 3. The locker is at an in_interrupt() context.
+ */
+bool read_lock_is_recursive(void)
+{
+	return force_read_lock_recursive ||
+	       !IS_ENABLED(CONFIG_QUEUED_RWLOCKS) ||
+	       in_interrupt();
+}
+EXPORT_SYMBOL_GPL(read_lock_is_recursive);
+
 /*
  * We are not always called with irqs disabled - do that here,
  * and also avoid lockdep recursion:
diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 14f44f59e733..caadc4dd3368 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -28,6 +28,7 @@
  * Change this to 1 if you want to see the failure printouts:
  */
 static unsigned int debug_locks_verbose;
+unsigned int force_read_lock_recursive;
 
 static DEFINE_WD_CLASS(ww_lockdep);
 
@@ -1978,6 +1979,11 @@ void locking_selftest(void)
 		return;
 	}
 
+	/*
+	 * treats read_lock() as recursive read locks for testing purpose
+	 */
+	force_read_lock_recursive = 1;
+
 	/*
 	 * Run the testsuite:
 	 */
@@ -2073,6 +2079,11 @@ void locking_selftest(void)
 
 	ww_tests();
 
+	force_read_lock_recursive = 0;
+	/*
+	 * queued_read_lock() specific test cases can be put here
+	 */
+
 	if (unexpected_testcase_failures) {
 		printk("-----------------------------------------------------------------\n");
 		debug_locks = 0;
-- 
cgit v1.2.3


From d4f200e579e96051f1f081f795820787826eb234 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Fri, 7 Aug 2020 15:42:32 +0800
Subject: lockdep/selftest: Add a R-L/L-W test case specific to chain cache
 behavior

As our chain cache doesn't differ read/write locks, so even we can
detect a read-lock/lock-write deadlock in check_noncircular(), we can
still be fooled if a read-lock/lock-read case(which is not a deadlock)
comes first.

So introduce this test case to test specific to the chain cache behavior
on detecting recursive read lock related deadlocks.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200807074238.1632519-14-boqun.feng@gmail.com
---
 lib/locking-selftest.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 47 insertions(+)

(limited to 'lib')

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index caadc4dd3368..002d1ec09852 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -396,6 +396,49 @@ static void rwsem_ABBA1(void)
 	MU(Y1); // should fail
 }
 
+/*
+ * read_lock(A)
+ * spin_lock(B)
+ *		spin_lock(B)
+ *		write_lock(A)
+ *
+ * This test case is aimed at poking whether the chain cache prevents us from
+ * detecting a read-lock/lock-write deadlock: if the chain cache doesn't differ
+ * read/write locks, the following case may happen
+ *
+ * 	{ read_lock(A)->lock(B) dependency exists }
+ *
+ * 	P0:
+ * 	lock(B);
+ * 	read_lock(A);
+ *
+ *	{ Not a deadlock, B -> A is added in the chain cache }
+ *
+ *	P1:
+ *	lock(B);
+ *	write_lock(A);
+ *
+ *	{ B->A found in chain cache, not reported as a deadlock }
+ *
+ */
+static void rlock_chaincache_ABBA1(void)
+{
+	RL(X1);
+	L(Y1);
+	U(Y1);
+	RU(X1);
+
+	L(Y1);
+	RL(X1);
+	RU(X1);
+	U(Y1);
+
+	L(Y1);
+	WL(X1);
+	WU(X1);
+	U(Y1); // should fail
+}
+
 /*
  * read_lock(A)
  * spin_lock(B)
@@ -2062,6 +2105,10 @@ void locking_selftest(void)
 	pr_cont("             |");
 	dotest(rwsem_ABBA3, FAILURE, LOCKTYPE_RWSEM);
 
+	print_testname("chain cached mixed R-L/L-W ABBA");
+	pr_cont("             |");
+	dotest(rlock_chaincache_ABBA1, FAILURE, LOCKTYPE_RWLOCK);
+
 	printk("  --------------------------------------------------------------------------\n");
 
 	/*
-- 
cgit v1.2.3


From 31e0d747708272356bee9b6a1b90c1e6525b0f6d Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Fri, 7 Aug 2020 15:42:34 +0800
Subject: lockdep/selftest: Unleash irq_read_recursion2 and add more

Now since we can handle recursive read related irq inversion deadlocks
correctly, uncomment the irq_read_recursion2 and add more testcases.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200807074238.1632519-16-boqun.feng@gmail.com
---
 lib/locking-selftest.c | 59 ++++++++++++++++++++++++++++++++++++++++----------
 1 file changed, 47 insertions(+), 12 deletions(-)

(limited to 'lib')

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 002d1ec09852..f65a658cc9e3 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1053,20 +1053,28 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
 #define E3()				\
 					\
 	IRQ_ENTER();			\
-	RL(A);				\
+	LOCK(A);			\
 	L(B);				\
 	U(B);				\
-	RU(A);				\
+	UNLOCK(A);			\
 	IRQ_EXIT();
 
 /*
- * Generate 12 testcases:
+ * Generate 24 testcases:
  */
 #include "locking-selftest-hardirq.h"
-GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard)
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_rlock)
+
+#include "locking-selftest-wlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_hard_wlock)
 
 #include "locking-selftest-softirq.h"
-GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_rlock)
+
+#include "locking-selftest-wlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft_wlock)
 
 #undef E1
 #undef E2
@@ -1080,8 +1088,8 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 					\
 	IRQ_DISABLE();			\
 	L(B);				\
-	WL(A);				\
-	WU(A);				\
+	LOCK(A);			\
+	UNLOCK(A);			\
 	U(B);				\
 	IRQ_ENABLE();
 
@@ -1098,13 +1106,21 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft)
 	IRQ_EXIT();
 
 /*
- * Generate 12 testcases:
+ * Generate 24 testcases:
  */
 #include "locking-selftest-hardirq.h"
-// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard)
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_rlock)
+
+#include "locking-selftest-wlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_hard_wlock)
 
 #include "locking-selftest-softirq.h"
-// GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft)
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_rlock)
+
+#include "locking-selftest-wlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_wlock)
 
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
@@ -1257,6 +1273,25 @@ static inline void print_testname(const char *testname)
 	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
 	pr_cont("\n");
 
+#define DO_TESTCASE_2RW(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	pr_cont("      |");					\
+	dotest(name##_wlock_##nr, FAILURE, LOCKTYPE_RWLOCK);	\
+	dotest(name##_rlock_##nr, SUCCESS, LOCKTYPE_RWLOCK);	\
+	pr_cont("\n");
+
+#define DO_TESTCASE_2x2RW(desc, name, nr)			\
+	DO_TESTCASE_2RW("hard-"desc, name##_hard, nr)		\
+	DO_TESTCASE_2RW("soft-"desc, name##_soft, nr)		\
+
+#define DO_TESTCASE_6x2x2RW(desc, name)				\
+	DO_TESTCASE_2x2RW(desc, name, 123);			\
+	DO_TESTCASE_2x2RW(desc, name, 132);			\
+	DO_TESTCASE_2x2RW(desc, name, 213);			\
+	DO_TESTCASE_2x2RW(desc, name, 231);			\
+	DO_TESTCASE_2x2RW(desc, name, 312);			\
+	DO_TESTCASE_2x2RW(desc, name, 321);
+
 #define DO_TESTCASE_6(desc, name)				\
 	print_testname(desc);					\
 	dotest(name##_spin, FAILURE, LOCKTYPE_SPIN);		\
@@ -2121,8 +2156,8 @@ void locking_selftest(void)
 	DO_TESTCASE_6x6("safe-A + unsafe-B #2", irqsafe4);
 	DO_TESTCASE_6x6RW("irq lock-inversion", irq_inversion);
 
-	DO_TESTCASE_6x2("irq read-recursion", irq_read_recursion);
-//	DO_TESTCASE_6x2B("irq read-recursion #2", irq_read_recursion2);
+	DO_TESTCASE_6x2x2RW("irq read-recursion", irq_read_recursion);
+	DO_TESTCASE_6x2x2RW("irq read-recursion #2", irq_read_recursion2);
 
 	ww_tests();
 
-- 
cgit v1.2.3


From 8ef7ca75120a39167def40f41daefee013c4b5af Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Fri, 7 Aug 2020 15:42:35 +0800
Subject: lockdep/selftest: Add more recursive read related test cases

Add those four test cases:

1.	X --(ER)--> Y --(ER)--> Z --(ER)--> X is deadlock.

2.	X --(EN)--> Y --(SR)--> Z --(ER)--> X is deadlock.

3.	X --(EN)--> Y --(SR)--> Z --(SN)--> X is not deadlock.

4.	X --(ER)--> Y --(SR)--> Z --(EN)--> X is not deadlock.

Those self testcases are valuable for the development of supporting
recursive read related deadlock detection.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200807074238.1632519-17-boqun.feng@gmail.com
---
 lib/locking-selftest.c | 161 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 161 insertions(+)

(limited to 'lib')

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index f65a658cc9e3..76c314ab4f03 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1034,6 +1034,133 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_inversion_soft_wlock)
 #undef E2
 #undef E3
 
+/*
+ * write-read / write-read / write-read deadlock even if read is recursive
+ */
+
+#define E1()				\
+					\
+	WL(X1);				\
+	RL(Y1);				\
+	RU(Y1);				\
+	WU(X1);
+
+#define E2()				\
+					\
+	WL(Y1);				\
+	RL(Z1);				\
+	RU(Z1);				\
+	WU(Y1);
+
+#define E3()				\
+					\
+	WL(Z1);				\
+	RL(X1);				\
+	RU(X1);				\
+	WU(Z1);
+
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(W1R2_W2R3_W3R1)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * write-write / read-read / write-read deadlock even if read is recursive
+ */
+
+#define E1()				\
+					\
+	WL(X1);				\
+	WL(Y1);				\
+	WU(Y1);				\
+	WU(X1);
+
+#define E2()				\
+					\
+	RL(Y1);				\
+	RL(Z1);				\
+	RU(Z1);				\
+	RU(Y1);
+
+#define E3()				\
+					\
+	WL(Z1);				\
+	RL(X1);				\
+	RU(X1);				\
+	WU(Z1);
+
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3_W3R1)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * write-write / read-read / read-write is not deadlock when read is recursive
+ */
+
+#define E1()				\
+					\
+	WL(X1);				\
+	WL(Y1);				\
+	WU(Y1);				\
+	WU(X1);
+
+#define E2()				\
+					\
+	RL(Y1);				\
+	RL(Z1);				\
+	RU(Z1);				\
+	RU(Y1);
+
+#define E3()				\
+					\
+	RL(Z1);				\
+	WL(X1);				\
+	WU(X1);				\
+	RU(Z1);
+
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(W1R2_R2R3_W3W1)
+
+#undef E1
+#undef E2
+#undef E3
+
+/*
+ * write-read / read-read / write-write is not deadlock when read is recursive
+ */
+
+#define E1()				\
+					\
+	WL(X1);				\
+	RL(Y1);				\
+	RU(Y1);				\
+	WU(X1);
+
+#define E2()				\
+					\
+	RL(Y1);				\
+	RL(Z1);				\
+	RU(Z1);				\
+	RU(Y1);
+
+#define E3()				\
+					\
+	WL(Z1);				\
+	WL(X1);				\
+	WU(X1);				\
+	WU(Z1);
+
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(W1W2_R2R3_R3W1)
+
+#undef E1
+#undef E2
+#undef E3
 /*
  * read-lock / write-lock recursion that is actually safe.
  */
@@ -1259,6 +1386,19 @@ static inline void print_testname(const char *testname)
 	dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK);		\
 	pr_cont("\n");
 
+#define DO_TESTCASE_1RR(desc, name, nr)				\
+	print_testname(desc"/"#nr);				\
+	pr_cont("             |");				\
+	dotest(name##_##nr, SUCCESS, LOCKTYPE_RWLOCK);		\
+	pr_cont("\n");
+
+#define DO_TESTCASE_1RRB(desc, name, nr)			\
+	print_testname(desc"/"#nr);				\
+	pr_cont("             |");				\
+	dotest(name##_##nr, FAILURE, LOCKTYPE_RWLOCK);		\
+	pr_cont("\n");
+
+
 #define DO_TESTCASE_3(desc, name, nr)				\
 	print_testname(desc"/"#nr);				\
 	dotest(name##_spin_##nr, FAILURE, LOCKTYPE_SPIN);	\
@@ -1368,6 +1508,22 @@ static inline void print_testname(const char *testname)
 	DO_TESTCASE_2IB(desc, name, 312);			\
 	DO_TESTCASE_2IB(desc, name, 321);
 
+#define DO_TESTCASE_6x1RR(desc, name)				\
+	DO_TESTCASE_1RR(desc, name, 123);			\
+	DO_TESTCASE_1RR(desc, name, 132);			\
+	DO_TESTCASE_1RR(desc, name, 213);			\
+	DO_TESTCASE_1RR(desc, name, 231);			\
+	DO_TESTCASE_1RR(desc, name, 312);			\
+	DO_TESTCASE_1RR(desc, name, 321);
+
+#define DO_TESTCASE_6x1RRB(desc, name)				\
+	DO_TESTCASE_1RRB(desc, name, 123);			\
+	DO_TESTCASE_1RRB(desc, name, 132);			\
+	DO_TESTCASE_1RRB(desc, name, 213);			\
+	DO_TESTCASE_1RRB(desc, name, 231);			\
+	DO_TESTCASE_1RRB(desc, name, 312);			\
+	DO_TESTCASE_1RRB(desc, name, 321);
+
 #define DO_TESTCASE_6x6(desc, name)				\
 	DO_TESTCASE_6I(desc, name, 123);			\
 	DO_TESTCASE_6I(desc, name, 132);			\
@@ -2144,6 +2300,11 @@ void locking_selftest(void)
 	pr_cont("             |");
 	dotest(rlock_chaincache_ABBA1, FAILURE, LOCKTYPE_RWLOCK);
 
+	DO_TESTCASE_6x1RRB("rlock W1R2/W2R3/W3R1", W1R2_W2R3_W3R1);
+	DO_TESTCASE_6x1RRB("rlock W1W2/R2R3/W3R1", W1W2_R2R3_W3R1);
+	DO_TESTCASE_6x1RR("rlock W1W2/R2R3/R3W1", W1W2_R2R3_R3W1);
+	DO_TESTCASE_6x1RR("rlock W1R2/R2R3/W3W1", W1R2_R2R3_W3W1);
+
 	printk("  --------------------------------------------------------------------------\n");
 
 	/*
-- 
cgit v1.2.3


From 108dc42ed3507fe06214d51ab15fca7771df8bbd Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Fri, 7 Aug 2020 15:42:36 +0800
Subject: Revert "locking/lockdep/selftests: Fix mixed read-write ABBA tests"

This reverts commit d82fed75294229abc9d757f08a4817febae6c4f4.

Since we now could handle mixed read-write deadlock detection well, the
self tests could be detected as expected, no need to use this
work-around.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200807074238.1632519-18-boqun.feng@gmail.com
---
 lib/locking-selftest.c | 8 --------
 1 file changed, 8 deletions(-)

(limited to 'lib')

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 76c314ab4f03..4264cf4b60bb 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -2273,14 +2273,6 @@ void locking_selftest(void)
 	print_testname("mixed read-lock/lock-write ABBA");
 	pr_cont("             |");
 	dotest(rlock_ABBA1, FAILURE, LOCKTYPE_RWLOCK);
-#ifdef CONFIG_PROVE_LOCKING
-	/*
-	 * Lockdep does indeed fail here, but there's nothing we can do about
-	 * that now.  Don't kill lockdep for it.
-	 */
-	unexpected_testcase_failures--;
-#endif
-
 	pr_cont("             |");
 	dotest(rwsem_ABBA1, FAILURE, LOCKTYPE_RWSEM);
 
-- 
cgit v1.2.3


From ad56450db86413ff911eb527b5a49e04a4345e61 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Fri, 7 Aug 2020 15:42:37 +0800
Subject: locking/selftest: Add test cases for queued_read_lock()

Add two self test cases for the following case:

	P0:			P1:			P2:

				<in irq handler>
	spin_lock_irq(&slock)	read_lock(&rwlock)
							write_lock_irq(&rwlock)
	read_lock(&rwlock)	spin_lock(&slock)

, which is a deadlock, as the read_lock() on P0 cannot get the lock
because of the fairness.

	P0:			P1:			P2:

	<in irq handler>
	spin_lock(&slock)	read_lock(&rwlock)
							write_lock(&rwlock)
	read_lock(&rwlock)	spin_lock_irq(&slock)

, which is not a deadlock, as the read_lock() on P0 can get the lock
because it could use the unfair fastpass.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200807074238.1632519-19-boqun.feng@gmail.com
---
 lib/locking-selftest.c | 104 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 104 insertions(+)

(limited to 'lib')

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 4264cf4b60bb..17f8f6f37165 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -2201,6 +2201,108 @@ static void ww_tests(void)
 	pr_cont("\n");
 }
 
+
+/*
+ * <in hardirq handler>
+ * read_lock(&A);
+ *			<hardirq disable>
+ *			spin_lock(&B);
+ * spin_lock(&B);
+ *			read_lock(&A);
+ *
+ * is a deadlock.
+ */
+static void queued_read_lock_hardirq_RE_Er(void)
+{
+	HARDIRQ_ENTER();
+	read_lock(&rwlock_A);
+	LOCK(B);
+	UNLOCK(B);
+	read_unlock(&rwlock_A);
+	HARDIRQ_EXIT();
+
+	HARDIRQ_DISABLE();
+	LOCK(B);
+	read_lock(&rwlock_A);
+	read_unlock(&rwlock_A);
+	UNLOCK(B);
+	HARDIRQ_ENABLE();
+}
+
+/*
+ * <in hardirq handler>
+ * spin_lock(&B);
+ *			<hardirq disable>
+ *			read_lock(&A);
+ * read_lock(&A);
+ *			spin_lock(&B);
+ *
+ * is not a deadlock.
+ */
+static void queued_read_lock_hardirq_ER_rE(void)
+{
+	HARDIRQ_ENTER();
+	LOCK(B);
+	read_lock(&rwlock_A);
+	read_unlock(&rwlock_A);
+	UNLOCK(B);
+	HARDIRQ_EXIT();
+
+	HARDIRQ_DISABLE();
+	read_lock(&rwlock_A);
+	LOCK(B);
+	UNLOCK(B);
+	read_unlock(&rwlock_A);
+	HARDIRQ_ENABLE();
+}
+
+/*
+ * <hardirq disable>
+ * spin_lock(&B);
+ *			read_lock(&A);
+ *			<in hardirq handler>
+ *			spin_lock(&B);
+ * read_lock(&A);
+ *
+ * is a deadlock. Because the two read_lock()s are both non-recursive readers.
+ */
+static void queued_read_lock_hardirq_inversion(void)
+{
+
+	HARDIRQ_ENTER();
+	LOCK(B);
+	UNLOCK(B);
+	HARDIRQ_EXIT();
+
+	HARDIRQ_DISABLE();
+	LOCK(B);
+	read_lock(&rwlock_A);
+	read_unlock(&rwlock_A);
+	UNLOCK(B);
+	HARDIRQ_ENABLE();
+
+	read_lock(&rwlock_A);
+	read_unlock(&rwlock_A);
+}
+
+static void queued_read_lock_tests(void)
+{
+	printk("  --------------------------------------------------------------------------\n");
+	printk("  | queued read lock tests |\n");
+	printk("  ---------------------------\n");
+	print_testname("hardirq read-lock/lock-read");
+	dotest(queued_read_lock_hardirq_RE_Er, FAILURE, LOCKTYPE_RWLOCK);
+	pr_cont("\n");
+
+	print_testname("hardirq lock-read/read-lock");
+	dotest(queued_read_lock_hardirq_ER_rE, SUCCESS, LOCKTYPE_RWLOCK);
+	pr_cont("\n");
+
+	print_testname("hardirq inversion");
+	dotest(queued_read_lock_hardirq_inversion, FAILURE, LOCKTYPE_RWLOCK);
+	pr_cont("\n");
+}
+
 void locking_selftest(void)
 {
 	/*
@@ -2318,6 +2420,8 @@ void locking_selftest(void)
 	/*
 	 * queued_read_lock() specific test cases can be put here
 	 */
+	if (IS_ENABLED(CONFIG_QUEUED_RWLOCKS))
+		queued_read_lock_tests();
 
 	if (unexpected_testcase_failures) {
 		printk("-----------------------------------------------------------------\n");
-- 
cgit v1.2.3


From 96a16f45aed89cf024606a11679b0609d09552c7 Mon Sep 17 00:00:00 2001
From: Boqun Feng <boqun.feng@gmail.com>
Date: Fri, 7 Aug 2020 15:42:38 +0800
Subject: lockdep/selftest: Introduce recursion3

Add a test case shows that USED_IN_*_READ and ENABLE_*_READ can cause
deadlock too.

Signed-off-by: Boqun Feng <boqun.feng@gmail.com>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200807074238.1632519-20-boqun.feng@gmail.com
---
 lib/locking-selftest.c | 55 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 55 insertions(+)

(limited to 'lib')

diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c
index 17f8f6f37165..a899b3f0e2e5 100644
--- a/lib/locking-selftest.c
+++ b/lib/locking-selftest.c
@@ -1249,6 +1249,60 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_rlock)
 #include "locking-selftest-wlock.h"
 GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion2_soft_wlock)
 
+#undef E1
+#undef E2
+#undef E3
+/*
+ * read-lock / write-lock recursion that is unsafe.
+ *
+ * A is a ENABLED_*_READ lock
+ * B is a USED_IN_*_READ lock
+ *
+ * read_lock(A);
+ *			write_lock(B);
+ * <interrupt>
+ * read_lock(B);
+ * 			write_lock(A); // if this one is read_lock(), no deadlock
+ */
+
+#define E1()				\
+					\
+	IRQ_DISABLE();			\
+	WL(B);				\
+	LOCK(A);			\
+	UNLOCK(A);			\
+	WU(B);				\
+	IRQ_ENABLE();
+
+#define E2()				\
+					\
+	RL(A);				\
+	RU(A);				\
+
+#define E3()				\
+					\
+	IRQ_ENTER();			\
+	RL(B);				\
+	RU(B);				\
+	IRQ_EXIT();
+
+/*
+ * Generate 24 testcases:
+ */
+#include "locking-selftest-hardirq.h"
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_rlock)
+
+#include "locking-selftest-wlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_hard_wlock)
+
+#include "locking-selftest-softirq.h"
+#include "locking-selftest-rlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_rlock)
+
+#include "locking-selftest-wlock.h"
+GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion3_soft_wlock)
+
 #ifdef CONFIG_DEBUG_LOCK_ALLOC
 # define I_SPINLOCK(x)	lockdep_reset_lock(&lock_##x.dep_map)
 # define I_RWLOCK(x)	lockdep_reset_lock(&rwlock_##x.dep_map)
@@ -2413,6 +2467,7 @@ void locking_selftest(void)
 
 	DO_TESTCASE_6x2x2RW("irq read-recursion", irq_read_recursion);
 	DO_TESTCASE_6x2x2RW("irq read-recursion #2", irq_read_recursion2);
+	DO_TESTCASE_6x2x2RW("irq read-recursion #3", irq_read_recursion3);
 
 	ww_tests();
 
-- 
cgit v1.2.3


From 695afd3d7d58ec3044d25c9d2fe384ab8627fd20 Mon Sep 17 00:00:00 2001
From: Sedat Dilek <sedat.dilek@gmail.com>
Date: Sun, 16 Aug 2020 14:32:44 +0200
Subject: kbuild: Simplify DEBUG_INFO Kconfig handling

While playing with [1] I saw that the handling
of CONFIG_DEBUG_INFO can be simplified.

[1] https://patchwork.kernel.org/patch/11716107/

Signed-off-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
---
 Makefile          |  6 +++++-
 lib/Kconfig.debug | 10 ++++------
 2 files changed, 9 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/Makefile b/Makefile
index f21168154160..4ef6a73e48a7 100644
--- a/Makefile
+++ b/Makefile
@@ -816,13 +816,15 @@ endif
 DEBUG_CFLAGS	:= $(call cc-option, -fno-var-tracking-assignments)
 
 ifdef CONFIG_DEBUG_INFO
+
 ifdef CONFIG_DEBUG_INFO_SPLIT
 DEBUG_CFLAGS	+= -gsplit-dwarf
 else
 DEBUG_CFLAGS	+= -g
 endif
+
 KBUILD_AFLAGS	+= -Wa,-gdwarf-2
-endif
+
 ifdef CONFIG_DEBUG_INFO_DWARF4
 DEBUG_CFLAGS	+= -gdwarf-4
 endif
@@ -838,6 +840,8 @@ KBUILD_AFLAGS	+= -gz=zlib
 KBUILD_LDFLAGS	+= --compress-debug-sections=zlib
 endif
 
+endif # CONFIG_DEBUG_INFO
+
 KBUILD_CFLAGS += $(DEBUG_CFLAGS)
 export DEBUG_CFLAGS
 
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e068c3c7189a..60f3837f608c 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -212,9 +212,10 @@ config DEBUG_INFO
 
 	  If unsure, say N.
 
+if DEBUG_INFO
+
 config DEBUG_INFO_REDUCED
 	bool "Reduce debugging information"
-	depends on DEBUG_INFO
 	help
 	  If you say Y here gcc is instructed to generate less debugging
 	  information for structure types. This means that tools that
@@ -227,7 +228,6 @@ config DEBUG_INFO_REDUCED
 
 config DEBUG_INFO_COMPRESSED
 	bool "Compressed debugging information"
-	depends on DEBUG_INFO
 	depends on $(cc-option,-gz=zlib)
 	depends on $(ld-option,--compress-debug-sections=zlib)
 	help
@@ -243,7 +243,6 @@ config DEBUG_INFO_COMPRESSED
 
 config DEBUG_INFO_SPLIT
 	bool "Produce split debuginfo in .dwo files"
-	depends on DEBUG_INFO
 	depends on $(cc-option,-gsplit-dwarf)
 	help
 	  Generate debug info into separate .dwo files. This significantly
@@ -259,7 +258,6 @@ config DEBUG_INFO_SPLIT
 
 config DEBUG_INFO_DWARF4
 	bool "Generate dwarf4 debuginfo"
-	depends on DEBUG_INFO
 	depends on $(cc-option,-gdwarf-4)
 	help
 	  Generate dwarf4 debug info. This requires recent versions
@@ -269,7 +267,6 @@ config DEBUG_INFO_DWARF4
 
 config DEBUG_INFO_BTF
 	bool "Generate BTF typeinfo"
-	depends on DEBUG_INFO
 	depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED
 	depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST
 	help
@@ -279,7 +276,6 @@ config DEBUG_INFO_BTF
 
 config GDB_SCRIPTS
 	bool "Provide GDB scripts for kernel debugging"
-	depends on DEBUG_INFO
 	help
 	  This creates the required links to GDB helper scripts in the
 	  build directory. If you load vmlinux into gdb, the helper
@@ -288,6 +284,8 @@ config GDB_SCRIPTS
 	  instance. See Documentation/dev-tools/gdb-kernel-debugging.rst
 	  for further details.
 
+endif # DEBUG_INFO
+
 config ENABLE_MUST_CHECK
 	bool "Enable __must_check logic"
 	default y
-- 
cgit v1.2.3


From 1c4dd334df3a0627ff57b35612057e2b497e373b Mon Sep 17 00:00:00 2001
From: Paul Cercueil <paul@crapouillou.net>
Date: Tue, 1 Sep 2020 16:26:50 +0200
Subject: lib: decompress_unzstd: Limit output size

The zstd decompression code, as it is right now, will most likely fail
on 32-bit systems, as the default output buffer size causes the buffer's
end address to overflow.

Address this issue by setting a sane default to the default output size,
with a value that won't overflow the buffer's end address.

Signed-off-by: Paul Cercueil <paul@crapouillou.net>
Reviewed-by: Nick Terrell <terrelln@fb.com>
Signed-off-by: Thomas Bogendoerfer <tsbogend@alpha.franken.de>
---
 lib/decompress_unzstd.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c
index 0ad2c15479ed..790abc472f5b 100644
--- a/lib/decompress_unzstd.c
+++ b/lib/decompress_unzstd.c
@@ -178,8 +178,13 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len,
 	int err;
 	size_t ret;
 
+	/*
+	 * ZSTD decompression code won't be happy if the buffer size is so big
+	 * that its end address overflows. When the size is not provided, make
+	 * it as big as possible without having the end address overflow.
+	 */
 	if (out_len == 0)
-		out_len = LONG_MAX; /* no limit */
+		out_len = UINTPTR_MAX - (uintptr_t)out_buf;
 
 	if (fill == NULL && flush == NULL)
 		/*
-- 
cgit v1.2.3


From 35feb60474bf4f7fa7840e14fc7fd344996b919d Mon Sep 17 00:00:00 2001
From: "Paul E. McKenney" <paulmck@kernel.org>
Date: Tue, 30 Jun 2020 13:22:54 -0700
Subject: kernel/smp: Provide CSD lock timeout diagnostics

This commit causes csd_lock_wait() to emit diagnostics when a CPU
fails to respond quickly enough to one of the smp_call_function()
family of function calls.  These diagnostics are enabled by a new
CSD_LOCK_WAIT_DEBUG Kconfig option that depends on DEBUG_KERNEL.

This commit was inspired by an earlier patch by Josef Bacik.

[ paulmck: Fix for syzbot+0f719294463916a3fc0e@syzkaller.appspotmail.com ]
[ paulmck: Fix KASAN use-after-free issue reported by Qian Cai. ]
[ paulmck: Fix botched nr_cpu_ids comparison per Dan Carpenter. ]
[ paulmck: Apply Peter Zijlstra feedback. ]
Link: https://lore.kernel.org/lkml/00000000000042f21905a991ecea@google.com
Link: https://lore.kernel.org/lkml/0000000000002ef21705a9933cf3@google.com
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Ingo Molnar <mingo@kernel.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Paul E. McKenney <paulmck@kernel.org>
---
 kernel/smp.c      | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 lib/Kconfig.debug |  11 +++++
 2 files changed, 141 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/kernel/smp.c b/kernel/smp.c
index 865a876f83ce..c5d31885bd30 100644
--- a/kernel/smp.c
+++ b/kernel/smp.c
@@ -20,6 +20,9 @@
 #include <linux/sched.h>
 #include <linux/sched/idle.h>
 #include <linux/hypervisor.h>
+#include <linux/sched/clock.h>
+#include <linux/nmi.h>
+#include <linux/sched/debug.h>
 
 #include "smpboot.h"
 #include "sched/smp.h"
@@ -96,6 +99,103 @@ void __init call_function_init(void)
 	smpcfd_prepare_cpu(smp_processor_id());
 }
 
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+
+static DEFINE_PER_CPU(call_single_data_t *, cur_csd);
+static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func);
+static DEFINE_PER_CPU(void *, cur_csd_info);
+
+#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC)
+atomic_t csd_bug_count = ATOMIC_INIT(0);
+
+/* Record current CSD work for current CPU, NULL to erase. */
+static void csd_lock_record(call_single_data_t *csd)
+{
+	if (!csd) {
+		smp_mb(); /* NULL cur_csd after unlock. */
+		__this_cpu_write(cur_csd, NULL);
+		return;
+	}
+	__this_cpu_write(cur_csd_func, csd->func);
+	__this_cpu_write(cur_csd_info, csd->info);
+	smp_wmb(); /* func and info before csd. */
+	__this_cpu_write(cur_csd, csd);
+	smp_mb(); /* Update cur_csd before function call. */
+		  /* Or before unlock, as the case may be. */
+}
+
+static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd)
+{
+	unsigned int csd_type;
+
+	csd_type = CSD_TYPE(csd);
+	if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC)
+		return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */
+	return -1;
+}
+
+/*
+ * Complain if too much time spent waiting.  Note that only
+ * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU,
+ * so waiting on other types gets much less information.
+ */
+static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id)
+{
+	int cpu = -1;
+	int cpux;
+	bool firsttime;
+	u64 ts2, ts_delta;
+	call_single_data_t *cpu_cur_csd;
+	unsigned int flags = READ_ONCE(csd->flags);
+
+	if (!(flags & CSD_FLAG_LOCK)) {
+		if (!unlikely(*bug_id))
+			return true;
+		cpu = csd_lock_wait_getcpu(csd);
+		pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n",
+			 *bug_id, raw_smp_processor_id(), cpu);
+		return true;
+	}
+
+	ts2 = sched_clock();
+	ts_delta = ts2 - *ts1;
+	if (likely(ts_delta <= CSD_LOCK_TIMEOUT))
+		return false;
+
+	firsttime = !*bug_id;
+	if (firsttime)
+		*bug_id = atomic_inc_return(&csd_bug_count);
+	cpu = csd_lock_wait_getcpu(csd);
+	if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu))
+		cpux = 0;
+	else
+		cpux = cpu;
+	cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */
+	pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n",
+		 firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0,
+		 cpu, csd->func, csd->info);
+	if (cpu_cur_csd && csd != cpu_cur_csd) {
+		pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n",
+			 *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)),
+			 READ_ONCE(per_cpu(cur_csd_info, cpux)));
+	} else {
+		pr_alert("\tcsd: CSD lock (#%d) %s.\n",
+			 *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request");
+	}
+	if (cpu >= 0) {
+		if (!trigger_single_cpu_backtrace(cpu))
+			dump_cpu_task(cpu);
+		if (!cpu_cur_csd) {
+			pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu);
+			arch_send_call_function_single_ipi(cpu);
+		}
+	}
+	dump_stack();
+	*ts1 = ts2;
+
+	return false;
+}
+
 /*
  * csd_lock/csd_unlock used to serialize access to per-cpu csd resources
  *
@@ -103,10 +203,30 @@ void __init call_function_init(void)
  * previous function call. For multi-cpu calls its even more interesting
  * as we'll have to ensure no other cpu is observing our csd.
  */
+static __always_inline void csd_lock_wait(call_single_data_t *csd)
+{
+	int bug_id = 0;
+	u64 ts0, ts1;
+
+	ts1 = ts0 = sched_clock();
+	for (;;) {
+		if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id))
+			break;
+		cpu_relax();
+	}
+	smp_acquire__after_ctrl_dep();
+}
+
+#else
+static void csd_lock_record(call_single_data_t *csd)
+{
+}
+
 static __always_inline void csd_lock_wait(call_single_data_t *csd)
 {
 	smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK));
 }
+#endif
 
 static __always_inline void csd_lock(call_single_data_t *csd)
 {
@@ -166,9 +286,11 @@ static int generic_exec_single(int cpu, call_single_data_t *csd)
 		 * We can unlock early even for the synchronous on-stack case,
 		 * since we're doing this from the same CPU..
 		 */
+		csd_lock_record(csd);
 		csd_unlock(csd);
 		local_irq_save(flags);
 		func(info);
+		csd_lock_record(NULL);
 		local_irq_restore(flags);
 		return 0;
 	}
@@ -268,8 +390,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 				entry = &csd_next->llist;
 			}
 
+			csd_lock_record(csd);
 			func(info);
 			csd_unlock(csd);
+			csd_lock_record(NULL);
 		} else {
 			prev = &csd->llist;
 		}
@@ -296,8 +420,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline)
 				smp_call_func_t func = csd->func;
 				void *info = csd->info;
 
+				csd_lock_record(csd);
 				csd_unlock(csd);
 				func(info);
+				csd_lock_record(NULL);
 			} else if (type == CSD_TYPE_IRQ_WORK) {
 				irq_work_single(csd);
 			}
@@ -375,7 +501,8 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info,
 
 	csd->func = func;
 	csd->info = info;
-#ifdef CONFIG_64BIT
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+	csd->src = smp_processor_id();
 	csd->dst = cpu;
 #endif
 
@@ -543,7 +670,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask,
 			csd->flags |= CSD_TYPE_SYNC;
 		csd->func = func;
 		csd->info = info;
-#ifdef CONFIG_64BIT
+#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG
+		csd->src = smp_processor_id();
 		csd->dst = cpu;
 #endif
 		if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu)))
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e068c3c7189a..86a35fdfe021 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1367,6 +1367,17 @@ config WW_MUTEX_SELFTEST
 	  Say M if you want these self tests to build as a module.
 	  Say N if you are unsure.
 
+config CSD_LOCK_WAIT_DEBUG
+	bool "Debugging for csd_lock_wait(), called from smp_call_function*()"
+	depends on DEBUG_KERNEL
+	depends on 64BIT
+	default n
+	help
+	  This option enables debug prints when CPUs are slow to respond
+	  to the smp_call_function*() IPI wrappers.  These debug prints
+	  include the IPI handler function currently executing (if any)
+	  and relevant stack traces.
+
 endmenu # lock debugging
 
 config TRACE_IRQFLAGS
-- 
cgit v1.2.3


From 28d9fdf04573cfed6a205220fb038dd444568fa3 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Sat, 22 Aug 2020 21:04:43 -0700
Subject: lib: devres: delete duplicated words

Drop the repeated word "the".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Andrew Morton <akpm@linux-foundation.org>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200823040443.25900-1-rdunlap@infradead.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/devres.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/devres.c b/lib/devres.c
index ebb1573d9ae3..8bd3ed450614 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -217,7 +217,7 @@ void __iomem *devm_ioremap_resource_wc(struct device *dev,
  * Please Note: This is not a one-to-one replacement for of_iomap() because the
  * of_iomap() function does not track whether the region is already mapped.  If
  * two drivers try to map the same memory, the of_iomap() function will succeed
- * but the the devm_of_iomap() function will return -EBUSY.
+ * but the devm_of_iomap() function will return -EBUSY.
  *
  */
 void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index,
-- 
cgit v1.2.3


From 18efb2f9e897ac65e7a1b2892f4a53e404534eba Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 29 Jul 2020 10:58:29 -0700
Subject: test_firmware: Test platform fw loading on non-EFI systems

On non-EFI systems, it wasn't possible to test the platform firmware
loader because it will have never set "checked_fw" during __init.
Instead, allow the test code to override this check. Additionally split
the declarations into a private header file so it there is greater
enforcement of the symbol visibility.

Fixes: 548193cba2a7 ("test_firmware: add support for firmware_request_platform")
Cc: stable@vger.kernel.org
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Acked-by: Scott Branden <scott.branden@broadcom.com>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20200729175845.1745471-2-keescook@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/efi/embedded-firmware.c | 21 ++++++++++++++++-----
 drivers/firmware/efi/embedded-firmware.h | 21 +++++++++++++++++++++
 include/linux/efi_embedded_fw.h          | 13 -------------
 lib/test_firmware.c                      |  5 +++++
 4 files changed, 42 insertions(+), 18 deletions(-)
 create mode 100644 drivers/firmware/efi/embedded-firmware.h

(limited to 'lib')

diff --git a/drivers/firmware/efi/embedded-firmware.c b/drivers/firmware/efi/embedded-firmware.c
index e97a9c9d010c..f1330f55f7f4 100644
--- a/drivers/firmware/efi/embedded-firmware.c
+++ b/drivers/firmware/efi/embedded-firmware.c
@@ -14,11 +14,22 @@
 #include <linux/vmalloc.h>
 #include <crypto/sha.h>
 
+#include "embedded-firmware.h"
+
+#ifdef CONFIG_TEST_FIRMWARE
+# define EFI_EMBEDDED_FW_VISIBILITY
+#else
+# define EFI_EMBEDDED_FW_VISIBILITY static
+#endif
+
+EFI_EMBEDDED_FW_VISIBILITY LIST_HEAD(efi_embedded_fw_list);
+EFI_EMBEDDED_FW_VISIBILITY bool efi_embedded_fw_checked;
+
 /* Exported for use by lib/test_firmware.c only */
-LIST_HEAD(efi_embedded_fw_list);
+#ifdef CONFIG_TEST_FIRMWARE
 EXPORT_SYMBOL_GPL(efi_embedded_fw_list);
-
-static bool checked_for_fw;
+EXPORT_SYMBOL_GPL(efi_embedded_fw_checked);
+#endif
 
 static const struct dmi_system_id * const embedded_fw_table[] = {
 #ifdef CONFIG_TOUCHSCREEN_DMI
@@ -116,14 +127,14 @@ void __init efi_check_for_embedded_firmwares(void)
 		}
 	}
 
-	checked_for_fw = true;
+	efi_embedded_fw_checked = true;
 }
 
 int efi_get_embedded_fw(const char *name, const u8 **data, size_t *size)
 {
 	struct efi_embedded_fw *iter, *fw = NULL;
 
-	if (!checked_for_fw) {
+	if (!efi_embedded_fw_checked) {
 		pr_warn("Warning %s called while we did not check for embedded fw\n",
 			__func__);
 		return -ENOENT;
diff --git a/drivers/firmware/efi/embedded-firmware.h b/drivers/firmware/efi/embedded-firmware.h
new file mode 100644
index 000000000000..bb894eae0906
--- /dev/null
+++ b/drivers/firmware/efi/embedded-firmware.h
@@ -0,0 +1,21 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _EFI_EMBEDDED_FW_INTERNAL_H_
+#define _EFI_EMBEDDED_FW_INTERNAL_H_
+
+/*
+ * This struct and efi_embedded_fw_list are private to the efi-embedded fw
+ * implementation they only in separate header for use by lib/test_firmware.c.
+ */
+struct efi_embedded_fw {
+	struct list_head list;
+	const char *name;
+	const u8 *data;
+	size_t length;
+};
+
+#ifdef CONFIG_TEST_FIRMWARE
+extern struct list_head efi_embedded_fw_list;
+extern bool efi_embedded_fw_checked;
+#endif
+
+#endif /* _EFI_EMBEDDED_FW_INTERNAL_H_ */
diff --git a/include/linux/efi_embedded_fw.h b/include/linux/efi_embedded_fw.h
index 57eac5241303..4ad5db9f5312 100644
--- a/include/linux/efi_embedded_fw.h
+++ b/include/linux/efi_embedded_fw.h
@@ -7,19 +7,6 @@
 
 #define EFI_EMBEDDED_FW_PREFIX_LEN		8
 
-/*
- * This struct and efi_embedded_fw_list are private to the efi-embedded fw
- * implementation they are in this header for use by lib/test_firmware.c only!
- */
-struct efi_embedded_fw {
-	struct list_head list;
-	const char *name;
-	const u8 *data;
-	size_t length;
-};
-
-extern struct list_head efi_embedded_fw_list;
-
 /**
  * struct efi_embedded_fw_desc - This struct is used by the EFI embedded-fw
  *                               code to search for embedded firmwares.
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 9fee2b93a8d1..62af792e151c 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -489,6 +489,7 @@ out:
 static DEVICE_ATTR_WO(trigger_request);
 
 #ifdef CONFIG_EFI_EMBEDDED_FIRMWARE
+#include "../drivers/firmware/efi/embedded-firmware.h"
 static ssize_t trigger_request_platform_store(struct device *dev,
 					      struct device_attribute *attr,
 					      const char *buf, size_t count)
@@ -501,6 +502,7 @@ static ssize_t trigger_request_platform_store(struct device *dev,
 	};
 	struct efi_embedded_fw efi_embedded_fw;
 	const struct firmware *firmware = NULL;
+	bool saved_efi_embedded_fw_checked;
 	char *name;
 	int rc;
 
@@ -513,6 +515,8 @@ static ssize_t trigger_request_platform_store(struct device *dev,
 	efi_embedded_fw.data = (void *)test_data;
 	efi_embedded_fw.length = sizeof(test_data);
 	list_add(&efi_embedded_fw.list, &efi_embedded_fw_list);
+	saved_efi_embedded_fw_checked = efi_embedded_fw_checked;
+	efi_embedded_fw_checked = true;
 
 	pr_info("loading '%s'\n", name);
 	rc = firmware_request_platform(&firmware, name, dev);
@@ -530,6 +534,7 @@ static ssize_t trigger_request_platform_store(struct device *dev,
 	rc = count;
 
 out:
+	efi_embedded_fw_checked = saved_efi_embedded_fw_checked;
 	release_firmware(firmware);
 	list_del(&efi_embedded_fw.list);
 	kfree(name);
-- 
cgit v1.2.3


From e2028c8e6bf9a12dfe83fc12ce6d5d9ab1628b0b Mon Sep 17 00:00:00 2001
From: Sven Schneider <s.schneider@arkona-technologies.de>
Date: Thu, 20 Aug 2020 10:21:37 +0200
Subject: lib/fonts: add font 6x8 for OLED display

This font is derived from lib/fonts/font_6x10.c and is useful for small
OLED displays

Signed-off-by: Sven Schneider <s.schneider@arkona-technologies.de>
Signed-off-by: Sascha Hauer <s.hauer@pengutronix.de>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Signed-off-by: Bartlomiej Zolnierkiewicz <b.zolnierkie@samsung.com>
Link: https://patchwork.freedesktop.org/patch/msgid/20200820082137.5907-1-s.hauer@pengutronix.de
---
 include/linux/font.h |    4 +-
 lib/fonts/Kconfig    |    7 +
 lib/fonts/Makefile   |    1 +
 lib/fonts/font_6x8.c | 2576 ++++++++++++++++++++++++++++++++++++++++++++++++++
 lib/fonts/fonts.c    |    3 +
 5 files changed, 2590 insertions(+), 1 deletion(-)
 create mode 100644 lib/fonts/font_6x8.c

(limited to 'lib')

diff --git a/include/linux/font.h b/include/linux/font.h
index 51b91c8b69d5..4a3f8741bb7e 100644
--- a/include/linux/font.h
+++ b/include/linux/font.h
@@ -33,6 +33,7 @@ struct font_desc {
 #define	MINI4x6_IDX	9
 #define FONT6x10_IDX	10
 #define TER16x32_IDX	11
+#define FONT6x8_IDX	12
 
 extern const struct font_desc	font_vga_8x8,
 			font_vga_8x16,
@@ -45,7 +46,8 @@ extern const struct font_desc	font_vga_8x8,
 			font_acorn_8x8,
 			font_mini_4x6,
 			font_6x10,
-			font_ter_16x32;
+			font_ter_16x32,
+			font_6x8;
 
 /* Find a font with a specific name */
 
diff --git a/lib/fonts/Kconfig b/lib/fonts/Kconfig
index 37baa79cdd71..c035fde66aeb 100644
--- a/lib/fonts/Kconfig
+++ b/lib/fonts/Kconfig
@@ -119,6 +119,12 @@ config FONT_TER16x32
 	  This is the high resolution, large version for use with HiDPI screens.
 	  If the standard font is unreadable for you, say Y, otherwise say N.
 
+config FONT_6x8
+	bool "OLED 6x8 font" if FONTS
+	depends on FRAMEBUFFER_CONSOLE
+	help
+	  This font is useful for small displays (OLED).
+
 config FONT_AUTOSELECT
 	def_bool y
 	depends on !FONT_8x8
@@ -132,6 +138,7 @@ config FONT_AUTOSELECT
 	depends on !FONT_SUN12x22
 	depends on !FONT_10x18
 	depends on !FONT_TER16x32
+	depends on !FONT_6x8
 	select FONT_8x16
 
 endif # FONT_SUPPORT
diff --git a/lib/fonts/Makefile b/lib/fonts/Makefile
index ed95070860de..e16f68492174 100644
--- a/lib/fonts/Makefile
+++ b/lib/fonts/Makefile
@@ -15,6 +15,7 @@ font-objs-$(CONFIG_FONT_ACORN_8x8) += font_acorn_8x8.o
 font-objs-$(CONFIG_FONT_MINI_4x6)  += font_mini_4x6.o
 font-objs-$(CONFIG_FONT_6x10)      += font_6x10.o
 font-objs-$(CONFIG_FONT_TER16x32)  += font_ter16x32.o
+font-objs-$(CONFIG_FONT_6x8)       += font_6x8.o
 
 font-objs += $(font-objs-y)
 
diff --git a/lib/fonts/font_6x8.c b/lib/fonts/font_6x8.c
new file mode 100644
index 000000000000..e06447788418
--- /dev/null
+++ b/lib/fonts/font_6x8.c
@@ -0,0 +1,2576 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/font.h>
+
+#define FONTDATAMAX 2048
+
+static const unsigned char fontdata_6x8[FONTDATAMAX] = {
+
+	/* 0 0x00 '^@' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 1 0x01 '^A' */
+	0x78, /* 011110 */
+	0x84, /* 100001 */
+	0xCC, /* 110011 */
+	0x84, /* 100001 */
+	0xCC, /* 110011 */
+	0xB4, /* 101101 */
+	0x78, /* 011110 */
+	0x00, /* 000000 */
+
+	/* 2 0x02 '^B' */
+	0x78, /* 011110 */
+	0xFC, /* 111111 */
+	0xB4, /* 101101 */
+	0xFC, /* 111111 */
+	0xB4, /* 101101 */
+	0xCC, /* 110011 */
+	0x78, /* 011110 */
+	0x00, /* 000000 */
+
+	/* 3 0x03 '^C' */
+	0x00, /* 000000 */
+	0x28, /* 001010 */
+	0x7C, /* 011111 */
+	0x7C, /* 011111 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 4 0x04 '^D' */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x7C, /* 011111 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 5 0x05 '^E' */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x38, /* 001110 */
+	0x6C, /* 011011 */
+	0x6C, /* 011011 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 6 0x06 '^F' */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x7C, /* 011111 */
+	0x7C, /* 011111 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 7 0x07 '^G' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x78, /* 011110 */
+	0x30, /* 001100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 8 0x08 '^H' */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xCC, /* 110011 */
+	0x84, /* 100001 */
+	0xCC, /* 110011 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+
+	/* 9 0x09 '^I' */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x48, /* 010010 */
+	0x84, /* 100001 */
+	0x48, /* 010010 */
+	0x30, /* 001100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 10 0x0A '^J' */
+	0xFC, /* 111111 */
+	0xCC, /* 110011 */
+	0xB4, /* 101101 */
+	0x78, /* 011110 */
+	0xB4, /* 101101 */
+	0xCC, /* 110011 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+
+	/* 11 0x0B '^K' */
+	0x3C, /* 001111 */
+	0x14, /* 000101 */
+	0x20, /* 001000 */
+	0x78, /* 011110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 12 0x0C '^L' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 13 0x0D '^M' */
+	0x18, /* 000110 */
+	0x14, /* 000101 */
+	0x14, /* 000101 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x70, /* 011100 */
+	0x60, /* 011000 */
+	0x00, /* 000000 */
+
+	/* 14 0x0E '^N' */
+	0x3C, /* 001111 */
+	0x24, /* 001001 */
+	0x3C, /* 001111 */
+	0x24, /* 001001 */
+	0x24, /* 001001 */
+	0x6C, /* 011011 */
+	0x6C, /* 011011 */
+	0x00, /* 000000 */
+
+	/* 15 0x0F '^O' */
+	0x10, /* 000100 */
+	0x54, /* 010101 */
+	0x38, /* 001110 */
+	0x6C, /* 011011 */
+	0x38, /* 001110 */
+	0x54, /* 010101 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 16 0x10 '^P' */
+	0x40, /* 010000 */
+	0x60, /* 011000 */
+	0x70, /* 011100 */
+	0x78, /* 011110 */
+	0x70, /* 011100 */
+	0x60, /* 011000 */
+	0x40, /* 010000 */
+	0x00, /* 000000 */
+
+	/* 17 0x11 '^Q' */
+	0x04, /* 000001 */
+	0x0C, /* 000011 */
+	0x1C, /* 000111 */
+	0x3C, /* 001111 */
+	0x1C, /* 000111 */
+	0x0C, /* 000011 */
+	0x04, /* 000001 */
+	0x00, /* 000000 */
+
+	/* 18 0x12 '^R' */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x54, /* 010101 */
+	0x10, /* 000100 */
+	0x54, /* 010101 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 19 0x13 '^S' */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x00, /* 000000 */
+	0x48, /* 010010 */
+	0x00, /* 000000 */
+
+	/* 20 0x14 '^T' */
+	0x3C, /* 001111 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x3C, /* 001111 */
+	0x14, /* 000101 */
+	0x14, /* 000101 */
+	0x14, /* 000101 */
+	0x00, /* 000000 */
+
+	/* 21 0x15 '^U' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x30, /* 001100 */
+	0x28, /* 001010 */
+	0x14, /* 000101 */
+	0x0C, /* 000011 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+
+	/* 22 0x16 '^V' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xF8, /* 111110 */
+	0xF8, /* 111110 */
+	0xF8, /* 111110 */
+	0x00, /* 000000 */
+
+	/* 23 0x17 '^W' */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x54, /* 010101 */
+	0x10, /* 000100 */
+	0x54, /* 010101 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x7C, /* 011111 */
+
+	/* 24 0x18 '^X' */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x54, /* 010101 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 25 0x19 '^Y' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x54, /* 010101 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 26 0x1A '^Z' */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x7C, /* 011111 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 27 0x1B '^[' */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x7C, /* 011111 */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 28 0x1C '^\' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x78, /* 011110 */
+	0x00, /* 000000 */
+
+	/* 29 0x1D '^]' */
+	0x00, /* 000000 */
+	0x48, /* 010010 */
+	0x84, /* 100001 */
+	0xFC, /* 111111 */
+	0x84, /* 100001 */
+	0x48, /* 010010 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 30 0x1E '^^' */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x38, /* 001110 */
+	0x7C, /* 011111 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 31 0x1F '^_' */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x7C, /* 011111 */
+	0x38, /* 001110 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 32 0x20 ' ' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 33 0x21 '!' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 34 0x22 '"' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 35 0x23 '#' */
+	0x00, /* 000000 */
+	0x28, /* 001010 */
+	0x7C, /* 011111 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x7C, /* 011111 */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+
+	/* 36 0x24 '$' */
+	0x10, /* 000000 */
+	0x38, /* 001000 */
+	0x40, /* 010000 */
+	0x30, /* 001000 */
+	0x08, /* 000000 */
+	0x70, /* 011000 */
+	0x20, /* 001000 */
+	0x00, /* 000000 */
+
+	/* 37 0x25 '%' */
+	0x64, /* 011001 */
+	0x64, /* 011001 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x4C, /* 010011 */
+	0x4C, /* 010011 */
+	0x00, /* 000000 */
+
+	/* 38 0x26 '&' */
+	0x30, /* 001100 */
+	0x48, /* 010010 */
+	0x50, /* 010100 */
+	0x20, /* 001000 */
+	0x54, /* 010101 */
+	0x48, /* 010010 */
+	0x34, /* 001101 */
+	0x00, /* 000000 */
+
+	/* 39 0x27 ''' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 40 0x28 '(' */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x20, /* 001000 */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x00, /* 000000 */
+
+	/* 41 0x29 ')' */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x08, /* 000010 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x00, /* 000000 */
+
+	/* 42 0x2A '*' */
+	0x10, /* 000100 */
+	0x54, /* 010101 */
+	0x38, /* 001110 */
+	0x54, /* 010101 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 43 0x2B '+' */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x7C, /* 011111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 44 0x2C ',' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x30, /* 001100 */
+	0x20, /* 001000 */
+
+	/* 45 0x2D '-' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 46 0x2E '.' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x18, /* 000110 */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+
+	/* 47 0x2F '/' */
+	0x04, /* 000001 */
+	0x08, /* 000010 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x20, /* 001000 */
+	0x40, /* 010000 */
+
+	/* 48 0x30 '0' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x4C, /* 010011 */
+	0x54, /* 010101 */
+	0x64, /* 011001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 49 0x31 '1' */
+	0x10, /* 000100 */
+	0x30, /* 001100 */
+	0x50, /* 010100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 50 0x32 '2' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x04, /* 000001 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 51 0x33 '3' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x04, /* 000001 */
+	0x18, /* 000110 */
+	0x04, /* 000001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 52 0x34 '4' */
+	0x08, /* 000010 */
+	0x18, /* 000110 */
+	0x28, /* 001010 */
+	0x48, /* 010010 */
+	0x7C, /* 011111 */
+	0x08, /* 000010 */
+	0x08, /* 000010 */
+	0x00, /* 000000 */
+
+	/* 53 0x35 '5' */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x78, /* 011110 */
+	0x04, /* 000001 */
+	0x04, /* 000001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 54 0x36 '6' */
+	0x18, /* 000110 */
+	0x20, /* 001000 */
+	0x40, /* 010000 */
+	0x78, /* 011110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 55 0x37 '7' */
+	0x7C, /* 011111 */
+	0x04, /* 000001 */
+	0x04, /* 000001 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 56 0x38 '8' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 57 0x39 '9' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x04, /* 000001 */
+	0x08, /* 000010 */
+	0x30, /* 001100 */
+	0x00, /* 000000 */
+
+	/* 58 0x3A ':' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x18, /* 000110 */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x18, /* 000110 */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+
+	/* 59 0x3B ';' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x30, /* 001100 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x30, /* 001100 */
+	0x20, /* 001000 */
+
+	/* 60 0x3C '<' */
+	0x04, /* 000001 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x04, /* 000001 */
+	0x00, /* 000000 */
+
+	/* 61 0x3D '=' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 62 0x3E '>' */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x04, /* 000001 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x00, /* 000000 */
+
+	/* 63 0x3F '?' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x04, /* 000001 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 64 0x40 '@' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x5C, /* 010111 */
+	0x54, /* 010101 */
+	0x5C, /* 010111 */
+	0x40, /* 010000 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 65 0x41 'A' */
+	0x10, /* 000100 */
+	0x28, /* 001010 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 66 0x42 'B' */
+	0x78, /* 011110 */
+	0x24, /* 001001 */
+	0x24, /* 001001 */
+	0x38, /* 001110 */
+	0x24, /* 001001 */
+	0x24, /* 001001 */
+	0x78, /* 011110 */
+	0x00, /* 000000 */
+
+	/* 67 0x43 'C' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 68 0x44 'D' */
+	0x78, /* 011110 */
+	0x24, /* 001001 */
+	0x24, /* 001001 */
+	0x24, /* 001001 */
+	0x24, /* 001001 */
+	0x24, /* 001001 */
+	0x78, /* 011110 */
+	0x00, /* 000000 */
+
+	/* 69 0x45 'E' */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x78, /* 011110 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 70 0x46 'F' */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x78, /* 011110 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x00, /* 000000 */
+
+	/* 71 0x47 'G' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x40, /* 010000 */
+	0x5C, /* 010111 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 72 0x48 'H' */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 73 0x49 'I' */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 74 0x4A 'J' */
+	0x1C, /* 000111 */
+	0x08, /* 000010 */
+	0x08, /* 000010 */
+	0x08, /* 000010 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x30, /* 001100 */
+	0x00, /* 000000 */
+
+	/* 75 0x4B 'K' */
+	0x44, /* 010001 */
+	0x48, /* 010010 */
+	0x50, /* 010100 */
+	0x60, /* 011000 */
+	0x50, /* 010100 */
+	0x48, /* 010010 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 76 0x4C 'L' */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 77 0x4D 'M' */
+	0x44, /* 010001 */
+	0x6C, /* 011011 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 78 0x4E 'N' */
+	0x44, /* 010001 */
+	0x64, /* 011001 */
+	0x54, /* 010101 */
+	0x4C, /* 010011 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 79 0x4F 'O' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 80 0x50 'P' */
+	0x78, /* 011110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x78, /* 011110 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x00, /* 000000 */
+
+	/* 81 0x51 'Q' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x54, /* 010101 */
+	0x48, /* 010010 */
+	0x34, /* 001101 */
+	0x00, /* 000000 */
+
+	/* 82 0x52 'R' */
+	0x78, /* 011110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x78, /* 011110 */
+	0x50, /* 010100 */
+	0x48, /* 010010 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 83 0x53 'S' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x40, /* 010000 */
+	0x38, /* 001110 */
+	0x04, /* 000001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 84 0x54 'T' */
+	0x7C, /* 011111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 85 0x55 'U' */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 86 0x56 'V' */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x28, /* 001010 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 87 0x57 'W' */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x6C, /* 011011 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 88 0x58 'X' */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x28, /* 001010 */
+	0x10, /* 000100 */
+	0x28, /* 001010 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 89 0x59 'Y' */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x28, /* 001010 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 90 0x5A 'Z' */
+	0x7C, /* 011111 */
+	0x04, /* 000001 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x40, /* 010000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 91 0x5B '[' */
+	0x18, /* 000110 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+
+	/* 92 0x5C '\' */
+	0x40, /* 010000 */
+	0x20, /* 001000 */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x08, /* 000010 */
+	0x04, /* 000001 */
+
+	/* 93 0x5D ']' */
+	0x30, /* 001100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x30, /* 001100 */
+	0x00, /* 000000 */
+
+	/* 94 0x5E '^' */
+	0x10, /* 000100 */
+	0x28, /* 001010 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 95 0x5F '_' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+
+	/* 96 0x60 '`' */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 97 0x61 'a' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x04, /* 000001 */
+	0x3C, /* 001111 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 98 0x62 'b' */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x58, /* 010110 */
+	0x64, /* 011001 */
+	0x44, /* 010001 */
+	0x64, /* 011001 */
+	0x58, /* 010110 */
+	0x00, /* 000000 */
+
+	/* 99 0x63 'c' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x40, /* 010000 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 100 0x64 'd' */
+	0x04, /* 000001 */
+	0x04, /* 000001 */
+	0x34, /* 001101 */
+	0x4C, /* 010011 */
+	0x44, /* 010001 */
+	0x4C, /* 010011 */
+	0x34, /* 001101 */
+	0x00, /* 000000 */
+
+	/* 101 0x65 'e' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 102 0x66 'f' */
+	0x0C, /* 000011 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 103 0x67 'g' */
+	0x00, /* 000000 */
+	0x34, /* 001101 */
+	0x4C, /* 010011 */
+	0x44, /* 010001 */
+	0x4C, /* 010011 */
+	0x34, /* 001101 */
+	0x04, /* 000001 */
+	0x38, /* 001110 */
+
+	/* 104 0x68 'h' */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x78, /* 011110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 105 0x69 'i' */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 106 0x6A 'j' */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x60, /* 011000 */
+
+	/* 107 0x6B 'k' */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x48, /* 010010 */
+	0x50, /* 010100 */
+	0x70, /* 011100 */
+	0x48, /* 010010 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 108 0x6C 'l' */
+	0x30, /* 001100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 109 0x6D 'm' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x68, /* 011010 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x00, /* 000000 */
+
+	/* 110 0x6E 'n' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x58, /* 010110 */
+	0x64, /* 011001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 111 0x6F 'o' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 112 0x70 'p' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x78, /* 011110 */
+	0x44, /* 010001 */
+	0x64, /* 011001 */
+	0x58, /* 010110 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+
+	/* 113 0x71 'q' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x3C, /* 001111 */
+	0x44, /* 010001 */
+	0x4C, /* 010011 */
+	0x34, /* 001101 */
+	0x04, /* 000001 */
+	0x04, /* 000001 */
+
+	/* 114 0x72 'r' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x58, /* 010110 */
+	0x64, /* 011001 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x00, /* 000000 */
+
+	/* 115 0x73 's' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x3C, /* 001111 */
+	0x40, /* 010000 */
+	0x38, /* 001110 */
+	0x04, /* 000001 */
+	0x78, /* 011110 */
+	0x00, /* 000000 */
+
+	/* 116 0x74 't' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x0C, /* 000011 */
+	0x00, /* 000000 */
+
+	/* 117 0x75 'u' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x4C, /* 010011 */
+	0x34, /* 001101 */
+	0x00, /* 000000 */
+
+	/* 118 0x76 'v' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x28, /* 001010 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 119 0x77 'w' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+
+	/* 120 0x78 'x' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x44, /* 010001 */
+	0x28, /* 001010 */
+	0x10, /* 000100 */
+	0x28, /* 001010 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 121 0x79 'y' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x04, /* 000001 */
+	0x38, /* 001110 */
+
+	/* 122 0x7A 'z' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 123 0x7B '{' */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x00, /* 000000 */
+
+	/* 124 0x7C '|' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 125 0x7D '}' */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x00, /* 000000 */
+
+	/* 126 0x7E '~' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x20, /* 001000 */
+	0x54, /* 010101 */
+	0x08, /* 000010 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 127 0x7F '' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x28, /* 001010 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 128 0x80 '\200' */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x40, /* 010000 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+
+	/* 129 0x81 '\201' */
+	0x00, /* 000000 */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x4C, /* 010011 */
+	0x34, /* 001101 */
+	0x00, /* 000000 */
+
+	/* 130 0x82 '\202' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 131 0x83 '\203' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x04, /* 000001 */
+	0x3C, /* 001111 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 132 0x84 '\204' */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x04, /* 000001 */
+	0x3C, /* 001111 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 133 0x85 '\205' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x04, /* 000001 */
+	0x3C, /* 001111 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 134 0x86 '\206' */
+	0x3C, /* 001111 */
+	0x18, /* 000110 */
+	0x38, /* 001110 */
+	0x04, /* 000001 */
+	0x3C, /* 001111 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 135 0x87 '\207' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x40, /* 010000 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+
+	/* 136 0x88 '\210' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 137 0x89 '\211' */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 138 0x8A '\212' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 139 0x8B '\213' */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 140 0x8C '\214' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 141 0x8D '\215' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 142 0x8E '\216' */
+	0x44, /* 010001 */
+	0x10, /* 000100 */
+	0x28, /* 001010 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 143 0x8F '\217' */
+	0x30, /* 001100 */
+	0x48, /* 010010 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 144 0x90 '\220' */
+	0x10, /* 000100 */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x78, /* 011110 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 145 0x91 '\221' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x78, /* 011110 */
+	0x14, /* 000101 */
+	0x7C, /* 011111 */
+	0x50, /* 010100 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 146 0x92 '\222' */
+	0x3C, /* 001111 */
+	0x50, /* 010100 */
+	0x50, /* 010100 */
+	0x78, /* 011110 */
+	0x50, /* 010100 */
+	0x50, /* 010100 */
+	0x5C, /* 010111 */
+	0x00, /* 000000 */
+
+	/* 147 0x93 '\223' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 148 0x94 '\224' */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 149 0x95 '\225' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 150 0x96 '\226' */
+	0x10, /* 000100 */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x4C, /* 010011 */
+	0x34, /* 001101 */
+	0x00, /* 000000 */
+
+	/* 151 0x97 '\227' */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x4C, /* 010011 */
+	0x34, /* 001101 */
+	0x00, /* 000000 */
+
+	/* 152 0x98 '\230' */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x04, /* 000001 */
+	0x38, /* 001110 */
+
+	/* 153 0x99 '\231' */
+	0x84, /* 100001 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 154 0x9A '\232' */
+	0x88, /* 100010 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 155 0x9B '\233' */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x54, /* 010101 */
+	0x50, /* 010100 */
+	0x54, /* 010101 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 156 0x9C '\234' */
+	0x30, /* 001100 */
+	0x48, /* 010010 */
+	0x40, /* 010000 */
+	0x70, /* 011100 */
+	0x40, /* 010000 */
+	0x44, /* 010001 */
+	0x78, /* 011110 */
+	0x00, /* 000000 */
+
+	/* 157 0x9D '\235' */
+	0x44, /* 010001 */
+	0x28, /* 001010 */
+	0x7C, /* 011111 */
+	0x10, /* 000100 */
+	0x7C, /* 011111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 158 0x9E '\236' */
+	0x70, /* 011100 */
+	0x48, /* 010010 */
+	0x70, /* 011100 */
+	0x48, /* 010010 */
+	0x5C, /* 010111 */
+	0x48, /* 010010 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 159 0x9F '\237' */
+	0x0C, /* 000011 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x60, /* 011000 */
+	0x00, /* 000000 */
+
+	/* 160 0xA0 '\240' */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x04, /* 000001 */
+	0x3C, /* 001111 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 161 0xA1 '\241' */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x30, /* 001100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 162 0xA2 '\242' */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 163 0xA3 '\243' */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x4C, /* 010011 */
+	0x34, /* 001101 */
+	0x00, /* 000000 */
+
+	/* 164 0xA4 '\244' */
+	0x34, /* 001101 */
+	0x58, /* 010110 */
+	0x00, /* 000000 */
+	0x58, /* 010110 */
+	0x64, /* 011001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 165 0xA5 '\245' */
+	0x58, /* 010110 */
+	0x44, /* 010001 */
+	0x64, /* 011001 */
+	0x54, /* 010101 */
+	0x4C, /* 010011 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 166 0xA6 '\246' */
+	0x38, /* 001110 */
+	0x04, /* 000001 */
+	0x3C, /* 001111 */
+	0x44, /* 010001 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 167 0xA7 '\247' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 168 0xA8 '\250' */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x40, /* 010000 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 169 0xA9 '\251' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 170 0xAA '\252' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x04, /* 000001 */
+	0x04, /* 000001 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 171 0xAB '\253' */
+	0x20, /* 001000 */
+	0x24, /* 001001 */
+	0x28, /* 001010 */
+	0x10, /* 000100 */
+	0x28, /* 001010 */
+	0x44, /* 010001 */
+	0x08, /* 000010 */
+	0x1C, /* 000111 */
+
+	/* 172 0xAC '\254' */
+	0x20, /* 001000 */
+	0x24, /* 001001 */
+	0x28, /* 001010 */
+	0x10, /* 000100 */
+	0x28, /* 001010 */
+	0x58, /* 010110 */
+	0x3C, /* 001111 */
+	0x08, /* 000010 */
+
+	/* 173 0xAD '\255' */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+
+	/* 174 0xAE '\256' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x24, /* 001001 */
+	0x48, /* 010010 */
+	0x90, /* 100100 */
+	0x48, /* 010010 */
+	0x24, /* 001001 */
+	0x00, /* 000000 */
+
+	/* 175 0xAF '\257' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x90, /* 100100 */
+	0x48, /* 010010 */
+	0x24, /* 001001 */
+	0x48, /* 010010 */
+	0x90, /* 100100 */
+	0x00, /* 000000 */
+
+	/* 176 0xB0 '\260' */
+	0x10, /* 000100 */
+	0x44, /* 010001 */
+	0x10, /* 000100 */
+	0x44, /* 010001 */
+	0x10, /* 000100 */
+	0x44, /* 010001 */
+	0x10, /* 000100 */
+	0x44, /* 010001 */
+
+	/* 177 0xB1 '\261' */
+	0xA8, /* 101010 */
+	0x54, /* 010101 */
+	0xA8, /* 101010 */
+	0x54, /* 010101 */
+	0xA8, /* 101010 */
+	0x54, /* 010101 */
+	0xA8, /* 101010 */
+	0x54, /* 010101 */
+
+	/* 178 0xB2 '\262' */
+	0xDC, /* 110111 */
+	0x74, /* 011101 */
+	0xDC, /* 110111 */
+	0x74, /* 011101 */
+	0xDC, /* 110111 */
+	0x74, /* 011101 */
+	0xDC, /* 110111 */
+	0x74, /* 011101 */
+
+	/* 179 0xB3 '\263' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 180 0xB4 '\264' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0xF0, /* 111100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 181 0xB5 '\265' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0xF0, /* 111100 */
+	0x10, /* 000100 */
+	0xF0, /* 111100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 182 0xB6 '\266' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0xE8, /* 111010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 183 0xB7 '\267' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xF8, /* 111110 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 184 0xB8 '\270' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xF0, /* 111100 */
+	0x10, /* 000100 */
+	0xF0, /* 111100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 185 0xB9 '\271' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0xE8, /* 111010 */
+	0x08, /* 000010 */
+	0xE8, /* 111010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 186 0xBA '\272' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 187 0xBB '\273' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xF8, /* 111110 */
+	0x08, /* 000010 */
+	0xE8, /* 111010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 188 0xBC '\274' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0xE8, /* 111010 */
+	0x08, /* 000010 */
+	0xF8, /* 111110 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 189 0xBD '\275' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0xF8, /* 111110 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 190 0xBE '\276' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0xF0, /* 111100 */
+	0x10, /* 000100 */
+	0xF0, /* 111100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 191 0xBF '\277' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xF0, /* 111100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 192 0xC0 '\300' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x1C, /* 000111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 193 0xC1 '\301' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 194 0xC2 '\302' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 195 0xC3 '\303' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x1C, /* 000111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 196 0xC4 '\304' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 197 0xC5 '\305' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0xFC, /* 111111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 198 0xC6 '\306' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x1C, /* 000111 */
+	0x10, /* 000100 */
+	0x1C, /* 000111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 199 0xC7 '\307' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x2C, /* 001011 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 200 0xC8 '\310' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x2C, /* 001011 */
+	0x20, /* 001000 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 201 0xC9 '\311' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x3C, /* 001111 */
+	0x20, /* 001000 */
+	0x2C, /* 001011 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 202 0xCA '\312' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0xEC, /* 111011 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 203 0xCB '\313' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0xEC, /* 111011 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 204 0xCC '\314' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x2C, /* 001011 */
+	0x20, /* 001000 */
+	0x2C, /* 001011 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 205 0xCD '\315' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 206 0xCE '\316' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0xEC, /* 111011 */
+	0x00, /* 000000 */
+	0xEC, /* 111011 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 207 0xCF '\317' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 208 0xD0 '\320' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 209 0xD1 '\321' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 210 0xD2 '\322' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 211 0xD3 '\323' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 212 0xD4 '\324' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x1C, /* 000111 */
+	0x10, /* 000100 */
+	0x1C, /* 000111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 213 0xD5 '\325' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x1C, /* 000111 */
+	0x10, /* 000100 */
+	0x1C, /* 000111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 214 0xD6 '\326' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x3C, /* 001111 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 215 0xD7 '\327' */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0xFC, /* 111111 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+
+	/* 216 0xD8 '\330' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0xFC, /* 111111 */
+	0x10, /* 000100 */
+	0xFC, /* 111111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 217 0xD9 '\331' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0xF0, /* 111100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 218 0xDA '\332' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x1C, /* 000111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 219 0xDB '\333' */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+
+	/* 220 0xDC '\334' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+
+	/* 221 0xDD '\335' */
+	0xE0, /* 111000 */
+	0xE0, /* 111000 */
+	0xE0, /* 111000 */
+	0xE0, /* 111000 */
+	0xE0, /* 111000 */
+	0xE0, /* 111000 */
+	0xE0, /* 111000 */
+	0xE0, /* 111000 */
+
+	/* 222 0xDE '\336' */
+	0x1C, /* 000111 */
+	0x1C, /* 000111 */
+	0x1C, /* 000111 */
+	0x1C, /* 000111 */
+	0x1C, /* 000111 */
+	0x1C, /* 000111 */
+	0x1C, /* 000111 */
+	0x1C, /* 000111 */
+
+	/* 223 0xDF '\337' */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 224 0xE0 '\340' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x34, /* 001101 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x34, /* 001101 */
+	0x00, /* 000000 */
+
+	/* 225 0xE1 '\341' */
+	0x24, /* 001001 */
+	0x44, /* 010001 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x58, /* 010110 */
+	0x40, /* 010000 */
+
+	/* 226 0xE2 '\342' */
+	0x7C, /* 011111 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x00, /* 000000 */
+
+	/* 227 0xE3 '\343' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x28, /* 001010 */
+	0x00, /* 000000 */
+
+	/* 228 0xE4 '\344' */
+	0x7C, /* 011111 */
+	0x24, /* 001001 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x24, /* 001001 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 229 0xE5 '\345' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x3C, /* 001111 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x30, /* 001100 */
+	0x00, /* 000000 */
+
+	/* 230 0xE6 '\346' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x74, /* 011101 */
+	0x40, /* 010000 */
+
+	/* 231 0xE7 '\347' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x0C, /* 000011 */
+	0x00, /* 000000 */
+
+	/* 232 0xE8 '\350' */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 233 0xE9 '\351' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x7C, /* 011111 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 234 0xEA '\352' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x28, /* 001010 */
+	0x6C, /* 011011 */
+	0x00, /* 000000 */
+
+	/* 235 0xEB '\353' */
+	0x18, /* 000110 */
+	0x20, /* 001000 */
+	0x18, /* 000110 */
+	0x24, /* 001001 */
+	0x24, /* 001001 */
+	0x24, /* 001001 */
+	0x18, /* 000110 */
+	0x00, /* 000000 */
+
+	/* 236 0xEC '\354' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 237 0xED '\355' */
+	0x00, /* 000000 */
+	0x04, /* 000001 */
+	0x38, /* 001110 */
+	0x54, /* 010101 */
+	0x54, /* 010101 */
+	0x38, /* 001110 */
+	0x40, /* 010000 */
+	0x00, /* 000000 */
+
+	/* 238 0xEE '\356' */
+	0x3C, /* 001111 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x38, /* 001110 */
+	0x40, /* 010000 */
+	0x40, /* 010000 */
+	0x3C, /* 001111 */
+	0x00, /* 000000 */
+
+	/* 239 0xEF '\357' */
+	0x38, /* 001110 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x44, /* 010001 */
+	0x00, /* 000000 */
+
+	/* 240 0xF0 '\360' */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0xFC, /* 111111 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 241 0xF1 '\361' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x7C, /* 011111 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+
+	/* 242 0xF2 '\362' */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 243 0xF3 '\363' */
+	0x08, /* 000010 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x10, /* 000100 */
+	0x08, /* 000010 */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 244 0xF4 '\364' */
+	0x0C, /* 000011 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+
+	/* 245 0xF5 '\365' */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x10, /* 000100 */
+	0x60, /* 011000 */
+
+	/* 246 0xF6 '\366' */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x7C, /* 011111 */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 247 0xF7 '\367' */
+	0x00, /* 000000 */
+	0x20, /* 001000 */
+	0x54, /* 010101 */
+	0x08, /* 000010 */
+	0x20, /* 001000 */
+	0x54, /* 010101 */
+	0x08, /* 000010 */
+	0x00, /* 000000 */
+
+	/* 248 0xF8 '\370' */
+	0x30, /* 001100 */
+	0x48, /* 010010 */
+	0x48, /* 010010 */
+	0x30, /* 001100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 249 0xF9 '\371' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x38, /* 001110 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 250 0xFA '\372' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x10, /* 000100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 251 0xFB '\373' */
+	0x04, /* 000001 */
+	0x08, /* 000010 */
+	0x08, /* 000010 */
+	0x50, /* 010100 */
+	0x50, /* 010100 */
+	0x20, /* 001000 */
+	0x20, /* 001000 */
+	0x00, /* 000000 */
+
+	/* 252 0xFC '\374' */
+	0x60, /* 011000 */
+	0x50, /* 010100 */
+	0x50, /* 010100 */
+	0x50, /* 010100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 253 0xFD '\375' */
+	0x60, /* 011000 */
+	0x10, /* 000100 */
+	0x20, /* 001000 */
+	0x70, /* 011100 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+
+	/* 254 0xFE '\376' */
+	0x00, /* 000000 */
+	0x38, /* 001110 */
+	0x38, /* 001110 */
+	0x38, /* 001110 */
+	0x38, /* 001110 */
+	0x38, /* 001110 */
+	0x38, /* 001110 */
+	0x00, /* 000000 */
+
+	/* 255 0xFF '\377' */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+	0x00, /* 000000 */
+};
+
+const struct font_desc font_6x8 = {
+	.idx	= FONT6x8_IDX,
+	.name	= "6x8",
+	.width	= 6,
+	.height	= 8,
+	.data	= fontdata_6x8,
+	.pref	= 0,
+};
diff --git a/lib/fonts/fonts.c b/lib/fonts/fonts.c
index e7258d8c252b..5f4b07b56cd9 100644
--- a/lib/fonts/fonts.c
+++ b/lib/fonts/fonts.c
@@ -57,6 +57,9 @@ static const struct font_desc *fonts[] = {
 #ifdef CONFIG_FONT_TER16x32
 	&font_ter_16x32,
 #endif
+#ifdef CONFIG_FONT_6x8
+	&font_6x8,
+#endif
 };
 
 #define num_fonts ARRAY_SIZE(fonts)
-- 
cgit v1.2.3


From 81b1e242b8bdc1f5094b8d172cdc0c032fc761c1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Sep 2020 16:22:36 +0200
Subject: test_bitmap: remove user bitmap tests

We can't run the tests for userspace bitmap parsing if set_fs() doesn't
exist, and it is about to go away for x86, powerpc with other major
architectures to follow.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 lib/test_bitmap.c | 91 +++++++++++++------------------------------------------
 1 file changed, 21 insertions(+), 70 deletions(-)

(limited to 'lib')

diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c
index df903c53952b..4425a1dd4ef1 100644
--- a/lib/test_bitmap.c
+++ b/lib/test_bitmap.c
@@ -354,50 +354,37 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = {
 
 };
 
-static void __init __test_bitmap_parselist(int is_user)
+static void __init test_bitmap_parselist(void)
 {
 	int i;
 	int err;
 	ktime_t time;
 	DECLARE_BITMAP(bmap, 2048);
-	char *mode = is_user ? "_user"  : "";
 
 	for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) {
 #define ptest parselist_tests[i]
 
-		if (is_user) {
-			mm_segment_t orig_fs = get_fs();
-			size_t len = strlen(ptest.in);
-
-			set_fs(KERNEL_DS);
-			time = ktime_get();
-			err = bitmap_parselist_user((__force const char __user *)ptest.in, len,
-						    bmap, ptest.nbits);
-			time = ktime_get() - time;
-			set_fs(orig_fs);
-		} else {
-			time = ktime_get();
-			err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
-			time = ktime_get() - time;
-		}
+		time = ktime_get();
+		err = bitmap_parselist(ptest.in, bmap, ptest.nbits);
+		time = ktime_get() - time;
 
 		if (err != ptest.errno) {
-			pr_err("parselist%s: %d: input is %s, errno is %d, expected %d\n",
-					mode, i, ptest.in, err, ptest.errno);
+			pr_err("parselist: %d: input is %s, errno is %d, expected %d\n",
+					i, ptest.in, err, ptest.errno);
 			continue;
 		}
 
 		if (!err && ptest.expected
 			 && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) {
-			pr_err("parselist%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
-					mode, i, ptest.in, bmap[0],
+			pr_err("parselist: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
+					i, ptest.in, bmap[0],
 					*ptest.expected);
 			continue;
 		}
 
 		if (ptest.flags & PARSE_TIME)
-			pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n",
-					mode, i, ptest.in, time);
+			pr_err("parselist: %d: input is '%s' OK, Time: %llu\n",
+					i, ptest.in, time);
 
 #undef ptest
 	}
@@ -443,75 +430,41 @@ static const struct test_bitmap_parselist parse_tests[] __initconst = {
 #undef step
 };
 
-static void __init __test_bitmap_parse(int is_user)
+static void __init test_bitmap_parse(void)
 {
 	int i;
 	int err;
 	ktime_t time;
 	DECLARE_BITMAP(bmap, 2048);
-	char *mode = is_user ? "_user"  : "";
 
 	for (i = 0; i < ARRAY_SIZE(parse_tests); i++) {
 		struct test_bitmap_parselist test = parse_tests[i];
+		size_t len = test.flags & NO_LEN ? UINT_MAX : strlen(test.in);
 
-		if (is_user) {
-			size_t len = strlen(test.in);
-			mm_segment_t orig_fs = get_fs();
-
-			set_fs(KERNEL_DS);
-			time = ktime_get();
-			err = bitmap_parse_user((__force const char __user *)test.in, len,
-						bmap, test.nbits);
-			time = ktime_get() - time;
-			set_fs(orig_fs);
-		} else {
-			size_t len = test.flags & NO_LEN ?
-				UINT_MAX : strlen(test.in);
-			time = ktime_get();
-			err = bitmap_parse(test.in, len, bmap, test.nbits);
-			time = ktime_get() - time;
-		}
+		time = ktime_get();
+		err = bitmap_parse(test.in, len, bmap, test.nbits);
+		time = ktime_get() - time;
 
 		if (err != test.errno) {
-			pr_err("parse%s: %d: input is %s, errno is %d, expected %d\n",
-					mode, i, test.in, err, test.errno);
+			pr_err("parse: %d: input is %s, errno is %d, expected %d\n",
+					i, test.in, err, test.errno);
 			continue;
 		}
 
 		if (!err && test.expected
 			 && !__bitmap_equal(bmap, test.expected, test.nbits)) {
-			pr_err("parse%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
-					mode, i, test.in, bmap[0],
+			pr_err("parse: %d: input is %s, result is 0x%lx, expected 0x%lx\n",
+					i, test.in, bmap[0],
 					*test.expected);
 			continue;
 		}
 
 		if (test.flags & PARSE_TIME)
-			pr_err("parse%s: %d: input is '%s' OK, Time: %llu\n",
-					mode, i, test.in, time);
+			pr_err("parse: %d: input is '%s' OK, Time: %llu\n",
+					i, test.in, time);
 	}
 }
 
-static void __init test_bitmap_parselist(void)
-{
-	__test_bitmap_parselist(0);
-}
-
-static void __init test_bitmap_parselist_user(void)
-{
-	__test_bitmap_parselist(1);
-}
-
-static void __init test_bitmap_parse(void)
-{
-	__test_bitmap_parse(0);
-}
-
-static void __init test_bitmap_parse_user(void)
-{
-	__test_bitmap_parse(1);
-}
-
 #define EXP1_IN_BITS	(sizeof(exp1) * 8)
 
 static void __init test_bitmap_arr32(void)
@@ -675,9 +628,7 @@ static void __init selftest(void)
 	test_replace();
 	test_bitmap_arr32();
 	test_bitmap_parse();
-	test_bitmap_parse_user();
 	test_bitmap_parselist();
-	test_bitmap_parselist_user();
 	test_mem_optimisations();
 	test_for_each_set_clump8();
 	test_bitmap_cut();
-- 
cgit v1.2.3


From 7c69898b86b45842e1c2799df845e203c71a667e Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 9 Sep 2020 09:25:33 +0200
Subject: Revert "test_firmware: Test platform fw loading on non-EFI systems"

This reverts commit 18efb2f9e897ac65e7a1b2892f4a53e404534eba as it is
reported to break the build:
	https://lore.kernel.org/r/20200909154709.619fe9bb@canb.auug.org.au

Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Fixes: 18efb2f9e897 ("test_firmware: Test platform fw loading on non-EFI systems")
Cc: stable@vger.kernel.org
Cc: Luis Chamberlain <mcgrof@kernel.org>
Cc: Scott Branden <scott.branden@broadcom.com>
Cc: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20200909154709.619fe9bb@canb.auug.org.au
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/firmware/efi/embedded-firmware.c | 21 +++++----------------
 drivers/firmware/efi/embedded-firmware.h | 21 ---------------------
 include/linux/efi_embedded_fw.h          | 13 +++++++++++++
 lib/test_firmware.c                      |  5 -----
 4 files changed, 18 insertions(+), 42 deletions(-)
 delete mode 100644 drivers/firmware/efi/embedded-firmware.h

(limited to 'lib')

diff --git a/drivers/firmware/efi/embedded-firmware.c b/drivers/firmware/efi/embedded-firmware.c
index f1330f55f7f4..e97a9c9d010c 100644
--- a/drivers/firmware/efi/embedded-firmware.c
+++ b/drivers/firmware/efi/embedded-firmware.c
@@ -14,22 +14,11 @@
 #include <linux/vmalloc.h>
 #include <crypto/sha.h>
 
-#include "embedded-firmware.h"
-
-#ifdef CONFIG_TEST_FIRMWARE
-# define EFI_EMBEDDED_FW_VISIBILITY
-#else
-# define EFI_EMBEDDED_FW_VISIBILITY static
-#endif
-
-EFI_EMBEDDED_FW_VISIBILITY LIST_HEAD(efi_embedded_fw_list);
-EFI_EMBEDDED_FW_VISIBILITY bool efi_embedded_fw_checked;
-
 /* Exported for use by lib/test_firmware.c only */
-#ifdef CONFIG_TEST_FIRMWARE
+LIST_HEAD(efi_embedded_fw_list);
 EXPORT_SYMBOL_GPL(efi_embedded_fw_list);
-EXPORT_SYMBOL_GPL(efi_embedded_fw_checked);
-#endif
+
+static bool checked_for_fw;
 
 static const struct dmi_system_id * const embedded_fw_table[] = {
 #ifdef CONFIG_TOUCHSCREEN_DMI
@@ -127,14 +116,14 @@ void __init efi_check_for_embedded_firmwares(void)
 		}
 	}
 
-	efi_embedded_fw_checked = true;
+	checked_for_fw = true;
 }
 
 int efi_get_embedded_fw(const char *name, const u8 **data, size_t *size)
 {
 	struct efi_embedded_fw *iter, *fw = NULL;
 
-	if (!efi_embedded_fw_checked) {
+	if (!checked_for_fw) {
 		pr_warn("Warning %s called while we did not check for embedded fw\n",
 			__func__);
 		return -ENOENT;
diff --git a/drivers/firmware/efi/embedded-firmware.h b/drivers/firmware/efi/embedded-firmware.h
deleted file mode 100644
index bb894eae0906..000000000000
--- a/drivers/firmware/efi/embedded-firmware.h
+++ /dev/null
@@ -1,21 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _EFI_EMBEDDED_FW_INTERNAL_H_
-#define _EFI_EMBEDDED_FW_INTERNAL_H_
-
-/*
- * This struct and efi_embedded_fw_list are private to the efi-embedded fw
- * implementation they only in separate header for use by lib/test_firmware.c.
- */
-struct efi_embedded_fw {
-	struct list_head list;
-	const char *name;
-	const u8 *data;
-	size_t length;
-};
-
-#ifdef CONFIG_TEST_FIRMWARE
-extern struct list_head efi_embedded_fw_list;
-extern bool efi_embedded_fw_checked;
-#endif
-
-#endif /* _EFI_EMBEDDED_FW_INTERNAL_H_ */
diff --git a/include/linux/efi_embedded_fw.h b/include/linux/efi_embedded_fw.h
index 4ad5db9f5312..57eac5241303 100644
--- a/include/linux/efi_embedded_fw.h
+++ b/include/linux/efi_embedded_fw.h
@@ -7,6 +7,19 @@
 
 #define EFI_EMBEDDED_FW_PREFIX_LEN		8
 
+/*
+ * This struct and efi_embedded_fw_list are private to the efi-embedded fw
+ * implementation they are in this header for use by lib/test_firmware.c only!
+ */
+struct efi_embedded_fw {
+	struct list_head list;
+	const char *name;
+	const u8 *data;
+	size_t length;
+};
+
+extern struct list_head efi_embedded_fw_list;
+
 /**
  * struct efi_embedded_fw_desc - This struct is used by the EFI embedded-fw
  *                               code to search for embedded firmwares.
diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 62af792e151c..9fee2b93a8d1 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -489,7 +489,6 @@ out:
 static DEVICE_ATTR_WO(trigger_request);
 
 #ifdef CONFIG_EFI_EMBEDDED_FIRMWARE
-#include "../drivers/firmware/efi/embedded-firmware.h"
 static ssize_t trigger_request_platform_store(struct device *dev,
 					      struct device_attribute *attr,
 					      const char *buf, size_t count)
@@ -502,7 +501,6 @@ static ssize_t trigger_request_platform_store(struct device *dev,
 	};
 	struct efi_embedded_fw efi_embedded_fw;
 	const struct firmware *firmware = NULL;
-	bool saved_efi_embedded_fw_checked;
 	char *name;
 	int rc;
 
@@ -515,8 +513,6 @@ static ssize_t trigger_request_platform_store(struct device *dev,
 	efi_embedded_fw.data = (void *)test_data;
 	efi_embedded_fw.length = sizeof(test_data);
 	list_add(&efi_embedded_fw.list, &efi_embedded_fw_list);
-	saved_efi_embedded_fw_checked = efi_embedded_fw_checked;
-	efi_embedded_fw_checked = true;
 
 	pr_info("loading '%s'\n", name);
 	rc = firmware_request_platform(&firmware, name, dev);
@@ -534,7 +530,6 @@ static ssize_t trigger_request_platform_store(struct device *dev,
 	rc = count;
 
 out:
-	efi_embedded_fw_checked = saved_efi_embedded_fw_checked;
 	release_firmware(firmware);
 	list_del(&efi_embedded_fw.list);
 	kfree(name);
-- 
cgit v1.2.3


From 0c7a6b91d2276b09ade6e09766600f809f5a529a Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Wed, 9 Sep 2020 23:04:40 -0700
Subject: driver core: platform: Document return type of more functions

I can't always remember the return values of these functions, and so I
usually jump to the function to read the kernel-doc and see that it
doesn't tell me. Then I have to spend more time reading the code to jump
to the function that actually tells me the return values. Let's document
it here so that we don't all have to spend time digging through the code
to understand the return values.

Cc: <linux-doc@vger.kernel.org>
Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Link: https://lore.kernel.org/r/20200910060440.2302925-1-swboyd@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 drivers/base/platform.c | 14 ++++++++++++++
 lib/devres.c            | 18 ++++++++++++------
 2 files changed, 26 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/drivers/base/platform.c b/drivers/base/platform.c
index e5d8a0503b4f..4b3dc6813714 100644
--- a/drivers/base/platform.c
+++ b/drivers/base/platform.c
@@ -45,6 +45,8 @@ EXPORT_SYMBOL_GPL(platform_bus);
  * @dev: platform device
  * @type: resource type
  * @num: resource index
+ *
+ * Return: a pointer to the resource or NULL on failure.
  */
 struct resource *platform_get_resource(struct platform_device *dev,
 				       unsigned int type, unsigned int num)
@@ -70,6 +72,9 @@ EXPORT_SYMBOL_GPL(platform_get_resource);
  *        resource management
  * @index: resource index
  * @res: optional output parameter to store a pointer to the obtained resource.
+ *
+ * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code
+ * on failure.
  */
 void __iomem *
 devm_platform_get_and_ioremap_resource(struct platform_device *pdev,
@@ -91,6 +96,9 @@ EXPORT_SYMBOL_GPL(devm_platform_get_and_ioremap_resource);
  * @pdev: platform device to use both for memory resource lookup as well as
  *        resource management
  * @index: resource index
+ *
+ * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code
+ * on failure.
  */
 void __iomem *devm_platform_ioremap_resource(struct platform_device *pdev,
 					     unsigned int index)
@@ -106,6 +114,9 @@ EXPORT_SYMBOL_GPL(devm_platform_ioremap_resource);
  * @pdev: platform device to use both for memory resource lookup as well as
  *        resource management
  * @index: resource index
+ *
+ * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code
+ * on failure.
  */
 void __iomem *devm_platform_ioremap_resource_wc(struct platform_device *pdev,
 						unsigned int index)
@@ -124,6 +135,9 @@ void __iomem *devm_platform_ioremap_resource_wc(struct platform_device *pdev,
  * @pdev: platform device to use both for memory resource lookup as well as
  *	  resource management
  * @name: name of the resource
+ *
+ * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code
+ * on failure.
  */
 void __iomem *
 devm_platform_ioremap_resource_byname(struct platform_device *pdev,
diff --git a/lib/devres.c b/lib/devres.c
index 8bd3ed450614..2a4ff5d64288 100644
--- a/lib/devres.c
+++ b/lib/devres.c
@@ -162,13 +162,15 @@ __devm_ioremap_resource(struct device *dev, const struct resource *res,
  * region and ioremaps it. All operations are managed and will be undone
  * on driver detach.
  *
- * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code
- * on failure. Usage example:
+ * Usage example:
  *
  *	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
  *	base = devm_ioremap_resource(&pdev->dev, res);
  *	if (IS_ERR(base))
  *		return PTR_ERR(base);
+ *
+ * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code
+ * on failure.
  */
 void __iomem *devm_ioremap_resource(struct device *dev,
 				    const struct resource *res)
@@ -183,8 +185,8 @@ EXPORT_SYMBOL(devm_ioremap_resource);
  * @dev: generic device to handle the resource for
  * @res: resource to be handled
  *
- * Returns a pointer to the remapped memory or an ERR_PTR() encoded error code
- * on failure. Usage example:
+ * Return: a pointer to the remapped memory or an ERR_PTR() encoded error code
+ * on failure.
  */
 void __iomem *devm_ioremap_resource_wc(struct device *dev,
 				       const struct resource *res)
@@ -207,8 +209,8 @@ void __iomem *devm_ioremap_resource_wc(struct device *dev,
  * @node:       The device-tree node where the resource resides
  * @index:	index of the MMIO range in the "reg" property
  * @size:	Returns the size of the resource (pass NULL if not needed)
- * Returns a pointer to the requested and mapped memory or an ERR_PTR() encoded
- * error code on failure. Usage example:
+ *
+ * Usage example:
  *
  *	base = devm_of_iomap(&pdev->dev, node, 0, NULL);
  *	if (IS_ERR(base))
@@ -219,6 +221,8 @@ void __iomem *devm_ioremap_resource_wc(struct device *dev,
  * two drivers try to map the same memory, the of_iomap() function will succeed
  * but the devm_of_iomap() function will return -EBUSY.
  *
+ * Return: a pointer to the requested and mapped memory or an ERR_PTR() encoded
+ * error code on failure.
  */
 void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index,
 			    resource_size_t *size)
@@ -256,6 +260,8 @@ static int devm_ioport_map_match(struct device *dev, void *res,
  *
  * Managed ioport_map().  Map is automatically unmapped on driver
  * detach.
+ *
+ * Return: a pointer to the remapped memory or NULL on failure.
  */
 void __iomem *devm_ioport_map(struct device *dev, unsigned long port,
 			       unsigned int nr)
-- 
cgit v1.2.3


From aedcade6f4fa9a1e65f327fc42de3fb47660646c Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Fri, 14 Aug 2020 17:40:26 -0700
Subject: debugobjects: Allow debug_obj_descr to be const

The debugobject core could be slightly harder to corrupt if the
debug_obj_descr would be a pointer to const memory.

Depending on the architecture, const data structures are placed into
read-only memory and thus are harder to corrupt or hijack.

This descriptor is used to fix up stuff like timers and workqueues when
core kernel data structures are busted, so moving the descriptors to
read-only memory will make debugobjects more resilient to something going
wrong and then corrupting the function pointers inside struct
debug_obj_descr.

Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20200815004027.2046113-2-swboyd@chromium.org
---
 include/linux/debugobjects.h | 32 ++++++++++++++++----------------
 lib/debugobjects.c           | 30 +++++++++++++++---------------
 2 files changed, 31 insertions(+), 31 deletions(-)

(limited to 'lib')

diff --git a/include/linux/debugobjects.h b/include/linux/debugobjects.h
index afc416e5dcab..8d2dde23e9fb 100644
--- a/include/linux/debugobjects.h
+++ b/include/linux/debugobjects.h
@@ -30,7 +30,7 @@ struct debug_obj {
 	enum debug_obj_state	state;
 	unsigned int		astate;
 	void			*object;
-	struct debug_obj_descr	*descr;
+	const struct debug_obj_descr *descr;
 };
 
 /**
@@ -64,14 +64,14 @@ struct debug_obj_descr {
 };
 
 #ifdef CONFIG_DEBUG_OBJECTS
-extern void debug_object_init      (void *addr, struct debug_obj_descr *descr);
+extern void debug_object_init      (void *addr, const struct debug_obj_descr *descr);
 extern void
-debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr);
-extern int debug_object_activate  (void *addr, struct debug_obj_descr *descr);
-extern void debug_object_deactivate(void *addr, struct debug_obj_descr *descr);
-extern void debug_object_destroy   (void *addr, struct debug_obj_descr *descr);
-extern void debug_object_free      (void *addr, struct debug_obj_descr *descr);
-extern void debug_object_assert_init(void *addr, struct debug_obj_descr *descr);
+debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr);
+extern int debug_object_activate  (void *addr, const struct debug_obj_descr *descr);
+extern void debug_object_deactivate(void *addr, const struct debug_obj_descr *descr);
+extern void debug_object_destroy   (void *addr, const struct debug_obj_descr *descr);
+extern void debug_object_free      (void *addr, const struct debug_obj_descr *descr);
+extern void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr);
 
 /*
  * Active state:
@@ -79,26 +79,26 @@ extern void debug_object_assert_init(void *addr, struct debug_obj_descr *descr);
  * - Must return to 0 before deactivation.
  */
 extern void
-debug_object_active_state(void *addr, struct debug_obj_descr *descr,
+debug_object_active_state(void *addr, const struct debug_obj_descr *descr,
 			  unsigned int expect, unsigned int next);
 
 extern void debug_objects_early_init(void);
 extern void debug_objects_mem_init(void);
 #else
 static inline void
-debug_object_init      (void *addr, struct debug_obj_descr *descr) { }
+debug_object_init      (void *addr, const struct debug_obj_descr *descr) { }
 static inline void
-debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr) { }
+debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr) { }
 static inline int
-debug_object_activate  (void *addr, struct debug_obj_descr *descr) { return 0; }
+debug_object_activate  (void *addr, const struct debug_obj_descr *descr) { return 0; }
 static inline void
-debug_object_deactivate(void *addr, struct debug_obj_descr *descr) { }
+debug_object_deactivate(void *addr, const struct debug_obj_descr *descr) { }
 static inline void
-debug_object_destroy   (void *addr, struct debug_obj_descr *descr) { }
+debug_object_destroy   (void *addr, const struct debug_obj_descr *descr) { }
 static inline void
-debug_object_free      (void *addr, struct debug_obj_descr *descr) { }
+debug_object_free      (void *addr, const struct debug_obj_descr *descr) { }
 static inline void
-debug_object_assert_init(void *addr, struct debug_obj_descr *descr) { }
+debug_object_assert_init(void *addr, const struct debug_obj_descr *descr) { }
 
 static inline void debug_objects_early_init(void) { }
 static inline void debug_objects_mem_init(void) { }
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index fe4557955d97..e2a3171b6c63 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -90,7 +90,7 @@ static int			debug_objects_pool_size __read_mostly
 				= ODEBUG_POOL_SIZE;
 static int			debug_objects_pool_min_level __read_mostly
 				= ODEBUG_POOL_MIN_LEVEL;
-static struct debug_obj_descr	*descr_test  __read_mostly;
+static const struct debug_obj_descr *descr_test  __read_mostly;
 static struct kmem_cache	*obj_cache __read_mostly;
 
 /*
@@ -223,7 +223,7 @@ static struct debug_obj *__alloc_object(struct hlist_head *list)
  * Must be called with interrupts disabled.
  */
 static struct debug_obj *
-alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr)
+alloc_object(void *addr, struct debug_bucket *b, const struct debug_obj_descr *descr)
 {
 	struct debug_percpu_free *percpu_pool = this_cpu_ptr(&percpu_obj_pool);
 	struct debug_obj *obj;
@@ -475,7 +475,7 @@ static struct debug_bucket *get_bucket(unsigned long addr)
 
 static void debug_print_object(struct debug_obj *obj, char *msg)
 {
-	struct debug_obj_descr *descr = obj->descr;
+	const struct debug_obj_descr *descr = obj->descr;
 	static int limit;
 
 	if (limit < 5 && descr != descr_test) {
@@ -529,7 +529,7 @@ static void debug_object_is_on_stack(void *addr, int onstack)
 }
 
 static void
-__debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
+__debug_object_init(void *addr, const struct debug_obj_descr *descr, int onstack)
 {
 	enum debug_obj_state state;
 	bool check_stack = false;
@@ -587,7 +587,7 @@ __debug_object_init(void *addr, struct debug_obj_descr *descr, int onstack)
  * @addr:	address of the object
  * @descr:	pointer to an object specific debug description structure
  */
-void debug_object_init(void *addr, struct debug_obj_descr *descr)
+void debug_object_init(void *addr, const struct debug_obj_descr *descr)
 {
 	if (!debug_objects_enabled)
 		return;
@@ -602,7 +602,7 @@ EXPORT_SYMBOL_GPL(debug_object_init);
  * @addr:	address of the object
  * @descr:	pointer to an object specific debug description structure
  */
-void debug_object_init_on_stack(void *addr, struct debug_obj_descr *descr)
+void debug_object_init_on_stack(void *addr, const struct debug_obj_descr *descr)
 {
 	if (!debug_objects_enabled)
 		return;
@@ -617,7 +617,7 @@ EXPORT_SYMBOL_GPL(debug_object_init_on_stack);
  * @descr:	pointer to an object specific debug description structure
  * Returns 0 for success, -EINVAL for check failed.
  */
-int debug_object_activate(void *addr, struct debug_obj_descr *descr)
+int debug_object_activate(void *addr, const struct debug_obj_descr *descr)
 {
 	enum debug_obj_state state;
 	struct debug_bucket *db;
@@ -695,7 +695,7 @@ EXPORT_SYMBOL_GPL(debug_object_activate);
  * @addr:	address of the object
  * @descr:	pointer to an object specific debug description structure
  */
-void debug_object_deactivate(void *addr, struct debug_obj_descr *descr)
+void debug_object_deactivate(void *addr, const struct debug_obj_descr *descr)
 {
 	struct debug_bucket *db;
 	struct debug_obj *obj;
@@ -747,7 +747,7 @@ EXPORT_SYMBOL_GPL(debug_object_deactivate);
  * @addr:	address of the object
  * @descr:	pointer to an object specific debug description structure
  */
-void debug_object_destroy(void *addr, struct debug_obj_descr *descr)
+void debug_object_destroy(void *addr, const struct debug_obj_descr *descr)
 {
 	enum debug_obj_state state;
 	struct debug_bucket *db;
@@ -797,7 +797,7 @@ EXPORT_SYMBOL_GPL(debug_object_destroy);
  * @addr:	address of the object
  * @descr:	pointer to an object specific debug description structure
  */
-void debug_object_free(void *addr, struct debug_obj_descr *descr)
+void debug_object_free(void *addr, const struct debug_obj_descr *descr)
 {
 	enum debug_obj_state state;
 	struct debug_bucket *db;
@@ -838,7 +838,7 @@ EXPORT_SYMBOL_GPL(debug_object_free);
  * @addr:	address of the object
  * @descr:	pointer to an object specific debug description structure
  */
-void debug_object_assert_init(void *addr, struct debug_obj_descr *descr)
+void debug_object_assert_init(void *addr, const struct debug_obj_descr *descr)
 {
 	struct debug_bucket *db;
 	struct debug_obj *obj;
@@ -886,7 +886,7 @@ EXPORT_SYMBOL_GPL(debug_object_assert_init);
  * @next:	state to move to if expected state is found
  */
 void
-debug_object_active_state(void *addr, struct debug_obj_descr *descr,
+debug_object_active_state(void *addr, const struct debug_obj_descr *descr,
 			  unsigned int expect, unsigned int next)
 {
 	struct debug_bucket *db;
@@ -934,7 +934,7 @@ EXPORT_SYMBOL_GPL(debug_object_active_state);
 static void __debug_check_no_obj_freed(const void *address, unsigned long size)
 {
 	unsigned long flags, oaddr, saddr, eaddr, paddr, chunks;
-	struct debug_obj_descr *descr;
+	const struct debug_obj_descr *descr;
 	enum debug_obj_state state;
 	struct debug_bucket *db;
 	struct hlist_node *tmp;
@@ -1052,7 +1052,7 @@ struct self_test {
 	unsigned long	dummy2[3];
 };
 
-static __initdata struct debug_obj_descr descr_type_test;
+static __initconst const struct debug_obj_descr descr_type_test;
 
 static bool __init is_static_object(void *addr)
 {
@@ -1177,7 +1177,7 @@ out:
 	return res;
 }
 
-static __initdata struct debug_obj_descr descr_type_test = {
+static __initconst const struct debug_obj_descr descr_type_test = {
 	.name			= "selftest",
 	.is_static_object	= is_static_object,
 	.fixup_init		= fixup_init,
-- 
cgit v1.2.3


From f9e62f318fd706a54b7ce9b28e5c7e49bbde8788 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Fri, 14 Aug 2020 17:40:27 -0700
Subject: treewide: Make all debug_obj_descriptors const

This should make it harder for the kernel to corrupt the debug object
descriptor, used to call functions to fixup state and track debug objects,
by moving the structure to read-only memory.

Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Reviewed-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20200815004027.2046113-3-swboyd@chromium.org
---
 drivers/gpu/drm/i915/i915_active.c   | 2 +-
 drivers/gpu/drm/i915/i915_sw_fence.c | 2 +-
 kernel/rcu/rcu.h                     | 2 +-
 kernel/rcu/update.c                  | 2 +-
 kernel/time/hrtimer.c                | 4 ++--
 kernel/time/timer.c                  | 4 ++--
 kernel/workqueue.c                   | 4 ++--
 lib/percpu_counter.c                 | 4 ++--
 8 files changed, 12 insertions(+), 12 deletions(-)

(limited to 'lib')

diff --git a/drivers/gpu/drm/i915/i915_active.c b/drivers/gpu/drm/i915/i915_active.c
index d960d0be5bd2..839bd53df6e9 100644
--- a/drivers/gpu/drm/i915/i915_active.c
+++ b/drivers/gpu/drm/i915/i915_active.c
@@ -81,7 +81,7 @@ static void *active_debug_hint(void *addr)
 	return (void *)ref->active ?: (void *)ref->retire ?: (void *)ref;
 }
 
-static struct debug_obj_descr active_debug_desc = {
+static const struct debug_obj_descr active_debug_desc = {
 	.name = "i915_active",
 	.debug_hint = active_debug_hint,
 };
diff --git a/drivers/gpu/drm/i915/i915_sw_fence.c b/drivers/gpu/drm/i915/i915_sw_fence.c
index 4cd2038cbe35..038d4c6884c5 100644
--- a/drivers/gpu/drm/i915/i915_sw_fence.c
+++ b/drivers/gpu/drm/i915/i915_sw_fence.c
@@ -34,7 +34,7 @@ static void *i915_sw_fence_debug_hint(void *addr)
 
 #ifdef CONFIG_DRM_I915_SW_FENCE_DEBUG_OBJECTS
 
-static struct debug_obj_descr i915_sw_fence_debug_descr = {
+static const struct debug_obj_descr i915_sw_fence_debug_descr = {
 	.name = "i915_sw_fence",
 	.debug_hint = i915_sw_fence_debug_hint,
 };
diff --git a/kernel/rcu/rcu.h b/kernel/rcu/rcu.h
index cf66a3ccd757..e01cba5e4b52 100644
--- a/kernel/rcu/rcu.h
+++ b/kernel/rcu/rcu.h
@@ -167,7 +167,7 @@ static inline unsigned long rcu_seq_diff(unsigned long new, unsigned long old)
 # define STATE_RCU_HEAD_READY	0
 # define STATE_RCU_HEAD_QUEUED	1
 
-extern struct debug_obj_descr rcuhead_debug_descr;
+extern const struct debug_obj_descr rcuhead_debug_descr;
 
 static inline int debug_rcu_head_queue(struct rcu_head *head)
 {
diff --git a/kernel/rcu/update.c b/kernel/rcu/update.c
index 2de49b5d8dd2..3e0f4bcb558f 100644
--- a/kernel/rcu/update.c
+++ b/kernel/rcu/update.c
@@ -469,7 +469,7 @@ void destroy_rcu_head_on_stack(struct rcu_head *head)
 }
 EXPORT_SYMBOL_GPL(destroy_rcu_head_on_stack);
 
-struct debug_obj_descr rcuhead_debug_descr = {
+const struct debug_obj_descr rcuhead_debug_descr = {
 	.name = "rcu_head",
 	.is_static_object = rcuhead_is_static_object,
 };
diff --git a/kernel/time/hrtimer.c b/kernel/time/hrtimer.c
index 95b6a708b040..3624b9b5835d 100644
--- a/kernel/time/hrtimer.c
+++ b/kernel/time/hrtimer.c
@@ -342,7 +342,7 @@ EXPORT_SYMBOL_GPL(ktime_add_safe);
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 
-static struct debug_obj_descr hrtimer_debug_descr;
+static const struct debug_obj_descr hrtimer_debug_descr;
 
 static void *hrtimer_debug_hint(void *addr)
 {
@@ -401,7 +401,7 @@ static bool hrtimer_fixup_free(void *addr, enum debug_obj_state state)
 	}
 }
 
-static struct debug_obj_descr hrtimer_debug_descr = {
+static const struct debug_obj_descr hrtimer_debug_descr = {
 	.name		= "hrtimer",
 	.debug_hint	= hrtimer_debug_hint,
 	.fixup_init	= hrtimer_fixup_init,
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index a50364df1054..8b17cf28e54b 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -611,7 +611,7 @@ static void internal_add_timer(struct timer_base *base, struct timer_list *timer
 
 #ifdef CONFIG_DEBUG_OBJECTS_TIMERS
 
-static struct debug_obj_descr timer_debug_descr;
+static const struct debug_obj_descr timer_debug_descr;
 
 static void *timer_debug_hint(void *addr)
 {
@@ -707,7 +707,7 @@ static bool timer_fixup_assert_init(void *addr, enum debug_obj_state state)
 	}
 }
 
-static struct debug_obj_descr timer_debug_descr = {
+static const struct debug_obj_descr timer_debug_descr = {
 	.name			= "timer_list",
 	.debug_hint		= timer_debug_hint,
 	.is_static_object	= timer_is_static_object,
diff --git a/kernel/workqueue.c b/kernel/workqueue.c
index c41c3c17b86a..ac088ce6059b 100644
--- a/kernel/workqueue.c
+++ b/kernel/workqueue.c
@@ -427,7 +427,7 @@ static void show_pwq(struct pool_workqueue *pwq);
 
 #ifdef CONFIG_DEBUG_OBJECTS_WORK
 
-static struct debug_obj_descr work_debug_descr;
+static const struct debug_obj_descr work_debug_descr;
 
 static void *work_debug_hint(void *addr)
 {
@@ -477,7 +477,7 @@ static bool work_fixup_free(void *addr, enum debug_obj_state state)
 	}
 }
 
-static struct debug_obj_descr work_debug_descr = {
+static const struct debug_obj_descr work_debug_descr = {
 	.name		= "work_struct",
 	.debug_hint	= work_debug_hint,
 	.is_static_object = work_is_static_object,
diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index a2345de90e93..f61689a96e85 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -17,7 +17,7 @@ static DEFINE_SPINLOCK(percpu_counters_lock);
 
 #ifdef CONFIG_DEBUG_OBJECTS_PERCPU_COUNTER
 
-static struct debug_obj_descr percpu_counter_debug_descr;
+static const struct debug_obj_descr percpu_counter_debug_descr;
 
 static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
 {
@@ -33,7 +33,7 @@ static bool percpu_counter_fixup_free(void *addr, enum debug_obj_state state)
 	}
 }
 
-static struct debug_obj_descr percpu_counter_debug_descr = {
+static const struct debug_obj_descr percpu_counter_debug_descr = {
 	.name		= "percpu_counter",
 	.fixup_free	= percpu_counter_fixup_free,
 };
-- 
cgit v1.2.3


From 255f6c2e74b136662a191652d688bd7566c94bf6 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Tue, 15 Sep 2020 13:30:24 +1000
Subject: crypto: lib/chacha20poly1305 - Set SG_MITER_ATOMIC unconditionally

There is no reason for the chacha20poly1305 SG miter code to use
kmap instead of kmap_atomic as the critical section doesn't sleep
anyway.  So we can simply get rid of the preemptible check and
set SG_MITER_ATOMIC unconditionally.

Even if we need to reenable preemption to lower latency we should
be doing that by interrupting the SG miter walk rather than using
kmap.

Reported-by: Linus Torvalds <torvalds@linux-foundation.org>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/crypto/chacha20poly1305.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

(limited to 'lib')

diff --git a/lib/crypto/chacha20poly1305.c b/lib/crypto/chacha20poly1305.c
index 431e04280332..5850f3b87359 100644
--- a/lib/crypto/chacha20poly1305.c
+++ b/lib/crypto/chacha20poly1305.c
@@ -251,9 +251,7 @@ bool chacha20poly1305_crypt_sg_inplace(struct scatterlist *src,
 			poly1305_update(&poly1305_state, pad0, 0x10 - (ad_len & 0xf));
 	}
 
-	flags = SG_MITER_TO_SG;
-	if (!preemptible())
-		flags |= SG_MITER_ATOMIC;
+	flags = SG_MITER_TO_SG | SG_MITER_ATOMIC;
 
 	sg_miter_start(&miter, src, sg_nents(src), flags);
 
-- 
cgit v1.2.3


From a8ea8bdd9df92a0e5db5b43900abb7a288b8a53e Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Mon, 21 Sep 2020 00:20:55 +0800
Subject: lib/mpi: Extend the MPI library

Expand the mpi library based on libgcrypt, and the ECC algorithm of
mpi based on libgcrypt requires these functions.
Some other algorithms will be developed based on mpi ecc, such as SM2.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Tested-by: Xufeng Zhang <yunbo.xufeng@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/mpi.h    |  87 +++++++++++++
 lib/mpi/Makefile       |   5 +
 lib/mpi/mpi-add.c      | 155 +++++++++++++++++++++++
 lib/mpi/mpi-bit.c      | 251 ++++++++++++++++++++++++++++++++++++
 lib/mpi/mpi-cmp.c      |  46 +++++--
 lib/mpi/mpi-div.c      | 238 +++++++++++++++++++++++++++++++++++
 lib/mpi/mpi-internal.h |  53 ++++++++
 lib/mpi/mpi-inv.c      | 143 +++++++++++++++++++++
 lib/mpi/mpi-mod.c      | 155 +++++++++++++++++++++++
 lib/mpi/mpi-mul.c      |  94 ++++++++++++++
 lib/mpi/mpicoder.c     | 336 +++++++++++++++++++++++++++++++++++++++++++++++++
 lib/mpi/mpih-div.c     | 294 +++++++++++++++++++++++++++++++++++++++++++
 lib/mpi/mpih-mul.c     |  25 ++++
 lib/mpi/mpiutil.c      | 204 ++++++++++++++++++++++++++++++
 14 files changed, 2076 insertions(+), 10 deletions(-)
 create mode 100644 lib/mpi/mpi-add.c
 create mode 100644 lib/mpi/mpi-div.c
 create mode 100644 lib/mpi/mpi-inv.c
 create mode 100644 lib/mpi/mpi-mod.c
 create mode 100644 lib/mpi/mpi-mul.c

(limited to 'lib')

diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 5d906dfbf3ed..3c9e41603cf6 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -40,21 +40,79 @@ struct gcry_mpi {
 typedef struct gcry_mpi *MPI;
 
 #define mpi_get_nlimbs(a)     ((a)->nlimbs)
+#define mpi_has_sign(a)       ((a)->sign)
 
 /*-- mpiutil.c --*/
 MPI mpi_alloc(unsigned nlimbs);
+void mpi_clear(MPI a);
 void mpi_free(MPI a);
 int mpi_resize(MPI a, unsigned nlimbs);
 
+static inline MPI mpi_new(unsigned int nbits)
+{
+	return mpi_alloc((nbits + BITS_PER_MPI_LIMB - 1) / BITS_PER_MPI_LIMB);
+}
+
+MPI mpi_copy(MPI a);
+MPI mpi_alloc_like(MPI a);
+void mpi_snatch(MPI w, MPI u);
+MPI mpi_set(MPI w, MPI u);
+MPI mpi_set_ui(MPI w, unsigned long u);
+MPI mpi_alloc_set_ui(unsigned long u);
+void mpi_swap_cond(MPI a, MPI b, unsigned long swap);
+
+/* Constants used to return constant MPIs.  See mpi_init if you
+ * want to add more constants.
+ */
+#define MPI_NUMBER_OF_CONSTANTS 6
+enum gcry_mpi_constants {
+	MPI_C_ZERO,
+	MPI_C_ONE,
+	MPI_C_TWO,
+	MPI_C_THREE,
+	MPI_C_FOUR,
+	MPI_C_EIGHT
+};
+
+MPI mpi_const(enum gcry_mpi_constants no);
+
 /*-- mpicoder.c --*/
+
+/* Different formats of external big integer representation. */
+enum gcry_mpi_format {
+	GCRYMPI_FMT_NONE = 0,
+	GCRYMPI_FMT_STD = 1,    /* Twos complement stored without length. */
+	GCRYMPI_FMT_PGP = 2,    /* As used by OpenPGP (unsigned only). */
+	GCRYMPI_FMT_SSH = 3,    /* As used by SSH (like STD but with length). */
+	GCRYMPI_FMT_HEX = 4,    /* Hex format. */
+	GCRYMPI_FMT_USG = 5,    /* Like STD but unsigned. */
+	GCRYMPI_FMT_OPAQUE = 8  /* Opaque format (some functions only). */
+};
+
 MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes);
 MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread);
+int mpi_fromstr(MPI val, const char *str);
+MPI mpi_scanval(const char *string);
 MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len);
 void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign);
 int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes,
 		    int *sign);
 int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned nbytes,
 		     int *sign);
+int mpi_print(enum gcry_mpi_format format, unsigned char *buffer,
+			size_t buflen, size_t *nwritten, MPI a);
+
+/*-- mpi-mod.c --*/
+void mpi_mod(MPI rem, MPI dividend, MPI divisor);
+
+/* Context used with Barrett reduction.  */
+struct barrett_ctx_s;
+typedef struct barrett_ctx_s *mpi_barrett_t;
+
+mpi_barrett_t mpi_barrett_init(MPI m, int copy);
+void mpi_barrett_free(mpi_barrett_t ctx);
+void mpi_mod_barrett(MPI r, MPI x, mpi_barrett_t ctx);
+void mpi_mul_barrett(MPI w, MPI u, MPI v, mpi_barrett_t ctx);
 
 /*-- mpi-pow.c --*/
 int mpi_powm(MPI res, MPI base, MPI exp, MPI mod);
@@ -62,6 +120,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod);
 /*-- mpi-cmp.c --*/
 int mpi_cmp_ui(MPI u, ulong v);
 int mpi_cmp(MPI u, MPI v);
+int mpi_cmpabs(MPI u, MPI v);
 
 /*-- mpi-sub-ui.c --*/
 int mpi_sub_ui(MPI w, MPI u, unsigned long vval);
@@ -69,6 +128,34 @@ int mpi_sub_ui(MPI w, MPI u, unsigned long vval);
 /*-- mpi-bit.c --*/
 void mpi_normalize(MPI a);
 unsigned mpi_get_nbits(MPI a);
+int mpi_test_bit(MPI a, unsigned int n);
+void mpi_set_bit(MPI a, unsigned int n);
+void mpi_set_highbit(MPI a, unsigned int n);
+void mpi_clear_highbit(MPI a, unsigned int n);
+void mpi_clear_bit(MPI a, unsigned int n);
+void mpi_rshift_limbs(MPI a, unsigned int count);
+void mpi_rshift(MPI x, MPI a, unsigned int n);
+void mpi_lshift_limbs(MPI a, unsigned int count);
+void mpi_lshift(MPI x, MPI a, unsigned int n);
+
+/*-- mpi-add.c --*/
+void mpi_add_ui(MPI w, MPI u, unsigned long v);
+void mpi_add(MPI w, MPI u, MPI v);
+void mpi_sub(MPI w, MPI u, MPI v);
+void mpi_addm(MPI w, MPI u, MPI v, MPI m);
+void mpi_subm(MPI w, MPI u, MPI v, MPI m);
+
+/*-- mpi-mul.c --*/
+void mpi_mul(MPI w, MPI u, MPI v);
+void mpi_mulm(MPI w, MPI u, MPI v, MPI m);
+
+/*-- mpi-div.c --*/
+void mpi_tdiv_r(MPI rem, MPI num, MPI den);
+void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor);
+void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor);
+
+/*-- mpi-inv.c --*/
+int mpi_invm(MPI x, MPI a, MPI n);
 
 /* inline functions */
 
diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile
index 43b8fce14079..477debd7ed50 100644
--- a/lib/mpi/Makefile
+++ b/lib/mpi/Makefile
@@ -14,9 +14,14 @@ mpi-y = \
 	generic_mpih-sub1.o		\
 	generic_mpih-add1.o		\
 	mpicoder.o			\
+	mpi-add.o			\
 	mpi-bit.o			\
 	mpi-cmp.o			\
 	mpi-sub-ui.o			\
+	mpi-div.o			\
+	mpi-inv.o			\
+	mpi-mod.o			\
+	mpi-mul.o			\
 	mpih-cmp.o			\
 	mpih-div.o			\
 	mpih-mul.o			\
diff --git a/lib/mpi/mpi-add.c b/lib/mpi/mpi-add.c
new file mode 100644
index 000000000000..2cdae54c1bd0
--- /dev/null
+++ b/lib/mpi/mpi-add.c
@@ -0,0 +1,155 @@
+/* mpi-add.c  -  MPI functions
+ * Copyright (C) 1994, 1996, 1998, 2001, 2002,
+ *               2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Add the unsigned integer V to the mpi-integer U and store the
+ * result in W. U and V may be the same.
+ */
+void mpi_add_ui(MPI w, MPI u, unsigned long v)
+{
+	mpi_ptr_t wp, up;
+	mpi_size_t usize, wsize;
+	int usign, wsign;
+
+	usize = u->nlimbs;
+	usign = u->sign;
+	wsign = 0;
+
+	/* If not space for W (and possible carry), increase space.  */
+	wsize = usize + 1;
+	if (w->alloced < wsize)
+		mpi_resize(w, wsize);
+
+	/* These must be after realloc (U may be the same as W).  */
+	up = u->d;
+	wp = w->d;
+
+	if (!usize) {  /* simple */
+		wp[0] = v;
+		wsize = v ? 1:0;
+	} else if (!usign) {  /* mpi is not negative */
+		mpi_limb_t cy;
+		cy = mpihelp_add_1(wp, up, usize, v);
+		wp[usize] = cy;
+		wsize = usize + cy;
+	} else {
+		/* The signs are different.  Need exact comparison to determine
+		 * which operand to subtract from which.
+		 */
+		if (usize == 1 && up[0] < v) {
+			wp[0] = v - up[0];
+			wsize = 1;
+		} else {
+			mpihelp_sub_1(wp, up, usize, v);
+			/* Size can decrease with at most one limb. */
+			wsize = usize - (wp[usize-1] == 0);
+			wsign = 1;
+		}
+	}
+
+	w->nlimbs = wsize;
+	w->sign   = wsign;
+}
+
+
+void mpi_add(MPI w, MPI u, MPI v)
+{
+	mpi_ptr_t wp, up, vp;
+	mpi_size_t usize, vsize, wsize;
+	int usign, vsign, wsign;
+
+	if (u->nlimbs < v->nlimbs) { /* Swap U and V. */
+		usize = v->nlimbs;
+		usign = v->sign;
+		vsize = u->nlimbs;
+		vsign = u->sign;
+		wsize = usize + 1;
+		RESIZE_IF_NEEDED(w, wsize);
+		/* These must be after realloc (u or v may be the same as w).  */
+		up = v->d;
+		vp = u->d;
+	} else {
+		usize = u->nlimbs;
+		usign = u->sign;
+		vsize = v->nlimbs;
+		vsign = v->sign;
+		wsize = usize + 1;
+		RESIZE_IF_NEEDED(w, wsize);
+		/* These must be after realloc (u or v may be the same as w).  */
+		up = u->d;
+		vp = v->d;
+	}
+	wp = w->d;
+	wsign = 0;
+
+	if (!vsize) {  /* simple */
+		MPN_COPY(wp, up, usize);
+		wsize = usize;
+		wsign = usign;
+	} else if (usign != vsign) { /* different sign */
+		/* This test is right since USIZE >= VSIZE */
+		if (usize != vsize) {
+			mpihelp_sub(wp, up, usize, vp, vsize);
+			wsize = usize;
+			MPN_NORMALIZE(wp, wsize);
+			wsign = usign;
+		} else if (mpihelp_cmp(up, vp, usize) < 0) {
+			mpihelp_sub_n(wp, vp, up, usize);
+			wsize = usize;
+			MPN_NORMALIZE(wp, wsize);
+			if (!usign)
+				wsign = 1;
+		} else {
+			mpihelp_sub_n(wp, up, vp, usize);
+			wsize = usize;
+			MPN_NORMALIZE(wp, wsize);
+			if (usign)
+				wsign = 1;
+		}
+	} else { /* U and V have same sign. Add them. */
+		mpi_limb_t cy = mpihelp_add(wp, up, usize, vp, vsize);
+		wp[usize] = cy;
+		wsize = usize + cy;
+		if (usign)
+			wsign = 1;
+	}
+
+	w->nlimbs = wsize;
+	w->sign = wsign;
+}
+EXPORT_SYMBOL_GPL(mpi_add);
+
+void mpi_sub(MPI w, MPI u, MPI v)
+{
+	MPI vv = mpi_copy(v);
+	vv->sign = !vv->sign;
+	mpi_add(w, u, vv);
+	mpi_free(vv);
+}
+
+
+void mpi_addm(MPI w, MPI u, MPI v, MPI m)
+{
+	mpi_add(w, u, v);
+	mpi_mod(w, w, m);
+}
+EXPORT_SYMBOL_GPL(mpi_addm);
+
+void mpi_subm(MPI w, MPI u, MPI v, MPI m)
+{
+	mpi_sub(w, u, v);
+	mpi_mod(w, w, m);
+}
+EXPORT_SYMBOL_GPL(mpi_subm);
diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
index 503537e08436..a5119a2bcdd4 100644
--- a/lib/mpi/mpi-bit.c
+++ b/lib/mpi/mpi-bit.c
@@ -32,6 +32,7 @@ void mpi_normalize(MPI a)
 	for (; a->nlimbs && !a->d[a->nlimbs - 1]; a->nlimbs--)
 		;
 }
+EXPORT_SYMBOL_GPL(mpi_normalize);
 
 /****************
  * Return the number of bits in A.
@@ -54,3 +55,253 @@ unsigned mpi_get_nbits(MPI a)
 	return n;
 }
 EXPORT_SYMBOL_GPL(mpi_get_nbits);
+
+/****************
+ * Test whether bit N is set.
+ */
+int mpi_test_bit(MPI a, unsigned int n)
+{
+	unsigned int limbno, bitno;
+	mpi_limb_t limb;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno  = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs)
+		return 0; /* too far left: this is a 0 */
+	limb = a->d[limbno];
+	return (limb & (A_LIMB_1 << bitno)) ? 1 : 0;
+}
+EXPORT_SYMBOL_GPL(mpi_test_bit);
+
+/****************
+ * Set bit N of A.
+ */
+void mpi_set_bit(MPI a, unsigned int n)
+{
+	unsigned int i, limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno  = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs) {
+		for (i = a->nlimbs; i < a->alloced; i++)
+			a->d[i] = 0;
+		mpi_resize(a, limbno+1);
+		a->nlimbs = limbno+1;
+	}
+	a->d[limbno] |= (A_LIMB_1<<bitno);
+}
+
+/****************
+ * Set bit N of A. and clear all bits above
+ */
+void mpi_set_highbit(MPI a, unsigned int n)
+{
+	unsigned int i, limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno  = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs) {
+		for (i = a->nlimbs; i < a->alloced; i++)
+			a->d[i] = 0;
+		mpi_resize(a, limbno+1);
+		a->nlimbs = limbno+1;
+	}
+	a->d[limbno] |= (A_LIMB_1<<bitno);
+	for (bitno++; bitno < BITS_PER_MPI_LIMB; bitno++)
+		a->d[limbno] &= ~(A_LIMB_1 << bitno);
+	a->nlimbs = limbno+1;
+}
+EXPORT_SYMBOL_GPL(mpi_set_highbit);
+
+/****************
+ * clear bit N of A and all bits above
+ */
+void mpi_clear_highbit(MPI a, unsigned int n)
+{
+	unsigned int limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno  = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs)
+		return; /* not allocated, therefore no need to clear bits :-) */
+
+	for ( ; bitno < BITS_PER_MPI_LIMB; bitno++)
+		a->d[limbno] &= ~(A_LIMB_1 << bitno);
+	a->nlimbs = limbno+1;
+}
+
+/****************
+ * Clear bit N of A.
+ */
+void mpi_clear_bit(MPI a, unsigned int n)
+{
+	unsigned int limbno, bitno;
+
+	limbno = n / BITS_PER_MPI_LIMB;
+	bitno  = n % BITS_PER_MPI_LIMB;
+
+	if (limbno >= a->nlimbs)
+		return; /* Don't need to clear this bit, it's far too left.  */
+	a->d[limbno] &= ~(A_LIMB_1 << bitno);
+}
+EXPORT_SYMBOL_GPL(mpi_clear_bit);
+
+
+/****************
+ * Shift A by COUNT limbs to the right
+ * This is used only within the MPI library
+ */
+void mpi_rshift_limbs(MPI a, unsigned int count)
+{
+	mpi_ptr_t ap = a->d;
+	mpi_size_t n = a->nlimbs;
+	unsigned int i;
+
+	if (count >= n) {
+		a->nlimbs = 0;
+		return;
+	}
+
+	for (i = 0; i < n - count; i++)
+		ap[i] = ap[i+count];
+	ap[i] = 0;
+	a->nlimbs -= count;
+}
+
+/*
+ * Shift A by N bits to the right.
+ */
+void mpi_rshift(MPI x, MPI a, unsigned int n)
+{
+	mpi_size_t xsize;
+	unsigned int i;
+	unsigned int nlimbs = (n/BITS_PER_MPI_LIMB);
+	unsigned int nbits = (n%BITS_PER_MPI_LIMB);
+
+	if (x == a) {
+		/* In-place operation.  */
+		if (nlimbs >= x->nlimbs) {
+			x->nlimbs = 0;
+			return;
+		}
+
+		if (nlimbs) {
+			for (i = 0; i < x->nlimbs - nlimbs; i++)
+				x->d[i] = x->d[i+nlimbs];
+			x->d[i] = 0;
+			x->nlimbs -= nlimbs;
+		}
+		if (x->nlimbs && nbits)
+			mpihelp_rshift(x->d, x->d, x->nlimbs, nbits);
+	} else if (nlimbs) {
+		/* Copy and shift by more or equal bits than in a limb. */
+		xsize = a->nlimbs;
+		x->sign = a->sign;
+		RESIZE_IF_NEEDED(x, xsize);
+		x->nlimbs = xsize;
+		for (i = 0; i < a->nlimbs; i++)
+			x->d[i] = a->d[i];
+		x->nlimbs = i;
+
+		if (nlimbs >= x->nlimbs) {
+			x->nlimbs = 0;
+			return;
+		}
+
+		if (nlimbs) {
+			for (i = 0; i < x->nlimbs - nlimbs; i++)
+				x->d[i] = x->d[i+nlimbs];
+			x->d[i] = 0;
+			x->nlimbs -= nlimbs;
+		}
+
+		if (x->nlimbs && nbits)
+			mpihelp_rshift(x->d, x->d, x->nlimbs, nbits);
+	} else {
+		/* Copy and shift by less than bits in a limb.  */
+		xsize = a->nlimbs;
+		x->sign = a->sign;
+		RESIZE_IF_NEEDED(x, xsize);
+		x->nlimbs = xsize;
+
+		if (xsize) {
+			if (nbits)
+				mpihelp_rshift(x->d, a->d, x->nlimbs, nbits);
+			else {
+				/* The rshift helper function is not specified for
+				 * NBITS==0, thus we do a plain copy here.
+				 */
+				for (i = 0; i < x->nlimbs; i++)
+					x->d[i] = a->d[i];
+			}
+		}
+	}
+	MPN_NORMALIZE(x->d, x->nlimbs);
+}
+
+/****************
+ * Shift A by COUNT limbs to the left
+ * This is used only within the MPI library
+ */
+void mpi_lshift_limbs(MPI a, unsigned int count)
+{
+	mpi_ptr_t ap;
+	int n = a->nlimbs;
+	int i;
+
+	if (!count || !n)
+		return;
+
+	RESIZE_IF_NEEDED(a, n+count);
+
+	ap = a->d;
+	for (i = n-1; i >= 0; i--)
+		ap[i+count] = ap[i];
+	for (i = 0; i < count; i++)
+		ap[i] = 0;
+	a->nlimbs += count;
+}
+
+/*
+ * Shift A by N bits to the left.
+ */
+void mpi_lshift(MPI x, MPI a, unsigned int n)
+{
+	unsigned int nlimbs = (n/BITS_PER_MPI_LIMB);
+	unsigned int nbits = (n%BITS_PER_MPI_LIMB);
+
+	if (x == a && !n)
+		return;  /* In-place shift with an amount of zero.  */
+
+	if (x != a) {
+		/* Copy A to X.  */
+		unsigned int alimbs = a->nlimbs;
+		int asign = a->sign;
+		mpi_ptr_t xp, ap;
+
+		RESIZE_IF_NEEDED(x, alimbs+nlimbs+1);
+		xp = x->d;
+		ap = a->d;
+		MPN_COPY(xp, ap, alimbs);
+		x->nlimbs = alimbs;
+		x->flags = a->flags;
+		x->sign = asign;
+	}
+
+	if (nlimbs && !nbits) {
+		/* Shift a full number of limbs.  */
+		mpi_lshift_limbs(x, nlimbs);
+	} else if (n) {
+		/* We use a very dump approach: Shift left by the number of
+		 * limbs plus one and than fix it up by an rshift.
+		 */
+		mpi_lshift_limbs(x, nlimbs+1);
+		mpi_rshift(x, x, BITS_PER_MPI_LIMB - nbits);
+	}
+
+	MPN_NORMALIZE(x->d, x->nlimbs);
+}
diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c
index d25e9e96c310..c4cfa3ff0581 100644
--- a/lib/mpi/mpi-cmp.c
+++ b/lib/mpi/mpi-cmp.c
@@ -41,28 +41,54 @@ int mpi_cmp_ui(MPI u, unsigned long v)
 }
 EXPORT_SYMBOL_GPL(mpi_cmp_ui);
 
-int mpi_cmp(MPI u, MPI v)
+static int do_mpi_cmp(MPI u, MPI v, int absmode)
 {
-	mpi_size_t usize, vsize;
+	mpi_size_t usize;
+	mpi_size_t vsize;
+	int usign;
+	int vsign;
 	int cmp;
 
 	mpi_normalize(u);
 	mpi_normalize(v);
+
 	usize = u->nlimbs;
 	vsize = v->nlimbs;
-	if (!u->sign && v->sign)
+	usign = absmode ? 0 : u->sign;
+	vsign = absmode ? 0 : v->sign;
+
+	/* Compare sign bits.  */
+
+	if (!usign && vsign)
 		return 1;
-	if (u->sign && !v->sign)
+	if (usign && !vsign)
 		return -1;
-	if (usize != vsize && !u->sign && !v->sign)
+
+	/* U and V are either both positive or both negative.  */
+
+	if (usize != vsize && !usign && !vsign)
 		return usize - vsize;
-	if (usize != vsize && u->sign && v->sign)
-		return vsize - usize;
+	if (usize != vsize && usign && vsign)
+		return vsize + usize;
 	if (!usize)
 		return 0;
 	cmp = mpihelp_cmp(u->d, v->d, usize);
-	if (u->sign)
-		return -cmp;
-	return cmp;
+	if (!cmp)
+		return 0;
+	if ((cmp < 0?1:0) == (usign?1:0))
+		return 1;
+
+	return -1;
+}
+
+int mpi_cmp(MPI u, MPI v)
+{
+	return do_mpi_cmp(u, v, 0);
 }
 EXPORT_SYMBOL_GPL(mpi_cmp);
+
+int mpi_cmpabs(MPI u, MPI v)
+{
+	return do_mpi_cmp(u, v, 1);
+}
+EXPORT_SYMBOL_GPL(mpi_cmpabs);
diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c
new file mode 100644
index 000000000000..21332dab97d4
--- /dev/null
+++ b/lib/mpi/mpi-div.c
@@ -0,0 +1,238 @@
+/* mpi-div.c  -  MPI functions
+ * Copyright (C) 1994, 1996, 1998, 2001, 2002,
+ *               2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den);
+void mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor);
+
+void mpi_fdiv_r(MPI rem, MPI dividend, MPI divisor)
+{
+	int divisor_sign = divisor->sign;
+	MPI temp_divisor = NULL;
+
+	/* We need the original value of the divisor after the remainder has been
+	 * preliminary calculated.	We have to copy it to temporary space if it's
+	 * the same variable as REM.
+	 */
+	if (rem == divisor) {
+		temp_divisor = mpi_copy(divisor);
+		divisor = temp_divisor;
+	}
+
+	mpi_tdiv_r(rem, dividend, divisor);
+
+	if (((divisor_sign?1:0) ^ (dividend->sign?1:0)) && rem->nlimbs)
+		mpi_add(rem, rem, divisor);
+
+	if (temp_divisor)
+		mpi_free(temp_divisor);
+}
+
+void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor)
+{
+	MPI tmp = mpi_alloc(mpi_get_nlimbs(quot));
+	mpi_fdiv_qr(quot, tmp, dividend, divisor);
+	mpi_free(tmp);
+}
+
+void mpi_fdiv_qr(MPI quot, MPI rem, MPI dividend, MPI divisor)
+{
+	int divisor_sign = divisor->sign;
+	MPI temp_divisor = NULL;
+
+	if (quot == divisor || rem == divisor) {
+		temp_divisor = mpi_copy(divisor);
+		divisor = temp_divisor;
+	}
+
+	mpi_tdiv_qr(quot, rem, dividend, divisor);
+
+	if ((divisor_sign ^ dividend->sign) && rem->nlimbs) {
+		mpi_sub_ui(quot, quot, 1);
+		mpi_add(rem, rem, divisor);
+	}
+
+	if (temp_divisor)
+		mpi_free(temp_divisor);
+}
+
+/* If den == quot, den needs temporary storage.
+ * If den == rem, den needs temporary storage.
+ * If num == quot, num needs temporary storage.
+ * If den has temporary storage, it can be normalized while being copied,
+ *   i.e no extra storage should be allocated.
+ */
+
+void mpi_tdiv_r(MPI rem, MPI num, MPI den)
+{
+	mpi_tdiv_qr(NULL, rem, num, den);
+}
+
+void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den)
+{
+	mpi_ptr_t np, dp;
+	mpi_ptr_t qp, rp;
+	mpi_size_t nsize = num->nlimbs;
+	mpi_size_t dsize = den->nlimbs;
+	mpi_size_t qsize, rsize;
+	mpi_size_t sign_remainder = num->sign;
+	mpi_size_t sign_quotient = num->sign ^ den->sign;
+	unsigned int normalization_steps;
+	mpi_limb_t q_limb;
+	mpi_ptr_t marker[5];
+	unsigned int marker_nlimbs[5];
+	int markidx = 0;
+
+	/* Ensure space is enough for quotient and remainder.
+	 * We need space for an extra limb in the remainder, because it's
+	 * up-shifted (normalized) below.
+	 */
+	rsize = nsize + 1;
+	mpi_resize(rem, rsize);
+
+	qsize = rsize - dsize;	  /* qsize cannot be bigger than this.	*/
+	if (qsize <= 0) {
+		if (num != rem) {
+			rem->nlimbs = num->nlimbs;
+			rem->sign = num->sign;
+			MPN_COPY(rem->d, num->d, nsize);
+		}
+		if (quot) {
+			/* This needs to follow the assignment to rem, in case the
+			 * numerator and quotient are the same.
+			 */
+			quot->nlimbs = 0;
+			quot->sign = 0;
+		}
+		return;
+	}
+
+	if (quot)
+		mpi_resize(quot, qsize);
+
+	/* Read pointers here, when reallocation is finished.  */
+	np = num->d;
+	dp = den->d;
+	rp = rem->d;
+
+	/* Optimize division by a single-limb divisor.  */
+	if (dsize == 1) {
+		mpi_limb_t rlimb;
+		if (quot) {
+			qp = quot->d;
+			rlimb = mpihelp_divmod_1(qp, np, nsize, dp[0]);
+			qsize -= qp[qsize - 1] == 0;
+			quot->nlimbs = qsize;
+			quot->sign = sign_quotient;
+		} else
+			rlimb = mpihelp_mod_1(np, nsize, dp[0]);
+		rp[0] = rlimb;
+		rsize = rlimb != 0?1:0;
+		rem->nlimbs = rsize;
+		rem->sign = sign_remainder;
+		return;
+	}
+
+
+	if (quot) {
+		qp = quot->d;
+		/* Make sure QP and NP point to different objects.  Otherwise the
+		 * numerator would be gradually overwritten by the quotient limbs.
+		 */
+		if (qp == np) { /* Copy NP object to temporary space.  */
+			marker_nlimbs[markidx] = nsize;
+			np = marker[markidx++] = mpi_alloc_limb_space(nsize);
+			MPN_COPY(np, qp, nsize);
+		}
+	} else /* Put quotient at top of remainder. */
+		qp = rp + dsize;
+
+	normalization_steps = count_leading_zeros(dp[dsize - 1]);
+
+	/* Normalize the denominator, i.e. make its most significant bit set by
+	 * shifting it NORMALIZATION_STEPS bits to the left.  Also shift the
+	 * numerator the same number of steps (to keep the quotient the same!).
+	 */
+	if (normalization_steps) {
+		mpi_ptr_t tp;
+		mpi_limb_t nlimb;
+
+		/* Shift up the denominator setting the most significant bit of
+		 * the most significant word.  Use temporary storage not to clobber
+		 * the original contents of the denominator.
+		 */
+		marker_nlimbs[markidx] = dsize;
+		tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+		mpihelp_lshift(tp, dp, dsize, normalization_steps);
+		dp = tp;
+
+		/* Shift up the numerator, possibly introducing a new most
+		 * significant word.  Move the shifted numerator in the remainder
+		 * meanwhile.
+		 */
+		nlimb = mpihelp_lshift(rp, np, nsize, normalization_steps);
+		if (nlimb) {
+			rp[nsize] = nlimb;
+			rsize = nsize + 1;
+		} else
+			rsize = nsize;
+	} else {
+		/* The denominator is already normalized, as required.	Copy it to
+		 * temporary space if it overlaps with the quotient or remainder.
+		 */
+		if (dp == rp || (quot && (dp == qp))) {
+			mpi_ptr_t tp;
+
+			marker_nlimbs[markidx] = dsize;
+			tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
+			MPN_COPY(tp, dp, dsize);
+			dp = tp;
+		}
+
+		/* Move the numerator to the remainder.  */
+		if (rp != np)
+			MPN_COPY(rp, np, nsize);
+
+		rsize = nsize;
+	}
+
+	q_limb = mpihelp_divrem(qp, 0, rp, rsize, dp, dsize);
+
+	if (quot) {
+		qsize = rsize - dsize;
+		if (q_limb) {
+			qp[qsize] = q_limb;
+			qsize += 1;
+		}
+
+		quot->nlimbs = qsize;
+		quot->sign = sign_quotient;
+	}
+
+	rsize = dsize;
+	MPN_NORMALIZE(rp, rsize);
+
+	if (normalization_steps && rsize) {
+		mpihelp_rshift(rp, rp, rsize, normalization_steps);
+		rsize -= rp[rsize - 1] == 0?1:0;
+	}
+
+	rem->nlimbs = rsize;
+	rem->sign	= sign_remainder;
+	while (markidx) {
+		markidx--;
+		mpi_free_limb_space(marker[markidx]);
+	}
+}
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
index 91df5f0b70f2..d29c4537c3a3 100644
--- a/lib/mpi/mpi-internal.h
+++ b/lib/mpi/mpi-internal.h
@@ -52,6 +52,12 @@
 typedef mpi_limb_t *mpi_ptr_t;	/* pointer to a limb */
 typedef int mpi_size_t;		/* (must be a signed type) */
 
+#define RESIZE_IF_NEEDED(a, b)			\
+	do {					\
+		if ((a)->alloced < (b))		\
+			mpi_resize((a), (b));	\
+	} while (0)
+
 /* Copy N limbs from S to D.  */
 #define MPN_COPY(d, s, n) \
 	do {					\
@@ -60,6 +66,14 @@ typedef int mpi_size_t;		/* (must be a signed type) */
 			(d)[_i] = (s)[_i];	\
 	} while (0)
 
+#define MPN_COPY_INCR(d, s, n)		\
+	do {					\
+		mpi_size_t _i;			\
+		for (_i = 0; _i < (n); _i++)	\
+			(d)[_i] = (s)[_i];	\
+	} while (0)
+
+
 #define MPN_COPY_DECR(d, s, n) \
 	do {					\
 		mpi_size_t _i;			\
@@ -92,6 +106,38 @@ typedef int mpi_size_t;		/* (must be a signed type) */
 			mul_n(prodp, up, vp, size, tspace);	\
 	} while (0);
 
+/* Divide the two-limb number in (NH,,NL) by D, with DI being the largest
+ * limb not larger than (2**(2*BITS_PER_MP_LIMB))/D - (2**BITS_PER_MP_LIMB).
+ * If this would yield overflow, DI should be the largest possible number
+ * (i.e., only ones).  For correct operation, the most significant bit of D
+ * has to be set.  Put the quotient in Q and the remainder in R.
+ */
+#define UDIV_QRNND_PREINV(q, r, nh, nl, d, di)				\
+	do {								\
+		mpi_limb_t _ql;						\
+		mpi_limb_t _q, _r;					\
+		mpi_limb_t _xh, _xl;					\
+		umul_ppmm(_q, _ql, (nh), (di));				\
+		_q += (nh);	/* DI is 2**BITS_PER_MPI_LIMB too small */ \
+		umul_ppmm(_xh, _xl, _q, (d));				\
+		sub_ddmmss(_xh, _r, (nh), (nl), _xh, _xl);		\
+		if (_xh) {						\
+			sub_ddmmss(_xh, _r, _xh, _r, 0, (d));		\
+			_q++;						\
+			if (_xh) {					\
+				sub_ddmmss(_xh, _r, _xh, _r, 0, (d));	\
+				_q++;					\
+			}						\
+		}							\
+		if (_r >= (d)) {					\
+			_r -= (d);					\
+			_q++;						\
+		}							\
+		(r) = _r;						\
+		(q) = _q;						\
+	} while (0)
+
+
 /*-- mpiutil.c --*/
 mpi_ptr_t mpi_alloc_limb_space(unsigned nlimbs);
 void mpi_free_limb_space(mpi_ptr_t a);
@@ -135,6 +181,8 @@ int mpihelp_mul(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t usize,
 void mpih_sqr_n_basecase(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size);
 void mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size,
 		mpi_ptr_t tspace);
+void mpihelp_mul_n(mpi_ptr_t prodp,
+		mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size);
 
 int mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
 			       mpi_ptr_t up, mpi_size_t usize,
@@ -146,9 +194,14 @@ mpi_limb_t mpihelp_mul_1(mpi_ptr_t res_ptr, mpi_ptr_t s1_ptr,
 			 mpi_size_t s1_size, mpi_limb_t s2_limb);
 
 /*-- mpih-div.c --*/
+mpi_limb_t mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+			 mpi_limb_t divisor_limb);
 mpi_limb_t mpihelp_divrem(mpi_ptr_t qp, mpi_size_t qextra_limbs,
 			  mpi_ptr_t np, mpi_size_t nsize,
 			  mpi_ptr_t dp, mpi_size_t dsize);
+mpi_limb_t mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+			    mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+			    mpi_limb_t divisor_limb);
 
 /*-- generic_mpih-[lr]shift.c --*/
 mpi_limb_t mpihelp_lshift(mpi_ptr_t wp, mpi_ptr_t up, mpi_size_t usize,
diff --git a/lib/mpi/mpi-inv.c b/lib/mpi/mpi-inv.c
new file mode 100644
index 000000000000..61e37d18f793
--- /dev/null
+++ b/lib/mpi/mpi-inv.c
@@ -0,0 +1,143 @@
+/* mpi-inv.c  -  MPI functions
+ *	Copyright (C) 1998, 2001, 2002, 2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mpi-internal.h"
+
+/****************
+ * Calculate the multiplicative inverse X of A mod N
+ * That is: Find the solution x for
+ *		1 = (a*x) mod n
+ */
+int mpi_invm(MPI x, MPI a, MPI n)
+{
+	/* Extended Euclid's algorithm (See TAOCP Vol II, 4.5.2, Alg X)
+	 * modified according to Michael Penk's solution for Exercise 35
+	 * with further enhancement
+	 */
+	MPI u, v, u1, u2 = NULL, u3, v1, v2 = NULL, v3, t1, t2 = NULL, t3;
+	unsigned int k;
+	int sign;
+	int odd;
+
+	if (!mpi_cmp_ui(a, 0))
+		return 0; /* Inverse does not exists.  */
+	if (!mpi_cmp_ui(n, 1))
+		return 0; /* Inverse does not exists.  */
+
+	u = mpi_copy(a);
+	v = mpi_copy(n);
+
+	for (k = 0; !mpi_test_bit(u, 0) && !mpi_test_bit(v, 0); k++) {
+		mpi_rshift(u, u, 1);
+		mpi_rshift(v, v, 1);
+	}
+	odd = mpi_test_bit(v, 0);
+
+	u1 = mpi_alloc_set_ui(1);
+	if (!odd)
+		u2 = mpi_alloc_set_ui(0);
+	u3 = mpi_copy(u);
+	v1 = mpi_copy(v);
+	if (!odd) {
+		v2 = mpi_alloc(mpi_get_nlimbs(u));
+		mpi_sub(v2, u1, u); /* U is used as const 1 */
+	}
+	v3 = mpi_copy(v);
+	if (mpi_test_bit(u, 0)) { /* u is odd */
+		t1 = mpi_alloc_set_ui(0);
+		if (!odd) {
+			t2 = mpi_alloc_set_ui(1);
+			t2->sign = 1;
+		}
+		t3 = mpi_copy(v);
+		t3->sign = !t3->sign;
+		goto Y4;
+	} else {
+		t1 = mpi_alloc_set_ui(1);
+		if (!odd)
+			t2 = mpi_alloc_set_ui(0);
+		t3 = mpi_copy(u);
+	}
+
+	do {
+		do {
+			if (!odd) {
+				if (mpi_test_bit(t1, 0) || mpi_test_bit(t2, 0)) {
+					/* one is odd */
+					mpi_add(t1, t1, v);
+					mpi_sub(t2, t2, u);
+				}
+				mpi_rshift(t1, t1, 1);
+				mpi_rshift(t2, t2, 1);
+				mpi_rshift(t3, t3, 1);
+			} else {
+				if (mpi_test_bit(t1, 0))
+					mpi_add(t1, t1, v);
+				mpi_rshift(t1, t1, 1);
+				mpi_rshift(t3, t3, 1);
+			}
+Y4:
+			;
+		} while (!mpi_test_bit(t3, 0)); /* while t3 is even */
+
+		if (!t3->sign) {
+			mpi_set(u1, t1);
+			if (!odd)
+				mpi_set(u2, t2);
+			mpi_set(u3, t3);
+		} else {
+			mpi_sub(v1, v, t1);
+			sign = u->sign; u->sign = !u->sign;
+			if (!odd)
+				mpi_sub(v2, u, t2);
+			u->sign = sign;
+			sign = t3->sign; t3->sign = !t3->sign;
+			mpi_set(v3, t3);
+			t3->sign = sign;
+		}
+		mpi_sub(t1, u1, v1);
+		if (!odd)
+			mpi_sub(t2, u2, v2);
+		mpi_sub(t3, u3, v3);
+		if (t1->sign) {
+			mpi_add(t1, t1, v);
+			if (!odd)
+				mpi_sub(t2, t2, u);
+		}
+	} while (mpi_cmp_ui(t3, 0)); /* while t3 != 0 */
+	/* mpi_lshift( u3, k ); */
+	mpi_set(x, u1);
+
+	mpi_free(u1);
+	mpi_free(v1);
+	mpi_free(t1);
+	if (!odd) {
+		mpi_free(u2);
+		mpi_free(v2);
+		mpi_free(t2);
+	}
+	mpi_free(u3);
+	mpi_free(v3);
+	mpi_free(t3);
+
+	mpi_free(u);
+	mpi_free(v);
+	return 1;
+}
+EXPORT_SYMBOL_GPL(mpi_invm);
diff --git a/lib/mpi/mpi-mod.c b/lib/mpi/mpi-mod.c
new file mode 100644
index 000000000000..47bc59edd4ff
--- /dev/null
+++ b/lib/mpi/mpi-mod.c
@@ -0,0 +1,155 @@
+/* mpi-mod.c -  Modular reduction
+ * Copyright (C) 1998, 1999, 2001, 2002, 2003,
+ *               2007  Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ */
+
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+/* Context used with Barrett reduction.  */
+struct barrett_ctx_s {
+	MPI m;   /* The modulus - may not be modified. */
+	int m_copied;   /* If true, M needs to be released.  */
+	int k;
+	MPI y;
+	MPI r1;  /* Helper MPI. */
+	MPI r2;  /* Helper MPI. */
+	MPI r3;  /* Helper MPI allocated on demand. */
+};
+
+
+
+void mpi_mod(MPI rem, MPI dividend, MPI divisor)
+{
+	mpi_fdiv_r(rem, dividend, divisor);
+}
+
+/* This function returns a new context for Barrett based operations on
+ * the modulus M.  This context needs to be released using
+ * _gcry_mpi_barrett_free.  If COPY is true M will be transferred to
+ * the context and the user may change M.  If COPY is false, M may not
+ * be changed until gcry_mpi_barrett_free has been called.
+ */
+mpi_barrett_t mpi_barrett_init(MPI m, int copy)
+{
+	mpi_barrett_t ctx;
+	MPI tmp;
+
+	mpi_normalize(m);
+	ctx = kcalloc(1, sizeof(*ctx), GFP_KERNEL);
+
+	if (copy) {
+		ctx->m = mpi_copy(m);
+		ctx->m_copied = 1;
+	} else
+		ctx->m = m;
+
+	ctx->k = mpi_get_nlimbs(m);
+	tmp = mpi_alloc(ctx->k + 1);
+
+	/* Barrett precalculation: y = floor(b^(2k) / m). */
+	mpi_set_ui(tmp, 1);
+	mpi_lshift_limbs(tmp, 2 * ctx->k);
+	mpi_fdiv_q(tmp, tmp, m);
+
+	ctx->y  = tmp;
+	ctx->r1 = mpi_alloc(2 * ctx->k + 1);
+	ctx->r2 = mpi_alloc(2 * ctx->k + 1);
+
+	return ctx;
+}
+
+void mpi_barrett_free(mpi_barrett_t ctx)
+{
+	if (ctx) {
+		mpi_free(ctx->y);
+		mpi_free(ctx->r1);
+		mpi_free(ctx->r2);
+		if (ctx->r3)
+			mpi_free(ctx->r3);
+		if (ctx->m_copied)
+			mpi_free(ctx->m);
+		kfree(ctx);
+	}
+}
+
+
+/* R = X mod M
+ *
+ * Using Barrett reduction.  Before using this function
+ * _gcry_mpi_barrett_init must have been called to do the
+ * precalculations.  CTX is the context created by this precalculation
+ * and also conveys M.  If the Barret reduction could no be done a
+ * straightforward reduction method is used.
+ *
+ * We assume that these conditions are met:
+ * Input:  x =(x_2k-1 ...x_0)_b
+ *     m =(m_k-1 ....m_0)_b	  with m_k-1 != 0
+ * Output: r = x mod m
+ */
+void mpi_mod_barrett(MPI r, MPI x, mpi_barrett_t ctx)
+{
+	MPI m = ctx->m;
+	int k = ctx->k;
+	MPI y = ctx->y;
+	MPI r1 = ctx->r1;
+	MPI r2 = ctx->r2;
+	int sign;
+
+	mpi_normalize(x);
+	if (mpi_get_nlimbs(x) > 2*k) {
+		mpi_mod(r, x, m);
+		return;
+	}
+
+	sign = x->sign;
+	x->sign = 0;
+
+	/* 1. q1 = floor( x / b^k-1)
+	 *    q2 = q1 * y
+	 *    q3 = floor( q2 / b^k+1 )
+	 * Actually, we don't need qx, we can work direct on r2
+	 */
+	mpi_set(r2, x);
+	mpi_rshift_limbs(r2, k-1);
+	mpi_mul(r2, r2, y);
+	mpi_rshift_limbs(r2, k+1);
+
+	/* 2. r1 = x mod b^k+1
+	 *	r2 = q3 * m mod b^k+1
+	 *	r  = r1 - r2
+	 * 3. if r < 0 then  r = r + b^k+1
+	 */
+	mpi_set(r1, x);
+	if (r1->nlimbs > k+1) /* Quick modulo operation.  */
+		r1->nlimbs = k+1;
+	mpi_mul(r2, r2, m);
+	if (r2->nlimbs > k+1) /* Quick modulo operation. */
+		r2->nlimbs = k+1;
+	mpi_sub(r, r1, r2);
+
+	if (mpi_has_sign(r)) {
+		if (!ctx->r3) {
+			ctx->r3 = mpi_alloc(k + 2);
+			mpi_set_ui(ctx->r3, 1);
+			mpi_lshift_limbs(ctx->r3, k + 1);
+		}
+		mpi_add(r, r, ctx->r3);
+	}
+
+	/* 4. while r >= m do r = r - m */
+	while (mpi_cmp(r, m) >= 0)
+		mpi_sub(r, r, m);
+
+	x->sign = sign;
+}
+
+
+void mpi_mul_barrett(MPI w, MPI u, MPI v, mpi_barrett_t ctx)
+{
+	mpi_mul(w, u, v);
+	mpi_mod_barrett(w, w, ctx);
+}
diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c
new file mode 100644
index 000000000000..587e6335cc12
--- /dev/null
+++ b/lib/mpi/mpi-mul.c
@@ -0,0 +1,94 @@
+/* mpi-mul.c  -  MPI functions
+ * Copyright (C) 1994, 1996, 1998, 2001, 2002,
+ *               2003 Free Software Foundation, Inc.
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Note: This code is heavily based on the GNU MP Library.
+ *	 Actually it's the same code with only minor changes in the
+ *	 way the data is stored; this is to support the abstraction
+ *	 of an optional secure memory allocation which may be used
+ *	 to avoid revealing of sensitive data due to paging etc.
+ */
+
+#include "mpi-internal.h"
+
+void mpi_mul(MPI w, MPI u, MPI v)
+{
+	mpi_size_t usize, vsize, wsize;
+	mpi_ptr_t up, vp, wp;
+	mpi_limb_t cy;
+	int usign, vsign, sign_product;
+	int assign_wp = 0;
+	mpi_ptr_t tmp_limb = NULL;
+	unsigned int tmp_limb_nlimbs = 0;
+
+	if (u->nlimbs < v->nlimbs) {
+		/* Swap U and V. */
+		usize = v->nlimbs;
+		usign = v->sign;
+		up    = v->d;
+		vsize = u->nlimbs;
+		vsign = u->sign;
+		vp    = u->d;
+	} else {
+		usize = u->nlimbs;
+		usign = u->sign;
+		up    = u->d;
+		vsize = v->nlimbs;
+		vsign = v->sign;
+		vp    = v->d;
+	}
+	sign_product = usign ^ vsign;
+	wp = w->d;
+
+	/* Ensure W has space enough to store the result.  */
+	wsize = usize + vsize;
+	if (w->alloced < wsize) {
+		if (wp == up || wp == vp) {
+			wp = mpi_alloc_limb_space(wsize);
+			assign_wp = 1;
+		} else {
+			mpi_resize(w, wsize);
+			wp = w->d;
+		}
+	} else { /* Make U and V not overlap with W.	*/
+		if (wp == up) {
+			/* W and U are identical.  Allocate temporary space for U. */
+			tmp_limb_nlimbs = usize;
+			up = tmp_limb = mpi_alloc_limb_space(usize);
+			/* Is V identical too?  Keep it identical with U.  */
+			if (wp == vp)
+				vp = up;
+			/* Copy to the temporary space.  */
+			MPN_COPY(up, wp, usize);
+		} else if (wp == vp) {
+			/* W and V are identical.  Allocate temporary space for V. */
+			tmp_limb_nlimbs = vsize;
+			vp = tmp_limb = mpi_alloc_limb_space(vsize);
+			/* Copy to the temporary space.  */
+			MPN_COPY(vp, wp, vsize);
+		}
+	}
+
+	if (!vsize)
+		wsize = 0;
+	else {
+		mpihelp_mul(wp, up, usize, vp, vsize, &cy);
+		wsize -= cy ? 0:1;
+	}
+
+	if (assign_wp)
+		mpi_assign_limb_space(w, wp, wsize);
+	w->nlimbs = wsize;
+	w->sign = sign_product;
+	if (tmp_limb)
+		mpi_free_limb_space(tmp_limb);
+}
+
+void mpi_mulm(MPI w, MPI u, MPI v, MPI m)
+{
+	mpi_mul(w, u, v);
+	mpi_tdiv_r(w, w, m);
+}
+EXPORT_SYMBOL_GPL(mpi_mulm);
diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c
index eead4b339466..7ea225b2204f 100644
--- a/lib/mpi/mpicoder.c
+++ b/lib/mpi/mpicoder.c
@@ -25,6 +25,7 @@
 #include <linux/string.h>
 #include "mpi-internal.h"
 
+#define MAX_EXTERN_SCAN_BYTES (16*1024*1024)
 #define MAX_EXTERN_MPI_BITS 16384
 
 /**
@@ -109,6 +110,112 @@ MPI mpi_read_from_buffer(const void *xbuffer, unsigned *ret_nread)
 }
 EXPORT_SYMBOL_GPL(mpi_read_from_buffer);
 
+/****************
+ * Fill the mpi VAL from the hex string in STR.
+ */
+int mpi_fromstr(MPI val, const char *str)
+{
+	int sign = 0;
+	int prepend_zero = 0;
+	int i, j, c, c1, c2;
+	unsigned int nbits, nbytes, nlimbs;
+	mpi_limb_t a;
+
+	if (*str == '-') {
+		sign = 1;
+		str++;
+	}
+
+	/* Skip optional hex prefix.  */
+	if (*str == '0' && str[1] == 'x')
+		str += 2;
+
+	nbits = strlen(str);
+	if (nbits > MAX_EXTERN_SCAN_BYTES) {
+		mpi_clear(val);
+		return -EINVAL;
+	}
+	nbits *= 4;
+	if ((nbits % 8))
+		prepend_zero = 1;
+
+	nbytes = (nbits+7) / 8;
+	nlimbs = (nbytes+BYTES_PER_MPI_LIMB-1) / BYTES_PER_MPI_LIMB;
+
+	if (val->alloced < nlimbs)
+		mpi_resize(val, nlimbs);
+
+	i = BYTES_PER_MPI_LIMB - (nbytes % BYTES_PER_MPI_LIMB);
+	i %= BYTES_PER_MPI_LIMB;
+	j = val->nlimbs = nlimbs;
+	val->sign = sign;
+	for (; j > 0; j--) {
+		a = 0;
+		for (; i < BYTES_PER_MPI_LIMB; i++) {
+			if (prepend_zero) {
+				c1 = '0';
+				prepend_zero = 0;
+			} else
+				c1 = *str++;
+
+			if (!c1) {
+				mpi_clear(val);
+				return -EINVAL;
+			}
+			c2 = *str++;
+			if (!c2) {
+				mpi_clear(val);
+				return -EINVAL;
+			}
+			if (c1 >= '0' && c1 <= '9')
+				c = c1 - '0';
+			else if (c1 >= 'a' && c1 <= 'f')
+				c = c1 - 'a' + 10;
+			else if (c1 >= 'A' && c1 <= 'F')
+				c = c1 - 'A' + 10;
+			else {
+				mpi_clear(val);
+				return -EINVAL;
+			}
+			c <<= 4;
+			if (c2 >= '0' && c2 <= '9')
+				c |= c2 - '0';
+			else if (c2 >= 'a' && c2 <= 'f')
+				c |= c2 - 'a' + 10;
+			else if (c2 >= 'A' && c2 <= 'F')
+				c |= c2 - 'A' + 10;
+			else {
+				mpi_clear(val);
+				return -EINVAL;
+			}
+			a <<= 8;
+			a |= c;
+		}
+		i = 0;
+		val->d[j-1] = a;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(mpi_fromstr);
+
+MPI mpi_scanval(const char *string)
+{
+	MPI a;
+
+	a = mpi_alloc(0);
+	if (!a)
+		return NULL;
+
+	if (mpi_fromstr(a, string)) {
+		mpi_free(a);
+		return NULL;
+	}
+	mpi_normalize(a);
+	return a;
+}
+EXPORT_SYMBOL_GPL(mpi_scanval);
+
 static int count_lzeros(MPI a)
 {
 	mpi_limb_t alimb;
@@ -413,3 +520,232 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes)
 	return val;
 }
 EXPORT_SYMBOL_GPL(mpi_read_raw_from_sgl);
+
+/* Perform a two's complement operation on buffer P of size N bytes.  */
+static void twocompl(unsigned char *p, unsigned int n)
+{
+	int i;
+
+	for (i = n-1; i >= 0 && !p[i]; i--)
+		;
+	if (i >= 0) {
+		if ((p[i] & 0x01))
+			p[i] = (((p[i] ^ 0xfe) | 0x01) & 0xff);
+		else if ((p[i] & 0x02))
+			p[i] = (((p[i] ^ 0xfc) | 0x02) & 0xfe);
+		else if ((p[i] & 0x04))
+			p[i] = (((p[i] ^ 0xf8) | 0x04) & 0xfc);
+		else if ((p[i] & 0x08))
+			p[i] = (((p[i] ^ 0xf0) | 0x08) & 0xf8);
+		else if ((p[i] & 0x10))
+			p[i] = (((p[i] ^ 0xe0) | 0x10) & 0xf0);
+		else if ((p[i] & 0x20))
+			p[i] = (((p[i] ^ 0xc0) | 0x20) & 0xe0);
+		else if ((p[i] & 0x40))
+			p[i] = (((p[i] ^ 0x80) | 0x40) & 0xc0);
+		else
+			p[i] = 0x80;
+
+		for (i--; i >= 0; i--)
+			p[i] ^= 0xff;
+	}
+}
+
+int mpi_print(enum gcry_mpi_format format, unsigned char *buffer,
+			size_t buflen, size_t *nwritten, MPI a)
+{
+	unsigned int nbits = mpi_get_nbits(a);
+	size_t len;
+	size_t dummy_nwritten;
+	int negative;
+
+	if (!nwritten)
+		nwritten = &dummy_nwritten;
+
+	/* Libgcrypt does no always care to set clear the sign if the value
+	 * is 0.  For printing this is a bit of a surprise, in particular
+	 * because if some of the formats don't support negative numbers but
+	 * should be able to print a zero.  Thus we need this extra test
+	 * for a negative number.
+	 */
+	if (a->sign && mpi_cmp_ui(a, 0))
+		negative = 1;
+	else
+		negative = 0;
+
+	len = buflen;
+	*nwritten = 0;
+	if (format == GCRYMPI_FMT_STD) {
+		unsigned char *tmp;
+		int extra = 0;
+		unsigned int n;
+
+		tmp = mpi_get_buffer(a, &n, NULL);
+		if (!tmp)
+			return -EINVAL;
+
+		if (negative) {
+			twocompl(tmp, n);
+			if (!(*tmp & 0x80)) {
+				/* Need to extend the sign.  */
+				n++;
+				extra = 2;
+			}
+		} else if (n && (*tmp & 0x80)) {
+			/* Positive but the high bit of the returned buffer is set.
+			 * Thus we need to print an extra leading 0x00 so that the
+			 * output is interpreted as a positive number.
+			 */
+			n++;
+			extra = 1;
+		}
+
+		if (buffer && n > len) {
+			/* The provided buffer is too short. */
+			kfree(tmp);
+			return -E2BIG;
+		}
+		if (buffer) {
+			unsigned char *s = buffer;
+
+			if (extra == 1)
+				*s++ = 0;
+			else if (extra)
+				*s++ = 0xff;
+			memcpy(s, tmp, n-!!extra);
+		}
+		kfree(tmp);
+		*nwritten = n;
+		return 0;
+	} else if (format == GCRYMPI_FMT_USG) {
+		unsigned int n = (nbits + 7)/8;
+
+		/* Note:  We ignore the sign for this format.  */
+		/* FIXME: for performance reasons we should put this into
+		 * mpi_aprint because we can then use the buffer directly.
+		 */
+
+		if (buffer && n > len)
+			return -E2BIG;
+		if (buffer) {
+			unsigned char *tmp;
+
+			tmp = mpi_get_buffer(a, &n, NULL);
+			if (!tmp)
+				return -EINVAL;
+			memcpy(buffer, tmp, n);
+			kfree(tmp);
+		}
+		*nwritten = n;
+		return 0;
+	} else if (format == GCRYMPI_FMT_PGP) {
+		unsigned int n = (nbits + 7)/8;
+
+		/* The PGP format can only handle unsigned integers.  */
+		if (negative)
+			return -EINVAL;
+
+		if (buffer && n+2 > len)
+			return -E2BIG;
+
+		if (buffer) {
+			unsigned char *tmp;
+			unsigned char *s = buffer;
+
+			s[0] = nbits >> 8;
+			s[1] = nbits;
+
+			tmp = mpi_get_buffer(a, &n, NULL);
+			if (!tmp)
+				return -EINVAL;
+			memcpy(s+2, tmp, n);
+			kfree(tmp);
+		}
+		*nwritten = n+2;
+		return 0;
+	} else if (format == GCRYMPI_FMT_SSH) {
+		unsigned char *tmp;
+		int extra = 0;
+		unsigned int n;
+
+		tmp = mpi_get_buffer(a, &n, NULL);
+		if (!tmp)
+			return -EINVAL;
+
+		if (negative) {
+			twocompl(tmp, n);
+			if (!(*tmp & 0x80)) {
+				/* Need to extend the sign.  */
+				n++;
+				extra = 2;
+			}
+		} else if (n && (*tmp & 0x80)) {
+			n++;
+			extra = 1;
+		}
+
+		if (buffer && n+4 > len) {
+			kfree(tmp);
+			return -E2BIG;
+		}
+
+		if (buffer) {
+			unsigned char *s = buffer;
+
+			*s++ = n >> 24;
+			*s++ = n >> 16;
+			*s++ = n >> 8;
+			*s++ = n;
+			if (extra == 1)
+				*s++ = 0;
+			else if (extra)
+				*s++ = 0xff;
+			memcpy(s, tmp, n-!!extra);
+		}
+		kfree(tmp);
+		*nwritten = 4+n;
+		return 0;
+	} else if (format == GCRYMPI_FMT_HEX) {
+		unsigned char *tmp;
+		int i;
+		int extra = 0;
+		unsigned int n = 0;
+
+		tmp = mpi_get_buffer(a, &n, NULL);
+		if (!tmp)
+			return -EINVAL;
+		if (!n || (*tmp & 0x80))
+			extra = 2;
+
+		if (buffer && 2*n + extra + negative + 1 > len) {
+			kfree(tmp);
+			return -E2BIG;
+		}
+		if (buffer) {
+			unsigned char *s = buffer;
+
+			if (negative)
+				*s++ = '-';
+			if (extra) {
+				*s++ = '0';
+				*s++ = '0';
+			}
+
+			for (i = 0; i < n; i++) {
+				unsigned int c = tmp[i];
+
+				*s++ = (c >> 4) < 10 ? '0'+(c>>4) : 'A'+(c>>4)-10;
+				c &= 15;
+				*s++ = c < 10 ? '0'+c : 'A'+c-10;
+			}
+			*s++ = 0;
+			*nwritten = s - buffer;
+		} else {
+			*nwritten = 2*n + extra + negative + 1;
+		}
+		kfree(tmp);
+		return 0;
+	} else
+		return -EINVAL;
+}
+EXPORT_SYMBOL_GPL(mpi_print);
diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c
index 913a519eb005..182a656a1ba0 100644
--- a/lib/mpi/mpih-div.c
+++ b/lib/mpi/mpih-div.c
@@ -24,6 +24,150 @@
 #define UDIV_TIME UMUL_TIME
 #endif
 
+
+mpi_limb_t
+mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+			mpi_limb_t divisor_limb)
+{
+	mpi_size_t i;
+	mpi_limb_t n1, n0, r;
+	mpi_limb_t dummy;
+
+	/* Botch: Should this be handled at all?  Rely on callers?	*/
+	if (!dividend_size)
+		return 0;
+
+	/* If multiplication is much faster than division, and the
+	 * dividend is large, pre-invert the divisor, and use
+	 * only multiplications in the inner loop.
+	 *
+	 * This test should be read:
+	 *	 Does it ever help to use udiv_qrnnd_preinv?
+	 *	   && Does what we save compensate for the inversion overhead?
+	 */
+	if (UDIV_TIME > (2 * UMUL_TIME + 6)
+			&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
+		int normalization_steps;
+
+		normalization_steps = count_leading_zeros(divisor_limb);
+		if (normalization_steps) {
+			mpi_limb_t divisor_limb_inverted;
+
+			divisor_limb <<= normalization_steps;
+
+			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+			 * most significant bit (with weight 2**N) implicit.
+			 *
+			 * Special case for DIVISOR_LIMB == 100...000.
+			 */
+			if (!(divisor_limb << 1))
+				divisor_limb_inverted = ~(mpi_limb_t)0;
+			else
+				udiv_qrnnd(divisor_limb_inverted, dummy,
+						-divisor_limb, 0, divisor_limb);
+
+			n1 = dividend_ptr[dividend_size - 1];
+			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+			/* Possible optimization:
+			 * if (r == 0
+			 * && divisor_limb > ((n1 << normalization_steps)
+			 *		       | (dividend_ptr[dividend_size - 2] >> ...)))
+			 * ...one division less...
+			 */
+			for (i = dividend_size - 2; i >= 0; i--) {
+				n0 = dividend_ptr[i];
+				UDIV_QRNND_PREINV(dummy, r, r,
+						((n1 << normalization_steps)
+						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
+						divisor_limb, divisor_limb_inverted);
+				n1 = n0;
+			}
+			UDIV_QRNND_PREINV(dummy, r, r,
+					n1 << normalization_steps,
+					divisor_limb, divisor_limb_inverted);
+			return r >> normalization_steps;
+		} else {
+			mpi_limb_t divisor_limb_inverted;
+
+			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+			 * most significant bit (with weight 2**N) implicit.
+			 *
+			 * Special case for DIVISOR_LIMB == 100...000.
+			 */
+			if (!(divisor_limb << 1))
+				divisor_limb_inverted = ~(mpi_limb_t)0;
+			else
+				udiv_qrnnd(divisor_limb_inverted, dummy,
+						-divisor_limb, 0, divisor_limb);
+
+			i = dividend_size - 1;
+			r = dividend_ptr[i];
+
+			if (r >= divisor_limb)
+				r = 0;
+			else
+				i--;
+
+			for ( ; i >= 0; i--) {
+				n0 = dividend_ptr[i];
+				UDIV_QRNND_PREINV(dummy, r, r,
+						n0, divisor_limb, divisor_limb_inverted);
+			}
+			return r;
+		}
+	} else {
+		if (UDIV_NEEDS_NORMALIZATION) {
+			int normalization_steps;
+
+			normalization_steps = count_leading_zeros(divisor_limb);
+			if (normalization_steps) {
+				divisor_limb <<= normalization_steps;
+
+				n1 = dividend_ptr[dividend_size - 1];
+				r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+				/* Possible optimization:
+				 * if (r == 0
+				 * && divisor_limb > ((n1 << normalization_steps)
+				 *		   | (dividend_ptr[dividend_size - 2] >> ...)))
+				 * ...one division less...
+				 */
+				for (i = dividend_size - 2; i >= 0; i--) {
+					n0 = dividend_ptr[i];
+					udiv_qrnnd(dummy, r, r,
+						((n1 << normalization_steps)
+						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
+						divisor_limb);
+					n1 = n0;
+				}
+				udiv_qrnnd(dummy, r, r,
+						n1 << normalization_steps,
+						divisor_limb);
+				return r >> normalization_steps;
+			}
+		}
+		/* No normalization needed, either because udiv_qrnnd doesn't require
+		 * it, or because DIVISOR_LIMB is already normalized.
+		 */
+		i = dividend_size - 1;
+		r = dividend_ptr[i];
+
+		if (r >= divisor_limb)
+			r = 0;
+		else
+			i--;
+
+		for (; i >= 0; i--) {
+			n0 = dividend_ptr[i];
+			udiv_qrnnd(dummy, r, r, n0, divisor_limb);
+		}
+		return r;
+	}
+}
+
 /* Divide num (NP/NSIZE) by den (DP/DSIZE) and write
  * the NSIZE-DSIZE least significant quotient limbs at QP
  * and the DSIZE long remainder at NP.	If QEXTRA_LIMBS is
@@ -221,3 +365,153 @@ q_test:
 
 	return most_significant_q_limb;
 }
+
+/****************
+ * Divide (DIVIDEND_PTR,,DIVIDEND_SIZE) by DIVISOR_LIMB.
+ * Write DIVIDEND_SIZE limbs of quotient at QUOT_PTR.
+ * Return the single-limb remainder.
+ * There are no constraints on the value of the divisor.
+ *
+ * QUOT_PTR and DIVIDEND_PTR might point to the same limb.
+ */
+
+mpi_limb_t
+mpihelp_divmod_1(mpi_ptr_t quot_ptr,
+		mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
+		mpi_limb_t divisor_limb)
+{
+	mpi_size_t i;
+	mpi_limb_t n1, n0, r;
+	mpi_limb_t dummy;
+
+	if (!dividend_size)
+		return 0;
+
+	/* If multiplication is much faster than division, and the
+	 * dividend is large, pre-invert the divisor, and use
+	 * only multiplications in the inner loop.
+	 *
+	 * This test should be read:
+	 * Does it ever help to use udiv_qrnnd_preinv?
+	 * && Does what we save compensate for the inversion overhead?
+	 */
+	if (UDIV_TIME > (2 * UMUL_TIME + 6)
+			&& (UDIV_TIME - (2 * UMUL_TIME + 6)) * dividend_size > UDIV_TIME) {
+		int normalization_steps;
+
+		normalization_steps = count_leading_zeros(divisor_limb);
+		if (normalization_steps) {
+			mpi_limb_t divisor_limb_inverted;
+
+			divisor_limb <<= normalization_steps;
+
+			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+			 * most significant bit (with weight 2**N) implicit.
+			 */
+			/* Special case for DIVISOR_LIMB == 100...000.  */
+			if (!(divisor_limb << 1))
+				divisor_limb_inverted = ~(mpi_limb_t)0;
+			else
+				udiv_qrnnd(divisor_limb_inverted, dummy,
+						-divisor_limb, 0, divisor_limb);
+
+			n1 = dividend_ptr[dividend_size - 1];
+			r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+			/* Possible optimization:
+			 * if (r == 0
+			 * && divisor_limb > ((n1 << normalization_steps)
+			 *		       | (dividend_ptr[dividend_size - 2] >> ...)))
+			 * ...one division less...
+			 */
+			for (i = dividend_size - 2; i >= 0; i--) {
+				n0 = dividend_ptr[i];
+				UDIV_QRNND_PREINV(quot_ptr[i + 1], r, r,
+						((n1 << normalization_steps)
+						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
+						divisor_limb, divisor_limb_inverted);
+				n1 = n0;
+			}
+			UDIV_QRNND_PREINV(quot_ptr[0], r, r,
+					n1 << normalization_steps,
+					divisor_limb, divisor_limb_inverted);
+			return r >> normalization_steps;
+		} else {
+			mpi_limb_t divisor_limb_inverted;
+
+			/* Compute (2**2N - 2**N * DIVISOR_LIMB) / DIVISOR_LIMB.  The
+			 * result is a (N+1)-bit approximation to 1/DIVISOR_LIMB, with the
+			 * most significant bit (with weight 2**N) implicit.
+			 */
+			/* Special case for DIVISOR_LIMB == 100...000.  */
+			if (!(divisor_limb << 1))
+				divisor_limb_inverted = ~(mpi_limb_t) 0;
+			else
+				udiv_qrnnd(divisor_limb_inverted, dummy,
+						-divisor_limb, 0, divisor_limb);
+
+			i = dividend_size - 1;
+			r = dividend_ptr[i];
+
+			if (r >= divisor_limb)
+				r = 0;
+			else
+				quot_ptr[i--] = 0;
+
+			for ( ; i >= 0; i--) {
+				n0 = dividend_ptr[i];
+				UDIV_QRNND_PREINV(quot_ptr[i], r, r,
+						n0, divisor_limb, divisor_limb_inverted);
+			}
+			return r;
+		}
+	} else {
+		if (UDIV_NEEDS_NORMALIZATION) {
+			int normalization_steps;
+
+			normalization_steps = count_leading_zeros(divisor_limb);
+			if (normalization_steps) {
+				divisor_limb <<= normalization_steps;
+
+				n1 = dividend_ptr[dividend_size - 1];
+				r = n1 >> (BITS_PER_MPI_LIMB - normalization_steps);
+
+				/* Possible optimization:
+				 * if (r == 0
+				 * && divisor_limb > ((n1 << normalization_steps)
+				 *		   | (dividend_ptr[dividend_size - 2] >> ...)))
+				 * ...one division less...
+				 */
+				for (i = dividend_size - 2; i >= 0; i--) {
+					n0 = dividend_ptr[i];
+					udiv_qrnnd(quot_ptr[i + 1], r, r,
+						((n1 << normalization_steps)
+						 | (n0 >> (BITS_PER_MPI_LIMB - normalization_steps))),
+						divisor_limb);
+					n1 = n0;
+				}
+				udiv_qrnnd(quot_ptr[0], r, r,
+						n1 << normalization_steps,
+						divisor_limb);
+				return r >> normalization_steps;
+			}
+		}
+		/* No normalization needed, either because udiv_qrnnd doesn't require
+		 * it, or because DIVISOR_LIMB is already normalized.
+		 */
+		i = dividend_size - 1;
+		r = dividend_ptr[i];
+
+		if (r >= divisor_limb)
+			r = 0;
+		else
+			quot_ptr[i--] = 0;
+
+		for (; i >= 0; i--) {
+			n0 = dividend_ptr[i];
+			udiv_qrnnd(quot_ptr[i], r, r, n0, divisor_limb);
+		}
+		return r;
+	}
+}
diff --git a/lib/mpi/mpih-mul.c b/lib/mpi/mpih-mul.c
index a93647564054..e5f1c84e3c48 100644
--- a/lib/mpi/mpih-mul.c
+++ b/lib/mpi/mpih-mul.c
@@ -317,6 +317,31 @@ mpih_sqr_n(mpi_ptr_t prodp, mpi_ptr_t up, mpi_size_t size, mpi_ptr_t tspace)
 	}
 }
 
+
+void mpihelp_mul_n(mpi_ptr_t prodp,
+		mpi_ptr_t up, mpi_ptr_t vp, mpi_size_t size)
+{
+	if (up == vp) {
+		if (size < KARATSUBA_THRESHOLD)
+			mpih_sqr_n_basecase(prodp, up, size);
+		else {
+			mpi_ptr_t tspace;
+			tspace = mpi_alloc_limb_space(2 * size);
+			mpih_sqr_n(prodp, up, size, tspace);
+			mpi_free_limb_space(tspace);
+		}
+	} else {
+		if (size < KARATSUBA_THRESHOLD)
+			mul_n_basecase(prodp, up, vp, size);
+		else {
+			mpi_ptr_t tspace;
+			tspace = mpi_alloc_limb_space(2 * size);
+			mul_n(prodp, up, vp, size, tspace);
+			mpi_free_limb_space(tspace);
+		}
+	}
+}
+
 int
 mpihelp_mul_karatsuba_case(mpi_ptr_t prodp,
 			   mpi_ptr_t up, mpi_size_t usize,
diff --git a/lib/mpi/mpiutil.c b/lib/mpi/mpiutil.c
index 4cd2b335cb7f..3c63710c20c6 100644
--- a/lib/mpi/mpiutil.c
+++ b/lib/mpi/mpiutil.c
@@ -20,6 +20,63 @@
 
 #include "mpi-internal.h"
 
+/* Constants allocated right away at startup.  */
+static MPI constants[MPI_NUMBER_OF_CONSTANTS];
+
+/* Initialize the MPI subsystem.  This is called early and allows to
+ * do some initialization without taking care of threading issues.
+ */
+static int __init mpi_init(void)
+{
+	int idx;
+	unsigned long value;
+
+	for (idx = 0; idx < MPI_NUMBER_OF_CONSTANTS; idx++) {
+		switch (idx) {
+		case MPI_C_ZERO:
+			value = 0;
+			break;
+		case MPI_C_ONE:
+			value = 1;
+			break;
+		case MPI_C_TWO:
+			value = 2;
+			break;
+		case MPI_C_THREE:
+			value = 3;
+			break;
+		case MPI_C_FOUR:
+			value = 4;
+			break;
+		case MPI_C_EIGHT:
+			value = 8;
+			break;
+		default:
+			pr_err("MPI: invalid mpi_const selector %d\n", idx);
+			return -EFAULT;
+		}
+		constants[idx] = mpi_alloc_set_ui(value);
+		constants[idx]->flags = (16|32);
+	}
+
+	return 0;
+}
+postcore_initcall(mpi_init);
+
+/* Return a constant MPI descripbed by NO which is one of the
+ * MPI_C_xxx macros.  There is no need to copy this returned value; it
+ * may be used directly.
+ */
+MPI mpi_const(enum gcry_mpi_constants no)
+{
+	if ((int)no < 0 || no > MPI_NUMBER_OF_CONSTANTS)
+		pr_err("MPI: invalid mpi_const selector %d\n", no);
+	if (!constants[no])
+		pr_err("MPI: MPI subsystem not initialized\n");
+	return constants[no];
+}
+EXPORT_SYMBOL_GPL(mpi_const);
+
 /****************
  * Note:  It was a bad idea to use the number of limbs to allocate
  *	  because on a alpha the limbs are large but we normally need
@@ -106,6 +163,15 @@ int mpi_resize(MPI a, unsigned nlimbs)
 	return 0;
 }
 
+void mpi_clear(MPI a)
+{
+	if (!a)
+		return;
+	a->nlimbs = 0;
+	a->flags = 0;
+}
+EXPORT_SYMBOL_GPL(mpi_clear);
+
 void mpi_free(MPI a)
 {
 	if (!a)
@@ -122,5 +188,143 @@ void mpi_free(MPI a)
 }
 EXPORT_SYMBOL_GPL(mpi_free);
 
+/****************
+ * Note: This copy function should not interpret the MPI
+ *	 but copy it transparently.
+ */
+MPI mpi_copy(MPI a)
+{
+	int i;
+	MPI b;
+
+	if (a) {
+		b = mpi_alloc(a->nlimbs);
+		b->nlimbs = a->nlimbs;
+		b->sign = a->sign;
+		b->flags = a->flags;
+		b->flags &= ~(16|32); /* Reset the immutable and constant flags. */
+		for (i = 0; i < b->nlimbs; i++)
+			b->d[i] = a->d[i];
+	} else
+		b = NULL;
+	return b;
+}
+
+/****************
+ * This function allocates an MPI which is optimized to hold
+ * a value as large as the one given in the argument and allocates it
+ * with the same flags as A.
+ */
+MPI mpi_alloc_like(MPI a)
+{
+	MPI b;
+
+	if (a) {
+		b = mpi_alloc(a->nlimbs);
+		b->nlimbs = 0;
+		b->sign = 0;
+		b->flags = a->flags;
+	} else
+		b = NULL;
+
+	return b;
+}
+
+
+/* Set U into W and release U.  If W is NULL only U will be released. */
+void mpi_snatch(MPI w, MPI u)
+{
+	if (w) {
+		mpi_assign_limb_space(w, u->d, u->alloced);
+		w->nlimbs = u->nlimbs;
+		w->sign   = u->sign;
+		w->flags  = u->flags;
+		u->alloced = 0;
+		u->nlimbs = 0;
+		u->d = NULL;
+	}
+	mpi_free(u);
+}
+
+
+MPI mpi_set(MPI w, MPI u)
+{
+	mpi_ptr_t wp, up;
+	mpi_size_t usize = u->nlimbs;
+	int usign = u->sign;
+
+	if (!w)
+		w = mpi_alloc(mpi_get_nlimbs(u));
+	RESIZE_IF_NEEDED(w, usize);
+	wp = w->d;
+	up = u->d;
+	MPN_COPY(wp, up, usize);
+	w->nlimbs = usize;
+	w->flags = u->flags;
+	w->flags &= ~(16|32); /* Reset the immutable and constant flags.  */
+	w->sign = usign;
+	return w;
+}
+EXPORT_SYMBOL_GPL(mpi_set);
+
+MPI mpi_set_ui(MPI w, unsigned long u)
+{
+	if (!w)
+		w = mpi_alloc(1);
+	/* FIXME: If U is 0 we have no need to resize and thus possible
+	 * allocating the the limbs.
+	 */
+	RESIZE_IF_NEEDED(w, 1);
+	w->d[0] = u;
+	w->nlimbs = u ? 1 : 0;
+	w->sign = 0;
+	w->flags = 0;
+	return w;
+}
+EXPORT_SYMBOL_GPL(mpi_set_ui);
+
+MPI mpi_alloc_set_ui(unsigned long u)
+{
+	MPI w = mpi_alloc(1);
+	w->d[0] = u;
+	w->nlimbs = u ? 1 : 0;
+	w->sign = 0;
+	return w;
+}
+
+/****************
+ * Swap the value of A and B, when SWAP is 1.
+ * Leave the value when SWAP is 0.
+ * This implementation should be constant-time regardless of SWAP.
+ */
+void mpi_swap_cond(MPI a, MPI b, unsigned long swap)
+{
+	mpi_size_t i;
+	mpi_size_t nlimbs;
+	mpi_limb_t mask = ((mpi_limb_t)0) - swap;
+	mpi_limb_t x;
+
+	if (a->alloced > b->alloced)
+		nlimbs = b->alloced;
+	else
+		nlimbs = a->alloced;
+	if (a->nlimbs > nlimbs || b->nlimbs > nlimbs)
+		return;
+
+	for (i = 0; i < nlimbs; i++) {
+		x = mask & (a->d[i] ^ b->d[i]);
+		a->d[i] = a->d[i] ^ x;
+		b->d[i] = b->d[i] ^ x;
+	}
+
+	x = mask & (a->nlimbs ^ b->nlimbs);
+	a->nlimbs = a->nlimbs ^ x;
+	b->nlimbs = b->nlimbs ^ x;
+
+	x = mask & (a->sign ^ b->sign);
+	a->sign = a->sign ^ x;
+	b->sign = b->sign ^ x;
+}
+
 MODULE_DESCRIPTION("Multiprecision maths library");
 MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From d58bb7e55a8a65894cc02f27c3e2bf9403e7c40f Mon Sep 17 00:00:00 2001
From: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Date: Mon, 21 Sep 2020 00:20:56 +0800
Subject: lib/mpi: Introduce ec implementation to MPI library

The implementation of EC is introduced from libgcrypt as the
basic algorithm of elliptic curve, which can be more perfectly
integrated with MPI implementation.
Some other algorithms will be developed based on mpi ecc, such as SM2.

Signed-off-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Tested-by: Xufeng Zhang <yunbo.xufeng@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 include/linux/mpi.h |  105 ++++
 lib/mpi/Makefile    |    1 +
 lib/mpi/ec.c        | 1509 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 1615 insertions(+)
 create mode 100644 lib/mpi/ec.c

(limited to 'lib')

diff --git a/include/linux/mpi.h b/include/linux/mpi.h
index 3c9e41603cf6..3e5358f4de2f 100644
--- a/include/linux/mpi.h
+++ b/include/linux/mpi.h
@@ -157,6 +157,111 @@ void mpi_fdiv_q(MPI quot, MPI dividend, MPI divisor);
 /*-- mpi-inv.c --*/
 int mpi_invm(MPI x, MPI a, MPI n);
 
+/*-- ec.c --*/
+
+/* Object to represent a point in projective coordinates */
+struct gcry_mpi_point {
+	MPI x;
+	MPI y;
+	MPI z;
+};
+
+typedef struct gcry_mpi_point *MPI_POINT;
+
+/* Models describing an elliptic curve */
+enum gcry_mpi_ec_models {
+	/* The Short Weierstrass equation is
+	 *      y^2 = x^3 + ax + b
+	 */
+	MPI_EC_WEIERSTRASS = 0,
+	/* The Montgomery equation is
+	 *      by^2 = x^3 + ax^2 + x
+	 */
+	MPI_EC_MONTGOMERY,
+	/* The Twisted Edwards equation is
+	 *      ax^2 + y^2 = 1 + bx^2y^2
+	 * Note that we use 'b' instead of the commonly used 'd'.
+	 */
+	MPI_EC_EDWARDS
+};
+
+/* Dialects used with elliptic curves */
+enum ecc_dialects {
+	ECC_DIALECT_STANDARD = 0,
+	ECC_DIALECT_ED25519,
+	ECC_DIALECT_SAFECURVE
+};
+
+/* This context is used with all our EC functions. */
+struct mpi_ec_ctx {
+	enum gcry_mpi_ec_models model; /* The model describing this curve. */
+	enum ecc_dialects dialect;     /* The ECC dialect used with the curve. */
+	int flags;                     /* Public key flags (not always used). */
+	unsigned int nbits;            /* Number of bits.  */
+
+	/* Domain parameters.  Note that they may not all be set and if set
+	 * the MPIs may be flaged as constant.
+	 */
+	MPI p;         /* Prime specifying the field GF(p).  */
+	MPI a;         /* First coefficient of the Weierstrass equation.  */
+	MPI b;         /* Second coefficient of the Weierstrass equation.  */
+	MPI_POINT G;   /* Base point (generator).  */
+	MPI n;         /* Order of G.  */
+	unsigned int h;       /* Cofactor.  */
+
+	/* The actual key.  May not be set.  */
+	MPI_POINT Q;   /* Public key.   */
+	MPI d;         /* Private key.  */
+
+	const char *name;      /* Name of the curve.  */
+
+	/* This structure is private to mpi/ec.c! */
+	struct {
+		struct {
+			unsigned int a_is_pminus3:1;
+			unsigned int two_inv_p:1;
+		} valid; /* Flags to help setting the helper vars below.  */
+
+		int a_is_pminus3;  /* True if A = P - 3. */
+
+		MPI two_inv_p;
+
+		mpi_barrett_t p_barrett;
+
+		/* Scratch variables.  */
+		MPI scratch[11];
+
+		/* Helper for fast reduction.  */
+		/*   int nist_nbits; /\* If this is a NIST curve, the # of bits. *\/ */
+		/*   MPI s[10]; */
+		/*   MPI c; */
+	} t;
+
+	/* Curve specific computation routines for the field.  */
+	void (*addm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx);
+	void (*subm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ec);
+	void (*mulm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx);
+	void (*pow2)(MPI w, const MPI b, struct mpi_ec_ctx *ctx);
+	void (*mul2)(MPI w, MPI u, struct mpi_ec_ctx *ctx);
+};
+
+void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model,
+			enum ecc_dialects dialect,
+			int flags, MPI p, MPI a, MPI b);
+void mpi_ec_deinit(struct mpi_ec_ctx *ctx);
+MPI_POINT mpi_point_new(unsigned int nbits);
+void mpi_point_release(MPI_POINT p);
+void mpi_point_init(MPI_POINT p);
+void mpi_point_free_parts(MPI_POINT p);
+int mpi_ec_get_affine(MPI x, MPI y, MPI_POINT point, struct mpi_ec_ctx *ctx);
+void mpi_ec_add_points(MPI_POINT result,
+			MPI_POINT p1, MPI_POINT p2,
+			struct mpi_ec_ctx *ctx);
+void mpi_ec_mul_point(MPI_POINT result,
+			MPI scalar, MPI_POINT point,
+			struct mpi_ec_ctx *ctx);
+int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx);
+
 /* inline functions */
 
 /**
diff --git a/lib/mpi/Makefile b/lib/mpi/Makefile
index 477debd7ed50..6e6ef9a34fe1 100644
--- a/lib/mpi/Makefile
+++ b/lib/mpi/Makefile
@@ -13,6 +13,7 @@ mpi-y = \
 	generic_mpih-rshift.o		\
 	generic_mpih-sub1.o		\
 	generic_mpih-add1.o		\
+	ec.o				\
 	mpicoder.o			\
 	mpi-add.o			\
 	mpi-bit.o			\
diff --git a/lib/mpi/ec.c b/lib/mpi/ec.c
new file mode 100644
index 000000000000..c21470122dfc
--- /dev/null
+++ b/lib/mpi/ec.c
@@ -0,0 +1,1509 @@
+/* ec.c -  Elliptic Curve functions
+ * Copyright (C) 2007 Free Software Foundation, Inc.
+ * Copyright (C) 2013 g10 Code GmbH
+ *
+ * This file is part of Libgcrypt.
+ *
+ * Libgcrypt is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU Lesser General Public License as
+ * published by the Free Software Foundation; either version 2.1 of
+ * the License, or (at your option) any later version.
+ *
+ * Libgcrypt is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with this program; if not, see <http://www.gnu.org/licenses/>.
+ */
+
+#include "mpi-internal.h"
+#include "longlong.h"
+
+#define point_init(a)  mpi_point_init((a))
+#define point_free(a)  mpi_point_free_parts((a))
+
+#define log_error(fmt, ...) pr_err(fmt, ##__VA_ARGS__)
+#define log_fatal(fmt, ...) pr_err(fmt, ##__VA_ARGS__)
+
+#define DIM(v) (sizeof(v)/sizeof((v)[0]))
+
+
+/* Create a new point option.  NBITS gives the size in bits of one
+ * coordinate; it is only used to pre-allocate some resources and
+ * might also be passed as 0 to use a default value.
+ */
+MPI_POINT mpi_point_new(unsigned int nbits)
+{
+	MPI_POINT p;
+
+	(void)nbits;  /* Currently not used.  */
+
+	p = kmalloc(sizeof(*p), GFP_KERNEL);
+	if (p)
+		mpi_point_init(p);
+	return p;
+}
+EXPORT_SYMBOL_GPL(mpi_point_new);
+
+/* Release the point object P.  P may be NULL. */
+void mpi_point_release(MPI_POINT p)
+{
+	if (p) {
+		mpi_point_free_parts(p);
+		kfree(p);
+	}
+}
+EXPORT_SYMBOL_GPL(mpi_point_release);
+
+/* Initialize the fields of a point object.  gcry_mpi_point_free_parts
+ * may be used to release the fields.
+ */
+void mpi_point_init(MPI_POINT p)
+{
+	p->x = mpi_new(0);
+	p->y = mpi_new(0);
+	p->z = mpi_new(0);
+}
+EXPORT_SYMBOL_GPL(mpi_point_init);
+
+/* Release the parts of a point object. */
+void mpi_point_free_parts(MPI_POINT p)
+{
+	mpi_free(p->x); p->x = NULL;
+	mpi_free(p->y); p->y = NULL;
+	mpi_free(p->z); p->z = NULL;
+}
+EXPORT_SYMBOL_GPL(mpi_point_free_parts);
+
+/* Set the value from S into D.  */
+static void point_set(MPI_POINT d, MPI_POINT s)
+{
+	mpi_set(d->x, s->x);
+	mpi_set(d->y, s->y);
+	mpi_set(d->z, s->z);
+}
+
+static void point_resize(MPI_POINT p, struct mpi_ec_ctx *ctx)
+{
+	size_t nlimbs = ctx->p->nlimbs;
+
+	mpi_resize(p->x, nlimbs);
+	p->x->nlimbs = nlimbs;
+	mpi_resize(p->z, nlimbs);
+	p->z->nlimbs = nlimbs;
+
+	if (ctx->model != MPI_EC_MONTGOMERY) {
+		mpi_resize(p->y, nlimbs);
+		p->y->nlimbs = nlimbs;
+	}
+}
+
+static void point_swap_cond(MPI_POINT d, MPI_POINT s, unsigned long swap,
+		struct mpi_ec_ctx *ctx)
+{
+	mpi_swap_cond(d->x, s->x, swap);
+	if (ctx->model != MPI_EC_MONTGOMERY)
+		mpi_swap_cond(d->y, s->y, swap);
+	mpi_swap_cond(d->z, s->z, swap);
+}
+
+
+/* W = W mod P.  */
+static void ec_mod(MPI w, struct mpi_ec_ctx *ec)
+{
+	if (ec->t.p_barrett)
+		mpi_mod_barrett(w, w, ec->t.p_barrett);
+	else
+		mpi_mod(w, w, ec->p);
+}
+
+static void ec_addm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx)
+{
+	mpi_add(w, u, v);
+	ec_mod(w, ctx);
+}
+
+static void ec_subm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ec)
+{
+	mpi_sub(w, u, v);
+	while (w->sign)
+		mpi_add(w, w, ec->p);
+	/*ec_mod(w, ec);*/
+}
+
+static void ec_mulm(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx)
+{
+	mpi_mul(w, u, v);
+	ec_mod(w, ctx);
+}
+
+/* W = 2 * U mod P.  */
+static void ec_mul2(MPI w, MPI u, struct mpi_ec_ctx *ctx)
+{
+	mpi_lshift(w, u, 1);
+	ec_mod(w, ctx);
+}
+
+static void ec_powm(MPI w, const MPI b, const MPI e,
+		struct mpi_ec_ctx *ctx)
+{
+	mpi_powm(w, b, e, ctx->p);
+	/* mpi_abs(w); */
+}
+
+/* Shortcut for
+ * ec_powm(B, B, mpi_const(MPI_C_TWO), ctx);
+ * for easier optimization.
+ */
+static void ec_pow2(MPI w, const MPI b, struct mpi_ec_ctx *ctx)
+{
+	/* Using mpi_mul is slightly faster (at least on amd64).  */
+	/* mpi_powm(w, b, mpi_const(MPI_C_TWO), ctx->p); */
+	ec_mulm(w, b, b, ctx);
+}
+
+/* Shortcut for
+ * ec_powm(B, B, mpi_const(MPI_C_THREE), ctx);
+ * for easier optimization.
+ */
+static void ec_pow3(MPI w, const MPI b, struct mpi_ec_ctx *ctx)
+{
+	mpi_powm(w, b, mpi_const(MPI_C_THREE), ctx->p);
+}
+
+static void ec_invm(MPI x, MPI a, struct mpi_ec_ctx *ctx)
+{
+	if (!mpi_invm(x, a, ctx->p))
+		log_error("ec_invm: inverse does not exist:\n");
+}
+
+static void mpih_set_cond(mpi_ptr_t wp, mpi_ptr_t up,
+		mpi_size_t usize, unsigned long set)
+{
+	mpi_size_t i;
+	mpi_limb_t mask = ((mpi_limb_t)0) - set;
+	mpi_limb_t x;
+
+	for (i = 0; i < usize; i++) {
+		x = mask & (wp[i] ^ up[i]);
+		wp[i] = wp[i] ^ x;
+	}
+}
+
+/* Routines for 2^255 - 19.  */
+
+#define LIMB_SIZE_25519 ((256+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB)
+
+static void ec_addm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx)
+{
+	mpi_ptr_t wp, up, vp;
+	mpi_size_t wsize = LIMB_SIZE_25519;
+	mpi_limb_t n[LIMB_SIZE_25519];
+	mpi_limb_t borrow;
+
+	if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+		log_bug("addm_25519: different sizes\n");
+
+	memset(n, 0, sizeof(n));
+	up = u->d;
+	vp = v->d;
+	wp = w->d;
+
+	mpihelp_add_n(wp, up, vp, wsize);
+	borrow = mpihelp_sub_n(wp, wp, ctx->p->d, wsize);
+	mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL));
+	mpihelp_add_n(wp, wp, n, wsize);
+	wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
+}
+
+static void ec_subm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx)
+{
+	mpi_ptr_t wp, up, vp;
+	mpi_size_t wsize = LIMB_SIZE_25519;
+	mpi_limb_t n[LIMB_SIZE_25519];
+	mpi_limb_t borrow;
+
+	if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+		log_bug("subm_25519: different sizes\n");
+
+	memset(n, 0, sizeof(n));
+	up = u->d;
+	vp = v->d;
+	wp = w->d;
+
+	borrow = mpihelp_sub_n(wp, up, vp, wsize);
+	mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL));
+	mpihelp_add_n(wp, wp, n, wsize);
+	wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
+}
+
+static void ec_mulm_25519(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx)
+{
+	mpi_ptr_t wp, up, vp;
+	mpi_size_t wsize = LIMB_SIZE_25519;
+	mpi_limb_t n[LIMB_SIZE_25519*2];
+	mpi_limb_t m[LIMB_SIZE_25519+1];
+	mpi_limb_t cy;
+	int msb;
+
+	(void)ctx;
+	if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+		log_bug("mulm_25519: different sizes\n");
+
+	up = u->d;
+	vp = v->d;
+	wp = w->d;
+
+	mpihelp_mul_n(n, up, vp, wsize);
+	memcpy(wp, n, wsize * BYTES_PER_MPI_LIMB);
+	wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
+
+	memcpy(m, n+LIMB_SIZE_25519-1, (wsize+1) * BYTES_PER_MPI_LIMB);
+	mpihelp_rshift(m, m, LIMB_SIZE_25519+1, (255 % BITS_PER_MPI_LIMB));
+
+	memcpy(n, m, wsize * BYTES_PER_MPI_LIMB);
+	cy = mpihelp_lshift(m, m, LIMB_SIZE_25519, 4);
+	m[LIMB_SIZE_25519] = cy;
+	cy = mpihelp_add_n(m, m, n, wsize);
+	m[LIMB_SIZE_25519] += cy;
+	cy = mpihelp_add_n(m, m, n, wsize);
+	m[LIMB_SIZE_25519] += cy;
+	cy = mpihelp_add_n(m, m, n, wsize);
+	m[LIMB_SIZE_25519] += cy;
+
+	cy = mpihelp_add_n(wp, wp, m, wsize);
+	m[LIMB_SIZE_25519] += cy;
+
+	memset(m, 0, wsize * BYTES_PER_MPI_LIMB);
+	msb = (wp[LIMB_SIZE_25519-1] >> (255 % BITS_PER_MPI_LIMB));
+	m[0] = (m[LIMB_SIZE_25519] * 2 + msb) * 19;
+	wp[LIMB_SIZE_25519-1] &= ~((mpi_limb_t)1 << (255 % BITS_PER_MPI_LIMB));
+	mpihelp_add_n(wp, wp, m, wsize);
+
+	m[0] = 0;
+	cy = mpihelp_sub_n(wp, wp, ctx->p->d, wsize);
+	mpih_set_cond(m, ctx->p->d, wsize, (cy != 0UL));
+	mpihelp_add_n(wp, wp, m, wsize);
+}
+
+static void ec_mul2_25519(MPI w, MPI u, struct mpi_ec_ctx *ctx)
+{
+	ec_addm_25519(w, u, u, ctx);
+}
+
+static void ec_pow2_25519(MPI w, const MPI b, struct mpi_ec_ctx *ctx)
+{
+	ec_mulm_25519(w, b, b, ctx);
+}
+
+/* Routines for 2^448 - 2^224 - 1.  */
+
+#define LIMB_SIZE_448 ((448+BITS_PER_MPI_LIMB-1)/BITS_PER_MPI_LIMB)
+#define LIMB_SIZE_HALF_448 ((LIMB_SIZE_448+1)/2)
+
+static void ec_addm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx)
+{
+	mpi_ptr_t wp, up, vp;
+	mpi_size_t wsize = LIMB_SIZE_448;
+	mpi_limb_t n[LIMB_SIZE_448];
+	mpi_limb_t cy;
+
+	if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+		log_bug("addm_448: different sizes\n");
+
+	memset(n, 0, sizeof(n));
+	up = u->d;
+	vp = v->d;
+	wp = w->d;
+
+	cy = mpihelp_add_n(wp, up, vp, wsize);
+	mpih_set_cond(n, ctx->p->d, wsize, (cy != 0UL));
+	mpihelp_sub_n(wp, wp, n, wsize);
+}
+
+static void ec_subm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx)
+{
+	mpi_ptr_t wp, up, vp;
+	mpi_size_t wsize = LIMB_SIZE_448;
+	mpi_limb_t n[LIMB_SIZE_448];
+	mpi_limb_t borrow;
+
+	if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+		log_bug("subm_448: different sizes\n");
+
+	memset(n, 0, sizeof(n));
+	up = u->d;
+	vp = v->d;
+	wp = w->d;
+
+	borrow = mpihelp_sub_n(wp, up, vp, wsize);
+	mpih_set_cond(n, ctx->p->d, wsize, (borrow != 0UL));
+	mpihelp_add_n(wp, wp, n, wsize);
+}
+
+static void ec_mulm_448(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx)
+{
+	mpi_ptr_t wp, up, vp;
+	mpi_size_t wsize = LIMB_SIZE_448;
+	mpi_limb_t n[LIMB_SIZE_448*2];
+	mpi_limb_t a2[LIMB_SIZE_HALF_448];
+	mpi_limb_t a3[LIMB_SIZE_HALF_448];
+	mpi_limb_t b0[LIMB_SIZE_HALF_448];
+	mpi_limb_t b1[LIMB_SIZE_HALF_448];
+	mpi_limb_t cy;
+	int i;
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+	mpi_limb_t b1_rest, a3_rest;
+#endif
+
+	if (w->nlimbs != wsize || u->nlimbs != wsize || v->nlimbs != wsize)
+		log_bug("mulm_448: different sizes\n");
+
+	up = u->d;
+	vp = v->d;
+	wp = w->d;
+
+	mpihelp_mul_n(n, up, vp, wsize);
+
+	for (i = 0; i < (wsize + 1) / 2; i++) {
+		b0[i] = n[i];
+		b1[i] = n[i+wsize/2];
+		a2[i] = n[i+wsize];
+		a3[i] = n[i+wsize+wsize/2];
+	}
+
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+	b0[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL << 32)-1;
+	a2[LIMB_SIZE_HALF_448-1] &= ((mpi_limb_t)1UL << 32)-1;
+
+	b1_rest = 0;
+	a3_rest = 0;
+
+	for (i = (wsize + 1) / 2 - 1; i >= 0; i--) {
+		mpi_limb_t b1v, a3v;
+		b1v = b1[i];
+		a3v = a3[i];
+		b1[i] = (b1_rest << 32) | (b1v >> 32);
+		a3[i] = (a3_rest << 32) | (a3v >> 32);
+		b1_rest = b1v & (((mpi_limb_t)1UL << 32)-1);
+		a3_rest = a3v & (((mpi_limb_t)1UL << 32)-1);
+	}
+#endif
+
+	cy = mpihelp_add_n(b0, b0, a2, LIMB_SIZE_HALF_448);
+	cy += mpihelp_add_n(b0, b0, a3, LIMB_SIZE_HALF_448);
+	for (i = 0; i < (wsize + 1) / 2; i++)
+		wp[i] = b0[i];
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+	wp[LIMB_SIZE_HALF_448-1] &= (((mpi_limb_t)1UL << 32)-1);
+#endif
+
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+	cy = b0[LIMB_SIZE_HALF_448-1] >> 32;
+#endif
+
+	cy = mpihelp_add_1(b1, b1, LIMB_SIZE_HALF_448, cy);
+	cy += mpihelp_add_n(b1, b1, a2, LIMB_SIZE_HALF_448);
+	cy += mpihelp_add_n(b1, b1, a3, LIMB_SIZE_HALF_448);
+	cy += mpihelp_add_n(b1, b1, a3, LIMB_SIZE_HALF_448);
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+	b1_rest = 0;
+	for (i = (wsize + 1) / 2 - 1; i >= 0; i--) {
+		mpi_limb_t b1v = b1[i];
+		b1[i] = (b1_rest << 32) | (b1v >> 32);
+		b1_rest = b1v & (((mpi_limb_t)1UL << 32)-1);
+	}
+	wp[LIMB_SIZE_HALF_448-1] |= (b1_rest << 32);
+#endif
+	for (i = 0; i < wsize / 2; i++)
+		wp[i+(wsize + 1) / 2] = b1[i];
+
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+	cy = b1[LIMB_SIZE_HALF_448-1];
+#endif
+
+	memset(n, 0, wsize * BYTES_PER_MPI_LIMB);
+
+#if (LIMB_SIZE_HALF_448 > LIMB_SIZE_448/2)
+	n[LIMB_SIZE_HALF_448-1] = cy << 32;
+#else
+	n[LIMB_SIZE_HALF_448] = cy;
+#endif
+	n[0] = cy;
+	mpihelp_add_n(wp, wp, n, wsize);
+
+	memset(n, 0, wsize * BYTES_PER_MPI_LIMB);
+	cy = mpihelp_sub_n(wp, wp, ctx->p->d, wsize);
+	mpih_set_cond(n, ctx->p->d, wsize, (cy != 0UL));
+	mpihelp_add_n(wp, wp, n, wsize);
+}
+
+static void ec_mul2_448(MPI w, MPI u, struct mpi_ec_ctx *ctx)
+{
+	ec_addm_448(w, u, u, ctx);
+}
+
+static void ec_pow2_448(MPI w, const MPI b, struct mpi_ec_ctx *ctx)
+{
+	ec_mulm_448(w, b, b, ctx);
+}
+
+struct field_table {
+	const char *p;
+
+	/* computation routines for the field.  */
+	void (*addm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx);
+	void (*subm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx);
+	void (*mulm)(MPI w, MPI u, MPI v, struct mpi_ec_ctx *ctx);
+	void (*mul2)(MPI w, MPI u, struct mpi_ec_ctx *ctx);
+	void (*pow2)(MPI w, const MPI b, struct mpi_ec_ctx *ctx);
+};
+
+static const struct field_table field_table[] = {
+	{
+		"0x7FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFED",
+		ec_addm_25519,
+		ec_subm_25519,
+		ec_mulm_25519,
+		ec_mul2_25519,
+		ec_pow2_25519
+	},
+	{
+		"0xFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFE"
+		"FFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFFF",
+		ec_addm_448,
+		ec_subm_448,
+		ec_mulm_448,
+		ec_mul2_448,
+		ec_pow2_448
+	},
+	{ NULL, NULL, NULL, NULL, NULL, NULL },
+};
+
+/* Force recomputation of all helper variables.  */
+static void mpi_ec_get_reset(struct mpi_ec_ctx *ec)
+{
+	ec->t.valid.a_is_pminus3 = 0;
+	ec->t.valid.two_inv_p = 0;
+}
+
+/* Accessor for helper variable.  */
+static int ec_get_a_is_pminus3(struct mpi_ec_ctx *ec)
+{
+	MPI tmp;
+
+	if (!ec->t.valid.a_is_pminus3) {
+		ec->t.valid.a_is_pminus3 = 1;
+		tmp = mpi_alloc_like(ec->p);
+		mpi_sub_ui(tmp, ec->p, 3);
+		ec->t.a_is_pminus3 = !mpi_cmp(ec->a, tmp);
+		mpi_free(tmp);
+	}
+
+	return ec->t.a_is_pminus3;
+}
+
+/* Accessor for helper variable.  */
+static MPI ec_get_two_inv_p(struct mpi_ec_ctx *ec)
+{
+	if (!ec->t.valid.two_inv_p) {
+		ec->t.valid.two_inv_p = 1;
+		if (!ec->t.two_inv_p)
+			ec->t.two_inv_p = mpi_alloc(0);
+		ec_invm(ec->t.two_inv_p, mpi_const(MPI_C_TWO), ec);
+	}
+	return ec->t.two_inv_p;
+}
+
+static const char *const curve25519_bad_points[] = {
+	"0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffed",
+	"0x0000000000000000000000000000000000000000000000000000000000000000",
+	"0x0000000000000000000000000000000000000000000000000000000000000001",
+	"0x00b8495f16056286fdb1329ceb8d09da6ac49ff1fae35616aeb8413b7c7aebe0",
+	"0x57119fd0dd4e22d8868e1c58c45c44045bef839c55b1d0b1248c50a3bc959c5f",
+	"0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffec",
+	"0x7fffffffffffffffffffffffffffffffffffffffffffffffffffffffffffffee",
+	NULL
+};
+
+static const char *const curve448_bad_points[] = {
+	"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+	"ffffffffffffffffffffffffffffffffffffffffffffffffffffffff",
+	"0x00000000000000000000000000000000000000000000000000000000"
+	"00000000000000000000000000000000000000000000000000000000",
+	"0x00000000000000000000000000000000000000000000000000000000"
+	"00000000000000000000000000000000000000000000000000000001",
+	"0xfffffffffffffffffffffffffffffffffffffffffffffffffffffffe"
+	"fffffffffffffffffffffffffffffffffffffffffffffffffffffffe",
+	"0xffffffffffffffffffffffffffffffffffffffffffffffffffffffff"
+	"00000000000000000000000000000000000000000000000000000000",
+	NULL
+};
+
+static const char *const *bad_points_table[] = {
+	curve25519_bad_points,
+	curve448_bad_points,
+};
+
+static void mpi_ec_coefficient_normalize(MPI a, MPI p)
+{
+	if (a->sign) {
+		mpi_resize(a, p->nlimbs);
+		mpihelp_sub_n(a->d, p->d, a->d, p->nlimbs);
+		a->nlimbs = p->nlimbs;
+		a->sign = 0;
+	}
+}
+
+/* This function initialized a context for elliptic curve based on the
+ * field GF(p).  P is the prime specifying this field, A is the first
+ * coefficient.  CTX is expected to be zeroized.
+ */
+void mpi_ec_init(struct mpi_ec_ctx *ctx, enum gcry_mpi_ec_models model,
+			enum ecc_dialects dialect,
+			int flags, MPI p, MPI a, MPI b)
+{
+	int i;
+	static int use_barrett = -1 /* TODO: 1 or -1 */;
+
+	mpi_ec_coefficient_normalize(a, p);
+	mpi_ec_coefficient_normalize(b, p);
+
+	/* Fixme: Do we want to check some constraints? e.g.  a < p  */
+
+	ctx->model = model;
+	ctx->dialect = dialect;
+	ctx->flags = flags;
+	if (dialect == ECC_DIALECT_ED25519)
+		ctx->nbits = 256;
+	else
+		ctx->nbits = mpi_get_nbits(p);
+	ctx->p = mpi_copy(p);
+	ctx->a = mpi_copy(a);
+	ctx->b = mpi_copy(b);
+
+	ctx->t.p_barrett = use_barrett > 0 ? mpi_barrett_init(ctx->p, 0) : NULL;
+
+	mpi_ec_get_reset(ctx);
+
+	if (model == MPI_EC_MONTGOMERY) {
+		for (i = 0; i < DIM(bad_points_table); i++) {
+			MPI p_candidate = mpi_scanval(bad_points_table[i][0]);
+			int match_p = !mpi_cmp(ctx->p, p_candidate);
+			int j;
+
+			mpi_free(p_candidate);
+			if (!match_p)
+				continue;
+
+			for (j = 0; i < DIM(ctx->t.scratch) && bad_points_table[i][j]; j++)
+				ctx->t.scratch[j] = mpi_scanval(bad_points_table[i][j]);
+		}
+	} else {
+		/* Allocate scratch variables.  */
+		for (i = 0; i < DIM(ctx->t.scratch); i++)
+			ctx->t.scratch[i] = mpi_alloc_like(ctx->p);
+	}
+
+	ctx->addm = ec_addm;
+	ctx->subm = ec_subm;
+	ctx->mulm = ec_mulm;
+	ctx->mul2 = ec_mul2;
+	ctx->pow2 = ec_pow2;
+
+	for (i = 0; field_table[i].p; i++) {
+		MPI f_p;
+
+		f_p = mpi_scanval(field_table[i].p);
+		if (!f_p)
+			break;
+
+		if (!mpi_cmp(p, f_p)) {
+			ctx->addm = field_table[i].addm;
+			ctx->subm = field_table[i].subm;
+			ctx->mulm = field_table[i].mulm;
+			ctx->mul2 = field_table[i].mul2;
+			ctx->pow2 = field_table[i].pow2;
+			mpi_free(f_p);
+
+			mpi_resize(ctx->a, ctx->p->nlimbs);
+			ctx->a->nlimbs = ctx->p->nlimbs;
+
+			mpi_resize(ctx->b, ctx->p->nlimbs);
+			ctx->b->nlimbs = ctx->p->nlimbs;
+
+			for (i = 0; i < DIM(ctx->t.scratch) && ctx->t.scratch[i]; i++)
+				ctx->t.scratch[i]->nlimbs = ctx->p->nlimbs;
+
+			break;
+		}
+
+		mpi_free(f_p);
+	}
+}
+EXPORT_SYMBOL_GPL(mpi_ec_init);
+
+void mpi_ec_deinit(struct mpi_ec_ctx *ctx)
+{
+	int i;
+
+	mpi_barrett_free(ctx->t.p_barrett);
+
+	/* Domain parameter.  */
+	mpi_free(ctx->p);
+	mpi_free(ctx->a);
+	mpi_free(ctx->b);
+	mpi_point_release(ctx->G);
+	mpi_free(ctx->n);
+
+	/* The key.  */
+	mpi_point_release(ctx->Q);
+	mpi_free(ctx->d);
+
+	/* Private data of ec.c.  */
+	mpi_free(ctx->t.two_inv_p);
+
+	for (i = 0; i < DIM(ctx->t.scratch); i++)
+		mpi_free(ctx->t.scratch[i]);
+}
+EXPORT_SYMBOL_GPL(mpi_ec_deinit);
+
+/* Compute the affine coordinates from the projective coordinates in
+ * POINT.  Set them into X and Y.  If one coordinate is not required,
+ * X or Y may be passed as NULL.  CTX is the usual context. Returns: 0
+ * on success or !0 if POINT is at infinity.
+ */
+int mpi_ec_get_affine(MPI x, MPI y, MPI_POINT point, struct mpi_ec_ctx *ctx)
+{
+	if (!mpi_cmp_ui(point->z, 0))
+		return -1;
+
+	switch (ctx->model) {
+	case MPI_EC_WEIERSTRASS: /* Using Jacobian coordinates.  */
+		{
+			MPI z1, z2, z3;
+
+			z1 = mpi_new(0);
+			z2 = mpi_new(0);
+			ec_invm(z1, point->z, ctx);  /* z1 = z^(-1) mod p  */
+			ec_mulm(z2, z1, z1, ctx);    /* z2 = z^(-2) mod p  */
+
+			if (x)
+				ec_mulm(x, point->x, z2, ctx);
+
+			if (y) {
+				z3 = mpi_new(0);
+				ec_mulm(z3, z2, z1, ctx);      /* z3 = z^(-3) mod p */
+				ec_mulm(y, point->y, z3, ctx);
+				mpi_free(z3);
+			}
+
+			mpi_free(z2);
+			mpi_free(z1);
+		}
+		return 0;
+
+	case MPI_EC_MONTGOMERY:
+		{
+			if (x)
+				mpi_set(x, point->x);
+
+			if (y) {
+				log_fatal("%s: Getting Y-coordinate on %s is not supported\n",
+						"mpi_ec_get_affine", "Montgomery");
+				return -1;
+			}
+		}
+		return 0;
+
+	case MPI_EC_EDWARDS:
+		{
+			MPI z;
+
+			z = mpi_new(0);
+			ec_invm(z, point->z, ctx);
+
+			mpi_resize(z, ctx->p->nlimbs);
+			z->nlimbs = ctx->p->nlimbs;
+
+			if (x) {
+				mpi_resize(x, ctx->p->nlimbs);
+				x->nlimbs = ctx->p->nlimbs;
+				ctx->mulm(x, point->x, z, ctx);
+			}
+			if (y) {
+				mpi_resize(y, ctx->p->nlimbs);
+				y->nlimbs = ctx->p->nlimbs;
+				ctx->mulm(y, point->y, z, ctx);
+			}
+
+			mpi_free(z);
+		}
+		return 0;
+
+	default:
+		return -1;
+	}
+}
+EXPORT_SYMBOL_GPL(mpi_ec_get_affine);
+
+/*  RESULT = 2 * POINT  (Weierstrass version). */
+static void dup_point_weierstrass(MPI_POINT result,
+		MPI_POINT point, struct mpi_ec_ctx *ctx)
+{
+#define x3 (result->x)
+#define y3 (result->y)
+#define z3 (result->z)
+#define t1 (ctx->t.scratch[0])
+#define t2 (ctx->t.scratch[1])
+#define t3 (ctx->t.scratch[2])
+#define l1 (ctx->t.scratch[3])
+#define l2 (ctx->t.scratch[4])
+#define l3 (ctx->t.scratch[5])
+
+	if (!mpi_cmp_ui(point->y, 0) || !mpi_cmp_ui(point->z, 0)) {
+		/* P_y == 0 || P_z == 0 => [1:1:0] */
+		mpi_set_ui(x3, 1);
+		mpi_set_ui(y3, 1);
+		mpi_set_ui(z3, 0);
+	} else {
+		if (ec_get_a_is_pminus3(ctx)) {
+			/* Use the faster case.  */
+			/* L1 = 3(X - Z^2)(X + Z^2) */
+			/*                          T1: used for Z^2. */
+			/*                          T2: used for the right term. */
+			ec_pow2(t1, point->z, ctx);
+			ec_subm(l1, point->x, t1, ctx);
+			ec_mulm(l1, l1, mpi_const(MPI_C_THREE), ctx);
+			ec_addm(t2, point->x, t1, ctx);
+			ec_mulm(l1, l1, t2, ctx);
+		} else {
+			/* Standard case. */
+			/* L1 = 3X^2 + aZ^4 */
+			/*                          T1: used for aZ^4. */
+			ec_pow2(l1, point->x, ctx);
+			ec_mulm(l1, l1, mpi_const(MPI_C_THREE), ctx);
+			ec_powm(t1, point->z, mpi_const(MPI_C_FOUR), ctx);
+			ec_mulm(t1, t1, ctx->a, ctx);
+			ec_addm(l1, l1, t1, ctx);
+		}
+		/* Z3 = 2YZ */
+		ec_mulm(z3, point->y, point->z, ctx);
+		ec_mul2(z3, z3, ctx);
+
+		/* L2 = 4XY^2 */
+		/*                              T2: used for Y2; required later. */
+		ec_pow2(t2, point->y, ctx);
+		ec_mulm(l2, t2, point->x, ctx);
+		ec_mulm(l2, l2, mpi_const(MPI_C_FOUR), ctx);
+
+		/* X3 = L1^2 - 2L2 */
+		/*                              T1: used for L2^2. */
+		ec_pow2(x3, l1, ctx);
+		ec_mul2(t1, l2, ctx);
+		ec_subm(x3, x3, t1, ctx);
+
+		/* L3 = 8Y^4 */
+		/*                              T2: taken from above. */
+		ec_pow2(t2, t2, ctx);
+		ec_mulm(l3, t2, mpi_const(MPI_C_EIGHT), ctx);
+
+		/* Y3 = L1(L2 - X3) - L3 */
+		ec_subm(y3, l2, x3, ctx);
+		ec_mulm(y3, y3, l1, ctx);
+		ec_subm(y3, y3, l3, ctx);
+	}
+
+#undef x3
+#undef y3
+#undef z3
+#undef t1
+#undef t2
+#undef t3
+#undef l1
+#undef l2
+#undef l3
+}
+
+/*  RESULT = 2 * POINT  (Montgomery version). */
+static void dup_point_montgomery(MPI_POINT result,
+				MPI_POINT point, struct mpi_ec_ctx *ctx)
+{
+	(void)result;
+	(void)point;
+	(void)ctx;
+	log_fatal("%s: %s not yet supported\n",
+			"mpi_ec_dup_point", "Montgomery");
+}
+
+/*  RESULT = 2 * POINT  (Twisted Edwards version). */
+static void dup_point_edwards(MPI_POINT result,
+		MPI_POINT point, struct mpi_ec_ctx *ctx)
+{
+#define X1 (point->x)
+#define Y1 (point->y)
+#define Z1 (point->z)
+#define X3 (result->x)
+#define Y3 (result->y)
+#define Z3 (result->z)
+#define B (ctx->t.scratch[0])
+#define C (ctx->t.scratch[1])
+#define D (ctx->t.scratch[2])
+#define E (ctx->t.scratch[3])
+#define F (ctx->t.scratch[4])
+#define H (ctx->t.scratch[5])
+#define J (ctx->t.scratch[6])
+
+	/* Compute: (X_3 : Y_3 : Z_3) = 2( X_1 : Y_1 : Z_1 ) */
+
+	/* B = (X_1 + Y_1)^2  */
+	ctx->addm(B, X1, Y1, ctx);
+	ctx->pow2(B, B, ctx);
+
+	/* C = X_1^2 */
+	/* D = Y_1^2 */
+	ctx->pow2(C, X1, ctx);
+	ctx->pow2(D, Y1, ctx);
+
+	/* E = aC */
+	if (ctx->dialect == ECC_DIALECT_ED25519)
+		ctx->subm(E, ctx->p, C, ctx);
+	else
+		ctx->mulm(E, ctx->a, C, ctx);
+
+	/* F = E + D */
+	ctx->addm(F, E, D, ctx);
+
+	/* H = Z_1^2 */
+	ctx->pow2(H, Z1, ctx);
+
+	/* J = F - 2H */
+	ctx->mul2(J, H, ctx);
+	ctx->subm(J, F, J, ctx);
+
+	/* X_3 = (B - C - D) · J */
+	ctx->subm(X3, B, C, ctx);
+	ctx->subm(X3, X3, D, ctx);
+	ctx->mulm(X3, X3, J, ctx);
+
+	/* Y_3 = F · (E - D) */
+	ctx->subm(Y3, E, D, ctx);
+	ctx->mulm(Y3, Y3, F, ctx);
+
+	/* Z_3 = F · J */
+	ctx->mulm(Z3, F, J, ctx);
+
+#undef X1
+#undef Y1
+#undef Z1
+#undef X3
+#undef Y3
+#undef Z3
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef H
+#undef J
+}
+
+/*  RESULT = 2 * POINT  */
+static void
+mpi_ec_dup_point(MPI_POINT result, MPI_POINT point, struct mpi_ec_ctx *ctx)
+{
+	switch (ctx->model) {
+	case MPI_EC_WEIERSTRASS:
+		dup_point_weierstrass(result, point, ctx);
+		break;
+	case MPI_EC_MONTGOMERY:
+		dup_point_montgomery(result, point, ctx);
+		break;
+	case MPI_EC_EDWARDS:
+		dup_point_edwards(result, point, ctx);
+		break;
+	}
+}
+
+/* RESULT = P1 + P2  (Weierstrass version).*/
+static void add_points_weierstrass(MPI_POINT result,
+		MPI_POINT p1, MPI_POINT p2,
+		struct mpi_ec_ctx *ctx)
+{
+#define x1 (p1->x)
+#define y1 (p1->y)
+#define z1 (p1->z)
+#define x2 (p2->x)
+#define y2 (p2->y)
+#define z2 (p2->z)
+#define x3 (result->x)
+#define y3 (result->y)
+#define z3 (result->z)
+#define l1 (ctx->t.scratch[0])
+#define l2 (ctx->t.scratch[1])
+#define l3 (ctx->t.scratch[2])
+#define l4 (ctx->t.scratch[3])
+#define l5 (ctx->t.scratch[4])
+#define l6 (ctx->t.scratch[5])
+#define l7 (ctx->t.scratch[6])
+#define l8 (ctx->t.scratch[7])
+#define l9 (ctx->t.scratch[8])
+#define t1 (ctx->t.scratch[9])
+#define t2 (ctx->t.scratch[10])
+
+	if ((!mpi_cmp(x1, x2)) && (!mpi_cmp(y1, y2)) && (!mpi_cmp(z1, z2))) {
+		/* Same point; need to call the duplicate function.  */
+		mpi_ec_dup_point(result, p1, ctx);
+	} else if (!mpi_cmp_ui(z1, 0)) {
+		/* P1 is at infinity.  */
+		mpi_set(x3, p2->x);
+		mpi_set(y3, p2->y);
+		mpi_set(z3, p2->z);
+	} else if (!mpi_cmp_ui(z2, 0)) {
+		/* P2 is at infinity.  */
+		mpi_set(x3, p1->x);
+		mpi_set(y3, p1->y);
+		mpi_set(z3, p1->z);
+	} else {
+		int z1_is_one = !mpi_cmp_ui(z1, 1);
+		int z2_is_one = !mpi_cmp_ui(z2, 1);
+
+		/* l1 = x1 z2^2  */
+		/* l2 = x2 z1^2  */
+		if (z2_is_one)
+			mpi_set(l1, x1);
+		else {
+			ec_pow2(l1, z2, ctx);
+			ec_mulm(l1, l1, x1, ctx);
+		}
+		if (z1_is_one)
+			mpi_set(l2, x2);
+		else {
+			ec_pow2(l2, z1, ctx);
+			ec_mulm(l2, l2, x2, ctx);
+		}
+		/* l3 = l1 - l2 */
+		ec_subm(l3, l1, l2, ctx);
+		/* l4 = y1 z2^3  */
+		ec_powm(l4, z2, mpi_const(MPI_C_THREE), ctx);
+		ec_mulm(l4, l4, y1, ctx);
+		/* l5 = y2 z1^3  */
+		ec_powm(l5, z1, mpi_const(MPI_C_THREE), ctx);
+		ec_mulm(l5, l5, y2, ctx);
+		/* l6 = l4 - l5  */
+		ec_subm(l6, l4, l5, ctx);
+
+		if (!mpi_cmp_ui(l3, 0)) {
+			if (!mpi_cmp_ui(l6, 0)) {
+				/* P1 and P2 are the same - use duplicate function. */
+				mpi_ec_dup_point(result, p1, ctx);
+			} else {
+				/* P1 is the inverse of P2.  */
+				mpi_set_ui(x3, 1);
+				mpi_set_ui(y3, 1);
+				mpi_set_ui(z3, 0);
+			}
+		} else {
+			/* l7 = l1 + l2  */
+			ec_addm(l7, l1, l2, ctx);
+			/* l8 = l4 + l5  */
+			ec_addm(l8, l4, l5, ctx);
+			/* z3 = z1 z2 l3  */
+			ec_mulm(z3, z1, z2, ctx);
+			ec_mulm(z3, z3, l3, ctx);
+			/* x3 = l6^2 - l7 l3^2  */
+			ec_pow2(t1, l6, ctx);
+			ec_pow2(t2, l3, ctx);
+			ec_mulm(t2, t2, l7, ctx);
+			ec_subm(x3, t1, t2, ctx);
+			/* l9 = l7 l3^2 - 2 x3  */
+			ec_mul2(t1, x3, ctx);
+			ec_subm(l9, t2, t1, ctx);
+			/* y3 = (l9 l6 - l8 l3^3)/2  */
+			ec_mulm(l9, l9, l6, ctx);
+			ec_powm(t1, l3, mpi_const(MPI_C_THREE), ctx); /* fixme: Use saved value*/
+			ec_mulm(t1, t1, l8, ctx);
+			ec_subm(y3, l9, t1, ctx);
+			ec_mulm(y3, y3, ec_get_two_inv_p(ctx), ctx);
+		}
+	}
+
+#undef x1
+#undef y1
+#undef z1
+#undef x2
+#undef y2
+#undef z2
+#undef x3
+#undef y3
+#undef z3
+#undef l1
+#undef l2
+#undef l3
+#undef l4
+#undef l5
+#undef l6
+#undef l7
+#undef l8
+#undef l9
+#undef t1
+#undef t2
+}
+
+/* RESULT = P1 + P2  (Montgomery version).*/
+static void add_points_montgomery(MPI_POINT result,
+		MPI_POINT p1, MPI_POINT p2,
+		struct mpi_ec_ctx *ctx)
+{
+	(void)result;
+	(void)p1;
+	(void)p2;
+	(void)ctx;
+	log_fatal("%s: %s not yet supported\n",
+			"mpi_ec_add_points", "Montgomery");
+}
+
+/* RESULT = P1 + P2  (Twisted Edwards version).*/
+static void add_points_edwards(MPI_POINT result,
+		MPI_POINT p1, MPI_POINT p2,
+		struct mpi_ec_ctx *ctx)
+{
+#define X1 (p1->x)
+#define Y1 (p1->y)
+#define Z1 (p1->z)
+#define X2 (p2->x)
+#define Y2 (p2->y)
+#define Z2 (p2->z)
+#define X3 (result->x)
+#define Y3 (result->y)
+#define Z3 (result->z)
+#define A (ctx->t.scratch[0])
+#define B (ctx->t.scratch[1])
+#define C (ctx->t.scratch[2])
+#define D (ctx->t.scratch[3])
+#define E (ctx->t.scratch[4])
+#define F (ctx->t.scratch[5])
+#define G (ctx->t.scratch[6])
+#define tmp (ctx->t.scratch[7])
+
+	point_resize(result, ctx);
+
+	/* Compute: (X_3 : Y_3 : Z_3) = (X_1 : Y_1 : Z_1) + (X_2 : Y_2 : Z_3) */
+
+	/* A = Z1 · Z2 */
+	ctx->mulm(A, Z1, Z2, ctx);
+
+	/* B = A^2 */
+	ctx->pow2(B, A, ctx);
+
+	/* C = X1 · X2 */
+	ctx->mulm(C, X1, X2, ctx);
+
+	/* D = Y1 · Y2 */
+	ctx->mulm(D, Y1, Y2, ctx);
+
+	/* E = d · C · D */
+	ctx->mulm(E, ctx->b, C, ctx);
+	ctx->mulm(E, E, D, ctx);
+
+	/* F = B - E */
+	ctx->subm(F, B, E, ctx);
+
+	/* G = B + E */
+	ctx->addm(G, B, E, ctx);
+
+	/* X_3 = A · F · ((X_1 + Y_1) · (X_2 + Y_2) - C - D) */
+	ctx->addm(tmp, X1, Y1, ctx);
+	ctx->addm(X3, X2, Y2, ctx);
+	ctx->mulm(X3, X3, tmp, ctx);
+	ctx->subm(X3, X3, C, ctx);
+	ctx->subm(X3, X3, D, ctx);
+	ctx->mulm(X3, X3, F, ctx);
+	ctx->mulm(X3, X3, A, ctx);
+
+	/* Y_3 = A · G · (D - aC) */
+	if (ctx->dialect == ECC_DIALECT_ED25519) {
+		ctx->addm(Y3, D, C, ctx);
+	} else {
+		ctx->mulm(Y3, ctx->a, C, ctx);
+		ctx->subm(Y3, D, Y3, ctx);
+	}
+	ctx->mulm(Y3, Y3, G, ctx);
+	ctx->mulm(Y3, Y3, A, ctx);
+
+	/* Z_3 = F · G */
+	ctx->mulm(Z3, F, G, ctx);
+
+
+#undef X1
+#undef Y1
+#undef Z1
+#undef X2
+#undef Y2
+#undef Z2
+#undef X3
+#undef Y3
+#undef Z3
+#undef A
+#undef B
+#undef C
+#undef D
+#undef E
+#undef F
+#undef G
+#undef tmp
+}
+
+/* Compute a step of Montgomery Ladder (only use X and Z in the point).
+ * Inputs:  P1, P2, and x-coordinate of DIF = P1 - P1.
+ * Outputs: PRD = 2 * P1 and  SUM = P1 + P2.
+ */
+static void montgomery_ladder(MPI_POINT prd, MPI_POINT sum,
+		MPI_POINT p1, MPI_POINT p2, MPI dif_x,
+		struct mpi_ec_ctx *ctx)
+{
+	ctx->addm(sum->x, p2->x, p2->z, ctx);
+	ctx->subm(p2->z, p2->x, p2->z, ctx);
+	ctx->addm(prd->x, p1->x, p1->z, ctx);
+	ctx->subm(p1->z, p1->x, p1->z, ctx);
+	ctx->mulm(p2->x, p1->z, sum->x, ctx);
+	ctx->mulm(p2->z, prd->x, p2->z, ctx);
+	ctx->pow2(p1->x, prd->x, ctx);
+	ctx->pow2(p1->z, p1->z, ctx);
+	ctx->addm(sum->x, p2->x, p2->z, ctx);
+	ctx->subm(p2->z, p2->x, p2->z, ctx);
+	ctx->mulm(prd->x, p1->x, p1->z, ctx);
+	ctx->subm(p1->z, p1->x, p1->z, ctx);
+	ctx->pow2(sum->x, sum->x, ctx);
+	ctx->pow2(sum->z, p2->z, ctx);
+	ctx->mulm(prd->z, p1->z, ctx->a, ctx); /* CTX->A: (a-2)/4 */
+	ctx->mulm(sum->z, sum->z, dif_x, ctx);
+	ctx->addm(prd->z, p1->x, prd->z, ctx);
+	ctx->mulm(prd->z, prd->z, p1->z, ctx);
+}
+
+/* RESULT = P1 + P2 */
+void mpi_ec_add_points(MPI_POINT result,
+		MPI_POINT p1, MPI_POINT p2,
+		struct mpi_ec_ctx *ctx)
+{
+	switch (ctx->model) {
+	case MPI_EC_WEIERSTRASS:
+		add_points_weierstrass(result, p1, p2, ctx);
+		break;
+	case MPI_EC_MONTGOMERY:
+		add_points_montgomery(result, p1, p2, ctx);
+		break;
+	case MPI_EC_EDWARDS:
+		add_points_edwards(result, p1, p2, ctx);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(mpi_ec_add_points);
+
+/* Scalar point multiplication - the main function for ECC.  If takes
+ * an integer SCALAR and a POINT as well as the usual context CTX.
+ * RESULT will be set to the resulting point.
+ */
+void mpi_ec_mul_point(MPI_POINT result,
+			MPI scalar, MPI_POINT point,
+			struct mpi_ec_ctx *ctx)
+{
+	MPI x1, y1, z1, k, h, yy;
+	unsigned int i, loops;
+	struct gcry_mpi_point p1, p2, p1inv;
+
+	if (ctx->model == MPI_EC_EDWARDS) {
+		/* Simple left to right binary method.  Algorithm 3.27 from
+		 * {author={Hankerson, Darrel and Menezes, Alfred J. and Vanstone, Scott},
+		 *  title = {Guide to Elliptic Curve Cryptography},
+		 *  year = {2003}, isbn = {038795273X},
+		 *  url = {http://www.cacr.math.uwaterloo.ca/ecc/},
+		 *  publisher = {Springer-Verlag New York, Inc.}}
+		 */
+		unsigned int nbits;
+		int j;
+
+		if (mpi_cmp(scalar, ctx->p) >= 0)
+			nbits = mpi_get_nbits(scalar);
+		else
+			nbits = mpi_get_nbits(ctx->p);
+
+		mpi_set_ui(result->x, 0);
+		mpi_set_ui(result->y, 1);
+		mpi_set_ui(result->z, 1);
+		point_resize(point, ctx);
+
+		point_resize(result, ctx);
+		point_resize(point, ctx);
+
+		for (j = nbits-1; j >= 0; j--) {
+			mpi_ec_dup_point(result, result, ctx);
+			if (mpi_test_bit(scalar, j))
+				mpi_ec_add_points(result, result, point, ctx);
+		}
+		return;
+	} else if (ctx->model == MPI_EC_MONTGOMERY) {
+		unsigned int nbits;
+		int j;
+		struct gcry_mpi_point p1_, p2_;
+		MPI_POINT q1, q2, prd, sum;
+		unsigned long sw;
+		mpi_size_t rsize;
+		int scalar_copied = 0;
+
+		/* Compute scalar point multiplication with Montgomery Ladder.
+		 * Note that we don't use Y-coordinate in the points at all.
+		 * RESULT->Y will be filled by zero.
+		 */
+
+		nbits = mpi_get_nbits(scalar);
+		point_init(&p1);
+		point_init(&p2);
+		point_init(&p1_);
+		point_init(&p2_);
+		mpi_set_ui(p1.x, 1);
+		mpi_free(p2.x);
+		p2.x = mpi_copy(point->x);
+		mpi_set_ui(p2.z, 1);
+
+		point_resize(&p1, ctx);
+		point_resize(&p2, ctx);
+		point_resize(&p1_, ctx);
+		point_resize(&p2_, ctx);
+
+		mpi_resize(point->x, ctx->p->nlimbs);
+		point->x->nlimbs = ctx->p->nlimbs;
+
+		q1 = &p1;
+		q2 = &p2;
+		prd = &p1_;
+		sum = &p2_;
+
+		for (j = nbits-1; j >= 0; j--) {
+			MPI_POINT t;
+
+			sw = mpi_test_bit(scalar, j);
+			point_swap_cond(q1, q2, sw, ctx);
+			montgomery_ladder(prd, sum, q1, q2, point->x, ctx);
+			point_swap_cond(prd, sum, sw, ctx);
+			t = q1;  q1 = prd;  prd = t;
+			t = q2;  q2 = sum;  sum = t;
+		}
+
+		mpi_clear(result->y);
+		sw = (nbits & 1);
+		point_swap_cond(&p1, &p1_, sw, ctx);
+
+		rsize = p1.z->nlimbs;
+		MPN_NORMALIZE(p1.z->d, rsize);
+		if (rsize == 0) {
+			mpi_set_ui(result->x, 1);
+			mpi_set_ui(result->z, 0);
+		} else {
+			z1 = mpi_new(0);
+			ec_invm(z1, p1.z, ctx);
+			ec_mulm(result->x, p1.x, z1, ctx);
+			mpi_set_ui(result->z, 1);
+			mpi_free(z1);
+		}
+
+		point_free(&p1);
+		point_free(&p2);
+		point_free(&p1_);
+		point_free(&p2_);
+		if (scalar_copied)
+			mpi_free(scalar);
+		return;
+	}
+
+	x1 = mpi_alloc_like(ctx->p);
+	y1 = mpi_alloc_like(ctx->p);
+	h  = mpi_alloc_like(ctx->p);
+	k  = mpi_copy(scalar);
+	yy = mpi_copy(point->y);
+
+	if (mpi_has_sign(k)) {
+		k->sign = 0;
+		ec_invm(yy, yy, ctx);
+	}
+
+	if (!mpi_cmp_ui(point->z, 1)) {
+		mpi_set(x1, point->x);
+		mpi_set(y1, yy);
+	} else {
+		MPI z2, z3;
+
+		z2 = mpi_alloc_like(ctx->p);
+		z3 = mpi_alloc_like(ctx->p);
+		ec_mulm(z2, point->z, point->z, ctx);
+		ec_mulm(z3, point->z, z2, ctx);
+		ec_invm(z2, z2, ctx);
+		ec_mulm(x1, point->x, z2, ctx);
+		ec_invm(z3, z3, ctx);
+		ec_mulm(y1, yy, z3, ctx);
+		mpi_free(z2);
+		mpi_free(z3);
+	}
+	z1 = mpi_copy(mpi_const(MPI_C_ONE));
+
+	mpi_mul(h, k, mpi_const(MPI_C_THREE)); /* h = 3k */
+	loops = mpi_get_nbits(h);
+	if (loops < 2) {
+		/* If SCALAR is zero, the above mpi_mul sets H to zero and thus
+		 * LOOPs will be zero.  To avoid an underflow of I in the main
+		 * loop we set LOOP to 2 and the result to (0,0,0).
+		 */
+		loops = 2;
+		mpi_clear(result->x);
+		mpi_clear(result->y);
+		mpi_clear(result->z);
+	} else {
+		mpi_set(result->x, point->x);
+		mpi_set(result->y, yy);
+		mpi_set(result->z, point->z);
+	}
+	mpi_free(yy); yy = NULL;
+
+	p1.x = x1; x1 = NULL;
+	p1.y = y1; y1 = NULL;
+	p1.z = z1; z1 = NULL;
+	point_init(&p2);
+	point_init(&p1inv);
+
+	/* Invert point: y = p - y mod p  */
+	point_set(&p1inv, &p1);
+	ec_subm(p1inv.y, ctx->p, p1inv.y, ctx);
+
+	for (i = loops-2; i > 0; i--) {
+		mpi_ec_dup_point(result, result, ctx);
+		if (mpi_test_bit(h, i) == 1 && mpi_test_bit(k, i) == 0) {
+			point_set(&p2, result);
+			mpi_ec_add_points(result, &p2, &p1, ctx);
+		}
+		if (mpi_test_bit(h, i) == 0 && mpi_test_bit(k, i) == 1) {
+			point_set(&p2, result);
+			mpi_ec_add_points(result, &p2, &p1inv, ctx);
+		}
+	}
+
+	point_free(&p1);
+	point_free(&p2);
+	point_free(&p1inv);
+	mpi_free(h);
+	mpi_free(k);
+}
+EXPORT_SYMBOL_GPL(mpi_ec_mul_point);
+
+/* Return true if POINT is on the curve described by CTX.  */
+int mpi_ec_curve_point(MPI_POINT point, struct mpi_ec_ctx *ctx)
+{
+	int res = 0;
+	MPI x, y, w;
+
+	x = mpi_new(0);
+	y = mpi_new(0);
+	w = mpi_new(0);
+
+	/* Check that the point is in range.  This needs to be done here and
+	 * not after conversion to affine coordinates.
+	 */
+	if (mpi_cmpabs(point->x, ctx->p) >= 0)
+		goto leave;
+	if (mpi_cmpabs(point->y, ctx->p) >= 0)
+		goto leave;
+	if (mpi_cmpabs(point->z, ctx->p) >= 0)
+		goto leave;
+
+	switch (ctx->model) {
+	case MPI_EC_WEIERSTRASS:
+		{
+			MPI xxx;
+
+			if (mpi_ec_get_affine(x, y, point, ctx))
+				goto leave;
+
+			xxx = mpi_new(0);
+
+			/* y^2 == x^3 + a·x + b */
+			ec_pow2(y, y, ctx);
+
+			ec_pow3(xxx, x, ctx);
+			ec_mulm(w, ctx->a, x, ctx);
+			ec_addm(w, w, ctx->b, ctx);
+			ec_addm(w, w, xxx, ctx);
+
+			if (!mpi_cmp(y, w))
+				res = 1;
+
+			mpi_free(xxx);
+		}
+		break;
+
+	case MPI_EC_MONTGOMERY:
+		{
+#define xx y
+			/* With Montgomery curve, only X-coordinate is valid. */
+			if (mpi_ec_get_affine(x, NULL, point, ctx))
+				goto leave;
+
+			/* The equation is: b * y^2 == x^3 + a · x^2 + x */
+			/* We check if right hand is quadratic residue or not by
+			 * Euler's criterion.
+			 */
+			/* CTX->A has (a-2)/4 and CTX->B has b^-1 */
+			ec_mulm(w, ctx->a, mpi_const(MPI_C_FOUR), ctx);
+			ec_addm(w, w, mpi_const(MPI_C_TWO), ctx);
+			ec_mulm(w, w, x, ctx);
+			ec_pow2(xx, x, ctx);
+			ec_addm(w, w, xx, ctx);
+			ec_addm(w, w, mpi_const(MPI_C_ONE), ctx);
+			ec_mulm(w, w, x, ctx);
+			ec_mulm(w, w, ctx->b, ctx);
+#undef xx
+			/* Compute Euler's criterion: w^(p-1)/2 */
+#define p_minus1 y
+			ec_subm(p_minus1, ctx->p, mpi_const(MPI_C_ONE), ctx);
+			mpi_rshift(p_minus1, p_minus1, 1);
+			ec_powm(w, w, p_minus1, ctx);
+
+			res = !mpi_cmp_ui(w, 1);
+#undef p_minus1
+		}
+		break;
+
+	case MPI_EC_EDWARDS:
+		{
+			if (mpi_ec_get_affine(x, y, point, ctx))
+				goto leave;
+
+			mpi_resize(w, ctx->p->nlimbs);
+			w->nlimbs = ctx->p->nlimbs;
+
+			/* a · x^2 + y^2 - 1 - b · x^2 · y^2 == 0 */
+			ctx->pow2(x, x, ctx);
+			ctx->pow2(y, y, ctx);
+			if (ctx->dialect == ECC_DIALECT_ED25519)
+				ctx->subm(w, ctx->p, x, ctx);
+			else
+				ctx->mulm(w, ctx->a, x, ctx);
+			ctx->addm(w, w, y, ctx);
+			ctx->mulm(x, x, y, ctx);
+			ctx->mulm(x, x, ctx->b, ctx);
+			ctx->subm(w, w, x, ctx);
+			if (!mpi_cmp_ui(w, 1))
+				res = 1;
+		}
+		break;
+	}
+
+leave:
+	mpi_free(w);
+	mpi_free(x);
+	mpi_free(y);
+
+	return res;
+}
+EXPORT_SYMBOL_GPL(mpi_ec_curve_point);
-- 
cgit v1.2.3


From 700cd59db518edc2028df7c830d6cb6f7164573b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 3 Sep 2020 07:41:04 +0200
Subject: vsprintf: use bd_partno in bdev_name

No need to go through the hd_struct to find the partition number.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 lib/vsprintf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index afb9521ddf91..14c9a6af1b23 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -940,13 +940,13 @@ char *bdev_name(char *buf, char *end, struct block_device *bdev,
 
 	hd = bdev->bd_disk;
 	buf = string(buf, end, hd->disk_name, spec);
-	if (bdev->bd_part->partno) {
+	if (bdev->bd_partno) {
 		if (isdigit(hd->disk_name[strlen(hd->disk_name)-1])) {
 			if (buf < end)
 				*buf = 'p';
 			buf++;
 		}
-		buf = number(buf, end, bdev->bd_part->partno, spec);
+		buf = number(buf, end, bdev->bd_partno, spec);
 	}
 	return buf;
 }
-- 
cgit v1.2.3


From fb041b598997d63c0f7d7305dfae70046bf66fe1 Mon Sep 17 00:00:00 2001
From: David Laight <David.Laight@ACULAB.COM>
Date: Fri, 25 Sep 2020 06:51:39 +0200
Subject: iov_iter: move rw_copy_check_uvector() into lib/iov_iter.c

This lets the compiler inline it into import_iovec() generating
much better code.

Signed-off-by: David Laight <david.laight@aculab.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 fs/read_write.c | 179 --------------------------------------------------------
 lib/iov_iter.c  | 176 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+), 179 deletions(-)

(limited to 'lib')

diff --git a/fs/read_write.c b/fs/read_write.c
index 5db58b8c78d0..e5e891a88442 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -752,185 +752,6 @@ static ssize_t do_loop_readv_writev(struct file *filp, struct iov_iter *iter,
 	return ret;
 }
 
-/**
- * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
- *     into the kernel and check that it is valid.
- *
- * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
- * @uvector: Pointer to the userspace array.
- * @nr_segs: Number of elements in userspace array.
- * @fast_segs: Number of elements in @fast_pointer.
- * @fast_pointer: Pointer to (usually small on-stack) kernel array.
- * @ret_pointer: (output parameter) Pointer to a variable that will point to
- *     either @fast_pointer, a newly allocated kernel array, or NULL,
- *     depending on which array was used.
- *
- * This function copies an array of &struct iovec of @nr_segs from
- * userspace into the kernel and checks that each element is valid (e.g.
- * it does not point to a kernel address or cause overflow by being too
- * large, etc.).
- *
- * As an optimization, the caller may provide a pointer to a small
- * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
- * (the size of this array, or 0 if unused, should be given in @fast_segs).
- *
- * @ret_pointer will always point to the array that was used, so the
- * caller must take care not to call kfree() on it e.g. in case the
- * @fast_pointer array was used and it was allocated on the stack.
- *
- * Return: The total number of bytes covered by the iovec array on success
- *   or a negative error code on error.
- */
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
-			      unsigned long nr_segs, unsigned long fast_segs,
-			      struct iovec *fast_pointer,
-			      struct iovec **ret_pointer)
-{
-	unsigned long seg;
-	ssize_t ret;
-	struct iovec *iov = fast_pointer;
-
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0) {
-		ret = 0;
-		goto out;
-	}
-
-	/*
-	 * First get the "struct iovec" from user memory and
-	 * verify all the pointers
-	 */
-	if (nr_segs > UIO_MAXIOV) {
-		ret = -EINVAL;
-		goto out;
-	}
-	if (nr_segs > fast_segs) {
-		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL) {
-			ret = -ENOMEM;
-			goto out;
-		}
-	}
-	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	/*
-	 * According to the Single Unix Specification we should return EINVAL
-	 * if an element length is < 0 when cast to ssize_t or if the
-	 * total length would overflow the ssize_t return value of the
-	 * system call.
-	 *
-	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
-	 * overflow case.
-	 */
-	ret = 0;
-	for (seg = 0; seg < nr_segs; seg++) {
-		void __user *buf = iov[seg].iov_base;
-		ssize_t len = (ssize_t)iov[seg].iov_len;
-
-		/* see if we we're about to use an invalid len or if
-		 * it's about to overflow ssize_t */
-		if (len < 0) {
-			ret = -EINVAL;
-			goto out;
-		}
-		if (type >= 0
-		    && unlikely(!access_ok(buf, len))) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - ret) {
-			len = MAX_RW_COUNT - ret;
-			iov[seg].iov_len = len;
-		}
-		ret += len;
-	}
-out:
-	*ret_pointer = iov;
-	return ret;
-}
-
-#ifdef CONFIG_COMPAT
-ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector, unsigned long nr_segs,
-		unsigned long fast_segs, struct iovec *fast_pointer,
-		struct iovec **ret_pointer)
-{
-	compat_ssize_t tot_len;
-	struct iovec *iov = *ret_pointer = fast_pointer;
-	ssize_t ret = 0;
-	int seg;
-
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0)
-		goto out;
-
-	ret = -EINVAL;
-	if (nr_segs > UIO_MAXIOV)
-		goto out;
-	if (nr_segs > fast_segs) {
-		ret = -ENOMEM;
-		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL)
-			goto out;
-	}
-	*ret_pointer = iov;
-
-	ret = -EFAULT;
-	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
-		goto out;
-
-	/*
-	 * Single unix specification:
-	 * We should -EINVAL if an element length is not >= 0 and fitting an
-	 * ssize_t.
-	 *
-	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
-	 * no overflow possibility.
-	 */
-	tot_len = 0;
-	ret = -EINVAL;
-	for (seg = 0; seg < nr_segs; seg++) {
-		compat_uptr_t buf;
-		compat_ssize_t len;
-
-		if (__get_user(len, &uvector->iov_len) ||
-		   __get_user(buf, &uvector->iov_base)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
-			goto out;
-		if (type >= 0 &&
-		    !access_ok(compat_ptr(buf), len)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - tot_len)
-			len = MAX_RW_COUNT - tot_len;
-		tot_len += len;
-		iov->iov_base = compat_ptr(buf);
-		iov->iov_len = (compat_size_t) len;
-		uvector++;
-		iov++;
-	}
-	ret = tot_len;
-
-out:
-	return ret;
-}
-#endif
-
 static ssize_t do_iter_read(struct file *file, struct iov_iter *iter,
 		loff_t *pos, rwf_t flags)
 {
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e40786c8f12..ccea9db3f72b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1650,6 +1650,109 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
+/**
+ * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
+ *     into the kernel and check that it is valid.
+ *
+ * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
+ * @uvector: Pointer to the userspace array.
+ * @nr_segs: Number of elements in userspace array.
+ * @fast_segs: Number of elements in @fast_pointer.
+ * @fast_pointer: Pointer to (usually small on-stack) kernel array.
+ * @ret_pointer: (output parameter) Pointer to a variable that will point to
+ *     either @fast_pointer, a newly allocated kernel array, or NULL,
+ *     depending on which array was used.
+ *
+ * This function copies an array of &struct iovec of @nr_segs from
+ * userspace into the kernel and checks that each element is valid (e.g.
+ * it does not point to a kernel address or cause overflow by being too
+ * large, etc.).
+ *
+ * As an optimization, the caller may provide a pointer to a small
+ * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
+ * (the size of this array, or 0 if unused, should be given in @fast_segs).
+ *
+ * @ret_pointer will always point to the array that was used, so the
+ * caller must take care not to call kfree() on it e.g. in case the
+ * @fast_pointer array was used and it was allocated on the stack.
+ *
+ * Return: The total number of bytes covered by the iovec array on success
+ *   or a negative error code on error.
+ */
+ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_pointer, struct iovec **ret_pointer)
+{
+	unsigned long seg;
+	ssize_t ret;
+	struct iovec *iov = fast_pointer;
+
+	/*
+	 * SuS says "The readv() function *may* fail if the iovcnt argument
+	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
+	 */
+	if (nr_segs == 0) {
+		ret = 0;
+		goto out;
+	}
+
+	/*
+	 * First get the "struct iovec" from user memory and
+	 * verify all the pointers
+	 */
+	if (nr_segs > UIO_MAXIOV) {
+		ret = -EINVAL;
+		goto out;
+	}
+	if (nr_segs > fast_segs) {
+		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
+		if (iov == NULL) {
+			ret = -ENOMEM;
+			goto out;
+		}
+	}
+	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
+		ret = -EFAULT;
+		goto out;
+	}
+
+	/*
+	 * According to the Single Unix Specification we should return EINVAL
+	 * if an element length is < 0 when cast to ssize_t or if the
+	 * total length would overflow the ssize_t return value of the
+	 * system call.
+	 *
+	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
+	 * overflow case.
+	 */
+	ret = 0;
+	for (seg = 0; seg < nr_segs; seg++) {
+		void __user *buf = iov[seg].iov_base;
+		ssize_t len = (ssize_t)iov[seg].iov_len;
+
+		/* see if we we're about to use an invalid len or if
+		 * it's about to overflow ssize_t */
+		if (len < 0) {
+			ret = -EINVAL;
+			goto out;
+		}
+		if (type >= 0
+		    && unlikely(!access_ok(buf, len))) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (len > MAX_RW_COUNT - ret) {
+			len = MAX_RW_COUNT - ret;
+			iov[seg].iov_len = len;
+		}
+		ret += len;
+	}
+out:
+	*ret_pointer = iov;
+	return ret;
+}
+
 /**
  * import_iovec() - Copy an array of &struct iovec from userspace
  *     into the kernel, check that it is valid, and initialize a new
@@ -1695,6 +1798,79 @@ EXPORT_SYMBOL(import_iovec);
 #ifdef CONFIG_COMPAT
 #include <linux/compat.h>
 
+ssize_t compat_rw_copy_check_uvector(int type,
+		const struct compat_iovec __user *uvector,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_pointer, struct iovec **ret_pointer)
+{
+	compat_ssize_t tot_len;
+	struct iovec *iov = *ret_pointer = fast_pointer;
+	ssize_t ret = 0;
+	int seg;
+
+	/*
+	 * SuS says "The readv() function *may* fail if the iovcnt argument
+	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
+	 */
+	if (nr_segs == 0)
+		goto out;
+
+	ret = -EINVAL;
+	if (nr_segs > UIO_MAXIOV)
+		goto out;
+	if (nr_segs > fast_segs) {
+		ret = -ENOMEM;
+		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
+		if (iov == NULL)
+			goto out;
+	}
+	*ret_pointer = iov;
+
+	ret = -EFAULT;
+	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
+		goto out;
+
+	/*
+	 * Single unix specification:
+	 * We should -EINVAL if an element length is not >= 0 and fitting an
+	 * ssize_t.
+	 *
+	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
+	 * no overflow possibility.
+	 */
+	tot_len = 0;
+	ret = -EINVAL;
+	for (seg = 0; seg < nr_segs; seg++) {
+		compat_uptr_t buf;
+		compat_ssize_t len;
+
+		if (__get_user(len, &uvector->iov_len) ||
+		   __get_user(buf, &uvector->iov_base)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
+			goto out;
+		if (type >= 0 &&
+		    !access_ok(compat_ptr(buf), len)) {
+			ret = -EFAULT;
+			goto out;
+		}
+		if (len > MAX_RW_COUNT - tot_len)
+			len = MAX_RW_COUNT - tot_len;
+		tot_len += len;
+		iov->iov_base = compat_ptr(buf);
+		iov->iov_len = (compat_size_t) len;
+		uvector++;
+		iov++;
+	}
+	ret = tot_len;
+
+out:
+	return ret;
+}
+
 ssize_t compat_import_iovec(int type,
 		const struct compat_iovec __user * uvector,
 		unsigned nr_segs, unsigned fast_segs,
-- 
cgit v1.2.3


From e5e5fcef600e94d83c6542cdcca3ab6dada95946 Mon Sep 17 00:00:00 2001
From: Jim Cromie <jim.cromie@gmail.com>
Date: Mon, 21 Sep 2020 13:04:33 -0600
Subject: dyndbg: use keyword, arg varnames for query term pairs

optimize for clarity by replacing word[i,i+1] refs with temps.
no functional changes.

Signed-off-by: Jim Cromie <jim.cromie@gmail.com>
Link: https://lore.kernel.org/r/20200921190433.1149521-3-jim.cromie@gmail.com
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/dynamic_debug.c | 27 +++++++++++++++------------
 1 file changed, 15 insertions(+), 12 deletions(-)

(limited to 'lib')

diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c
index 2d4dfd44b0fa..bd7b3aaa93c3 100644
--- a/lib/dynamic_debug.c
+++ b/lib/dynamic_debug.c
@@ -384,10 +384,13 @@ static int ddebug_parse_query(char *words[], int nwords,
 		query->module = modname;
 
 	for (i = 0; i < nwords; i += 2) {
-		if (!strcmp(words[i], "func")) {
-			rc = check_set(&query->function, words[i+1], "func");
-		} else if (!strcmp(words[i], "file")) {
-			if (check_set(&query->filename, words[i+1], "file"))
+		char *keyword = words[i];
+		char *arg = words[i+1];
+
+		if (!strcmp(keyword, "func")) {
+			rc = check_set(&query->function, arg, "func");
+		} else if (!strcmp(keyword, "file")) {
+			if (check_set(&query->filename, arg, "file"))
 				return -EINVAL;
 
 			/* tail :$info is function or line-range */
@@ -403,18 +406,18 @@ static int ddebug_parse_query(char *words[], int nwords,
 				if (parse_linerange(query, fline))
 					return -EINVAL;
 			}
-		} else if (!strcmp(words[i], "module")) {
-			rc = check_set(&query->module, words[i+1], "module");
-		} else if (!strcmp(words[i], "format")) {
-			string_unescape_inplace(words[i+1], UNESCAPE_SPACE |
+		} else if (!strcmp(keyword, "module")) {
+			rc = check_set(&query->module, arg, "module");
+		} else if (!strcmp(keyword, "format")) {
+			string_unescape_inplace(arg, UNESCAPE_SPACE |
 							    UNESCAPE_OCTAL |
 							    UNESCAPE_SPECIAL);
-			rc = check_set(&query->format, words[i+1], "format");
-		} else if (!strcmp(words[i], "line")) {
-			if (parse_linerange(query, words[i+1]))
+			rc = check_set(&query->format, arg, "format");
+		} else if (!strcmp(keyword, "line")) {
+			if (parse_linerange(query, arg))
 				return -EINVAL;
 		} else {
-			pr_err("unknown keyword \"%s\"\n", words[i]);
+			pr_err("unknown keyword \"%s\"\n", keyword);
 			return -EINVAL;
 		}
 		if (rc)
-- 
cgit v1.2.3


From f2d10ff4a903813df767a4b56b651a26b938df06 Mon Sep 17 00:00:00 2001
From: Daniel Thompson <daniel.thompson@linaro.org>
Date: Sun, 27 Sep 2020 22:15:29 +0100
Subject: kgdb: Honour the kprobe blocklist when setting breakpoints

Currently kgdb has absolutely no safety rails in place to discourage or
prevent a user from placing a breakpoint in dangerous places such as
the debugger's own trap entry/exit and other places where it is not safe
to take synchronous traps.

Introduce a new config symbol KGDB_HONOUR_BLOCKLIST and modify the
default implementation of kgdb_validate_break_address() so that we use
the kprobe blocklist to prohibit instrumentation of critical functions
if the config symbol is set. The config symbol dependencies are set to
ensure that the blocklist will be enabled by default if we enable KGDB
and are compiling for an architecture where we HAVE_KPROBES.

Suggested-by: Peter Zijlstra <peterz@infradead.org>
Reviewed-by: Douglas Anderson <dianders@chromium.org>
Reviewed-by: Masami Hiramatsu <mhiramat@kernel.org>
Link: https://lore.kernel.org/r/20200927211531.1380577-2-daniel.thompson@linaro.org
Signed-off-by: Daniel Thompson <daniel.thompson@linaro.org>
---
 include/linux/kgdb.h      | 18 ++++++++++++++++++
 kernel/debug/debug_core.c |  4 ++++
 kernel/debug/kdb/kdb_bp.c |  9 +++++++++
 lib/Kconfig.kgdb          | 15 +++++++++++++++
 4 files changed, 46 insertions(+)

(limited to 'lib')

diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h
index 477b8b7c908f..0d6cf64c8bb1 100644
--- a/include/linux/kgdb.h
+++ b/include/linux/kgdb.h
@@ -16,6 +16,7 @@
 #include <linux/linkage.h>
 #include <linux/init.h>
 #include <linux/atomic.h>
+#include <linux/kprobes.h>
 #ifdef CONFIG_HAVE_ARCH_KGDB
 #include <asm/kgdb.h>
 #endif
@@ -335,6 +336,23 @@ extern int kgdb_nmicallin(int cpu, int trapnr, void *regs, int err_code,
 			  atomic_t *snd_rdy);
 extern void gdbstub_exit(int status);
 
+/*
+ * kgdb and kprobes both use the same (kprobe) blocklist (which makes sense
+ * given they are both typically hooked up to the same trap meaning on most
+ * architectures one cannot be used to debug the other)
+ *
+ * However on architectures where kprobes is not (yet) implemented we permit
+ * breakpoints everywhere rather than blocking everything by default.
+ */
+static inline bool kgdb_within_blocklist(unsigned long addr)
+{
+#ifdef CONFIG_KGDB_HONOUR_BLOCKLIST
+	return within_kprobe_blacklist(addr);
+#else
+	return false;
+#endif
+}
+
 extern int			kgdb_single_step;
 extern atomic_t			kgdb_active;
 #define in_dbg_master() \
diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c
index 165e5b0c2083..6b9383fa8278 100644
--- a/kernel/debug/debug_core.c
+++ b/kernel/debug/debug_core.c
@@ -180,6 +180,10 @@ int __weak kgdb_validate_break_address(unsigned long addr)
 {
 	struct kgdb_bkpt tmp;
 	int err;
+
+	if (kgdb_within_blocklist(addr))
+		return -EINVAL;
+
 	/* Validate setting the breakpoint and then removing it.  If the
 	 * remove fails, the kernel needs to emit a bad message because we
 	 * are deep trouble not being able to put things back the way we
diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c
index d7ebb2c79cb8..ec4940146612 100644
--- a/kernel/debug/kdb/kdb_bp.c
+++ b/kernel/debug/kdb/kdb_bp.c
@@ -306,6 +306,15 @@ static int kdb_bp(int argc, const char **argv)
 	if (!template.bp_addr)
 		return KDB_BADINT;
 
+	/*
+	 * This check is redundant (since the breakpoint machinery should
+	 * be doing the same check during kdb_bp_install) but gives the
+	 * user immediate feedback.
+	 */
+	diag = kgdb_validate_break_address(template.bp_addr);
+	if (diag)
+		return diag;
+
 	/*
 	 * Find an empty bp structure to allocate
 	 */
diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb
index 256f2486f9bd..05dae05b6cc9 100644
--- a/lib/Kconfig.kgdb
+++ b/lib/Kconfig.kgdb
@@ -24,6 +24,21 @@ menuconfig KGDB
 
 if KGDB
 
+config KGDB_HONOUR_BLOCKLIST
+	bool "KGDB: use kprobe blocklist to prohibit unsafe breakpoints"
+	depends on HAVE_KPROBES
+	depends on MODULES
+	select KPROBES
+	default y
+	help
+	  If set to Y the debug core will use the kprobe blocklist to
+	  identify symbols where it is unsafe to set breakpoints.
+	  In particular this disallows instrumentation of functions
+	  called during debug trap handling and thus makes it very
+	  difficult to inadvertently provoke recursive trap handling.
+
+	  If unsure, say Y.
+
 config KGDB_SERIAL_CONSOLE
 	tristate "KGDB: use kgdb over the serial console"
 	select CONSOLE_POLL
-- 
cgit v1.2.3


From 0fd16012adc0a994a7ce980a78e22e4de6220778 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Tue, 29 Sep 2020 12:09:55 +0200
Subject: lib: string_helpers: provide kfree_strarray()

There's a common pattern of dynamically allocating an array of char
pointers and then also dynamically allocating each string in this
array. Provide a helper for freeing such a string array with one call.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
---
 include/linux/string_helpers.h |  2 ++
 lib/string_helpers.c           | 23 +++++++++++++++++++++++
 2 files changed, 25 insertions(+)

(limited to 'lib')

diff --git a/include/linux/string_helpers.h b/include/linux/string_helpers.h
index 86f150c2a6b6..fa06dcdc481e 100644
--- a/include/linux/string_helpers.h
+++ b/include/linux/string_helpers.h
@@ -94,4 +94,6 @@ char *kstrdup_quotable(const char *src, gfp_t gfp);
 char *kstrdup_quotable_cmdline(struct task_struct *task, gfp_t gfp);
 char *kstrdup_quotable_file(struct file *file, gfp_t gfp);
 
+void kfree_strarray(char **array, size_t n);
+
 #endif
diff --git a/lib/string_helpers.c b/lib/string_helpers.c
index 963050c0283e..7f2d5fbaf243 100644
--- a/lib/string_helpers.c
+++ b/lib/string_helpers.c
@@ -649,3 +649,26 @@ char *kstrdup_quotable_file(struct file *file, gfp_t gfp)
 	return pathname;
 }
 EXPORT_SYMBOL_GPL(kstrdup_quotable_file);
+
+/**
+ * kfree_strarray - free a number of dynamically allocated strings contained
+ *                  in an array and the array itself
+ *
+ * @array: Dynamically allocated array of strings to free.
+ * @n: Number of strings (starting from the beginning of the array) to free.
+ *
+ * Passing a non-NULL @array and @n == 0 as well as NULL @array are valid
+ * use-cases. If @array is NULL, the function does nothing.
+ */
+void kfree_strarray(char **array, size_t n)
+{
+	unsigned int i;
+
+	if (!array)
+		return;
+
+	for (i = 0; i < n; i++)
+		kfree(array[i]);
+	kfree(array);
+}
+EXPORT_SYMBOL_GPL(kfree_strarray);
-- 
cgit v1.2.3


From 88451f2cd3cec2abc30debdf129422d2699d1eba Mon Sep 17 00:00:00 2001
From: Zqiang <qiang.zhang@windriver.com>
Date: Tue, 8 Sep 2020 14:27:09 +0800
Subject: debugobjects: Free per CPU pool after CPU unplug

If a CPU is offlined the debug objects per CPU pool is not cleaned up. If
the CPU is never onlined again then the objects in the pool are wasted.

Add a CPU hotplug callback which is invoked after the CPU is dead to free
the pool.

[ tglx: Massaged changelog and added comment about remote access safety ]

Signed-off-by: Zqiang <qiang.zhang@windriver.com>
Signed-off-by: Thomas Gleixner <tglx@linutronix.de>
Cc: Waiman Long <longman@redhat.com>
Link: https://lore.kernel.org/r/20200908062709.11441-1-qiang.zhang@windriver.com
---
 include/linux/cpuhotplug.h |  1 +
 lib/debugobjects.c         | 25 +++++++++++++++++++++++++
 2 files changed, 26 insertions(+)

(limited to 'lib')

diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h
index bf9181cef444..6f524bbf71a2 100644
--- a/include/linux/cpuhotplug.h
+++ b/include/linux/cpuhotplug.h
@@ -36,6 +36,7 @@ enum cpuhp_state {
 	CPUHP_X86_MCE_DEAD,
 	CPUHP_VIRT_NET_DEAD,
 	CPUHP_SLUB_DEAD,
+	CPUHP_DEBUG_OBJ_DEAD,
 	CPUHP_MM_WRITEBACK_DEAD,
 	CPUHP_MM_VMSTAT_DEAD,
 	CPUHP_SOFTIRQ_DEAD,
diff --git a/lib/debugobjects.c b/lib/debugobjects.c
index e2a3171b6c63..9e14ae02306b 100644
--- a/lib/debugobjects.c
+++ b/lib/debugobjects.c
@@ -19,6 +19,7 @@
 #include <linux/slab.h>
 #include <linux/hash.h>
 #include <linux/kmemleak.h>
+#include <linux/cpu.h>
 
 #define ODEBUG_HASH_BITS	14
 #define ODEBUG_HASH_SIZE	(1 << ODEBUG_HASH_BITS)
@@ -433,6 +434,25 @@ static void free_object(struct debug_obj *obj)
 	}
 }
 
+#ifdef CONFIG_HOTPLUG_CPU
+static int object_cpu_offline(unsigned int cpu)
+{
+	struct debug_percpu_free *percpu_pool;
+	struct hlist_node *tmp;
+	struct debug_obj *obj;
+
+	/* Remote access is safe as the CPU is dead already */
+	percpu_pool = per_cpu_ptr(&percpu_obj_pool, cpu);
+	hlist_for_each_entry_safe(obj, tmp, &percpu_pool->free_objs, node) {
+		hlist_del(&obj->node);
+		kmem_cache_free(obj_cache, obj);
+	}
+	percpu_pool->obj_free = 0;
+
+	return 0;
+}
+#endif
+
 /*
  * We run out of memory. That means we probably have tons of objects
  * allocated.
@@ -1367,6 +1387,11 @@ void __init debug_objects_mem_init(void)
 	} else
 		debug_objects_selftest();
 
+#ifdef CONFIG_HOTPLUG_CPU
+	cpuhp_setup_state_nocalls(CPUHP_DEBUG_OBJ_DEAD, "object:offline", NULL,
+					object_cpu_offline);
+#endif
+
 	/*
 	 * Increase the thresholds for allocating and freeing objects
 	 * according to the number of possible CPUs available in the system.
-- 
cgit v1.2.3


From ae6ee6ae2b4f4534664561161ef8087e08aff2d9 Mon Sep 17 00:00:00 2001
From: Herbert Xu <herbert@gondor.apana.org.au>
Date: Fri, 25 Sep 2020 18:19:55 +1000
Subject: lib/mpi: Fix unused variable warnings

This patch removes a number of unused variables and marks others
as unused in order to silence compiler warnings about them.

Fixes: a8ea8bdd9df9 ("lib/mpi: Extend the MPI library")
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
Tested-by: Tianjia Zhang <tianjia.zhang@linux.alibaba.com>
Signed-off-by: Herbert Xu <herbert@gondor.apana.org.au>
---
 lib/mpi/mpi-div.c      | 4 ----
 lib/mpi/mpi-internal.h | 2 +-
 lib/mpi/mpi-mul.c      | 3 ---
 lib/mpi/mpih-div.c     | 4 ++--
 4 files changed, 3 insertions(+), 10 deletions(-)

(limited to 'lib')

diff --git a/lib/mpi/mpi-div.c b/lib/mpi/mpi-div.c
index 21332dab97d4..45beab8b9e9e 100644
--- a/lib/mpi/mpi-div.c
+++ b/lib/mpi/mpi-div.c
@@ -92,7 +92,6 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den)
 	unsigned int normalization_steps;
 	mpi_limb_t q_limb;
 	mpi_ptr_t marker[5];
-	unsigned int marker_nlimbs[5];
 	int markidx = 0;
 
 	/* Ensure space is enough for quotient and remainder.
@@ -152,7 +151,6 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den)
 		 * numerator would be gradually overwritten by the quotient limbs.
 		 */
 		if (qp == np) { /* Copy NP object to temporary space.  */
-			marker_nlimbs[markidx] = nsize;
 			np = marker[markidx++] = mpi_alloc_limb_space(nsize);
 			MPN_COPY(np, qp, nsize);
 		}
@@ -173,7 +171,6 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den)
 		 * the most significant word.  Use temporary storage not to clobber
 		 * the original contents of the denominator.
 		 */
-		marker_nlimbs[markidx] = dsize;
 		tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
 		mpihelp_lshift(tp, dp, dsize, normalization_steps);
 		dp = tp;
@@ -195,7 +192,6 @@ void mpi_tdiv_qr(MPI quot, MPI rem, MPI num, MPI den)
 		if (dp == rp || (quot && (dp == qp))) {
 			mpi_ptr_t tp;
 
-			marker_nlimbs[markidx] = dsize;
 			tp = marker[markidx++] = mpi_alloc_limb_space(dsize);
 			MPN_COPY(tp, dp, dsize);
 			dp = tp;
diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h
index d29c4537c3a3..554002182db1 100644
--- a/lib/mpi/mpi-internal.h
+++ b/lib/mpi/mpi-internal.h
@@ -114,7 +114,7 @@ typedef int mpi_size_t;		/* (must be a signed type) */
  */
 #define UDIV_QRNND_PREINV(q, r, nh, nl, d, di)				\
 	do {								\
-		mpi_limb_t _ql;						\
+		mpi_limb_t _ql __maybe_unused;				\
 		mpi_limb_t _q, _r;					\
 		mpi_limb_t _xh, _xl;					\
 		umul_ppmm(_q, _ql, (nh), (di));				\
diff --git a/lib/mpi/mpi-mul.c b/lib/mpi/mpi-mul.c
index 587e6335cc12..8f5fa200f297 100644
--- a/lib/mpi/mpi-mul.c
+++ b/lib/mpi/mpi-mul.c
@@ -21,7 +21,6 @@ void mpi_mul(MPI w, MPI u, MPI v)
 	int usign, vsign, sign_product;
 	int assign_wp = 0;
 	mpi_ptr_t tmp_limb = NULL;
-	unsigned int tmp_limb_nlimbs = 0;
 
 	if (u->nlimbs < v->nlimbs) {
 		/* Swap U and V. */
@@ -55,7 +54,6 @@ void mpi_mul(MPI w, MPI u, MPI v)
 	} else { /* Make U and V not overlap with W.	*/
 		if (wp == up) {
 			/* W and U are identical.  Allocate temporary space for U. */
-			tmp_limb_nlimbs = usize;
 			up = tmp_limb = mpi_alloc_limb_space(usize);
 			/* Is V identical too?  Keep it identical with U.  */
 			if (wp == vp)
@@ -64,7 +62,6 @@ void mpi_mul(MPI w, MPI u, MPI v)
 			MPN_COPY(up, wp, usize);
 		} else if (wp == vp) {
 			/* W and V are identical.  Allocate temporary space for V. */
-			tmp_limb_nlimbs = vsize;
 			vp = tmp_limb = mpi_alloc_limb_space(vsize);
 			/* Copy to the temporary space.  */
 			MPN_COPY(vp, wp, vsize);
diff --git a/lib/mpi/mpih-div.c b/lib/mpi/mpih-div.c
index 182a656a1ba0..be70ee2e42d3 100644
--- a/lib/mpi/mpih-div.c
+++ b/lib/mpi/mpih-div.c
@@ -31,7 +31,7 @@ mpihelp_mod_1(mpi_ptr_t dividend_ptr, mpi_size_t dividend_size,
 {
 	mpi_size_t i;
 	mpi_limb_t n1, n0, r;
-	mpi_limb_t dummy;
+	mpi_limb_t dummy __maybe_unused;
 
 	/* Botch: Should this be handled at all?  Rely on callers?	*/
 	if (!dividend_size)
@@ -382,7 +382,7 @@ mpihelp_divmod_1(mpi_ptr_t quot_ptr,
 {
 	mpi_size_t i;
 	mpi_limb_t n1, n0, r;
-	mpi_limb_t dummy;
+	mpi_limb_t dummy __maybe_unused;
 
 	if (!dividend_size)
 		return 0;
-- 
cgit v1.2.3


From bfdc59701d6d100c99c3b987bcffd1c204e393c8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 25 Sep 2020 06:51:40 +0200
Subject: iov_iter: refactor rw_copy_check_uvector and import_iovec

Split rw_copy_check_uvector into two new helpers with more sensible
calling conventions:

 - iovec_from_user copies a iovec from userspace either into the provided
   stack buffer if it fits, or allocates a new buffer for it.  Returns
   the actually used iovec.  It also verifies that iov_len does fit a
   signed type, and handles compat iovecs if the compat flag is set.
 - __import_iovec consolidates the native and compat versions of
   import_iovec. It calls iovec_from_user, then validates each iovec
   actually points to user addresses, and ensures the total length
   doesn't overflow.

This has two major implications:

 - the access_process_vm case loses the total lenght checking, which
   wasn't required anyway, given that each call receives two iovecs
   for the local and remote side of the operation, and it verifies
   the total length on the local side already.
 - instead of a single loop there now are two loops over the iovecs.
   Given that the iovecs are cache hot this doesn't make a major
   difference

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/compat.h |  16 +--
 include/linux/fs.h     |  13 ---
 include/linux/uio.h    |  12 +-
 lib/iov_iter.c         | 300 +++++++++++++++++++------------------------------
 mm/process_vm_access.c |  34 +++---
 5 files changed, 143 insertions(+), 232 deletions(-)

(limited to 'lib')

diff --git a/include/linux/compat.h b/include/linux/compat.h
index 790be5ffc12c..cebcaac68aec 100644
--- a/include/linux/compat.h
+++ b/include/linux/compat.h
@@ -91,6 +91,11 @@
 	static inline long __do_compat_sys##name(__MAP(x,__SC_DECL,__VA_ARGS__))
 #endif /* COMPAT_SYSCALL_DEFINEx */
 
+struct compat_iovec {
+	compat_uptr_t	iov_base;
+	compat_size_t	iov_len;
+};
+
 #ifdef CONFIG_COMPAT
 
 #ifndef compat_user_stack_pointer
@@ -248,11 +253,6 @@ typedef struct compat_siginfo {
 	} _sifields;
 } compat_siginfo_t;
 
-struct compat_iovec {
-	compat_uptr_t	iov_base;
-	compat_size_t	iov_len;
-};
-
 struct compat_rlimit {
 	compat_ulong_t	rlim_cur;
 	compat_ulong_t	rlim_max;
@@ -451,12 +451,6 @@ extern long compat_arch_ptrace(struct task_struct *child, compat_long_t request,
 
 struct epoll_event;	/* fortunately, this one is fixed-layout */
 
-extern ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector,
-		unsigned long nr_segs,
-		unsigned long fast_segs, struct iovec *fast_pointer,
-		struct iovec **ret_pointer);
-
 extern void __user *compat_alloc_user_space(unsigned long len);
 
 int compat_restore_altstack(const compat_stack_t __user *uss);
diff --git a/include/linux/fs.h b/include/linux/fs.h
index e019ea2f1347..b9fdac75ba06 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -178,14 +178,6 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset,
 /* File supports async buffered reads */
 #define FMODE_BUF_RASYNC	((__force fmode_t)0x40000000)
 
-/*
- * Flag for rw_copy_check_uvector and compat_rw_copy_check_uvector
- * that indicates that they should check the contents of the iovec are
- * valid, but not check the memory that the iovec elements
- * points too.
- */
-#define CHECK_IOVEC_ONLY -1
-
 /*
  * Attribute flags.  These should be or-ed together to figure out what
  * has been changed!
@@ -1887,11 +1879,6 @@ static inline int call_mmap(struct file *file, struct vm_area_struct *vma)
 	return file->f_op->mmap(file, vma);
 }
 
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector,
-			      unsigned long nr_segs, unsigned long fast_segs,
-			      struct iovec *fast_pointer,
-			      struct iovec **ret_pointer);
-
 extern ssize_t vfs_read(struct file *, char __user *, size_t, loff_t *);
 extern ssize_t vfs_write(struct file *, const char __user *, size_t, loff_t *);
 extern ssize_t vfs_readv(struct file *, const struct iovec __user *,
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9ea..92c11fe41c62 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -266,9 +266,15 @@ bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct
 size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp,
 		struct iov_iter *i);
 
-ssize_t import_iovec(int type, const struct iovec __user * uvector,
-		 unsigned nr_segs, unsigned fast_segs,
-		 struct iovec **iov, struct iov_iter *i);
+struct iovec *iovec_from_user(const struct iovec __user *uvector,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_iov, bool compat);
+ssize_t import_iovec(int type, const struct iovec __user *uvec,
+		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		 struct iov_iter *i);
+ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		 struct iov_iter *i, bool compat);
 
 #ifdef CONFIG_COMPAT
 struct compat_iovec;
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index ccea9db3f72b..e6cb51da3424 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -7,6 +7,7 @@
 #include <linux/slab.h>
 #include <linux/vmalloc.h>
 #include <linux/splice.h>
+#include <linux/compat.h>
 #include <net/checksum.h>
 #include <linux/scatterlist.h>
 #include <linux/instrumented.h>
@@ -1650,107 +1651,133 @@ const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags)
 }
 EXPORT_SYMBOL(dup_iter);
 
-/**
- * rw_copy_check_uvector() - Copy an array of &struct iovec from userspace
- *     into the kernel and check that it is valid.
- *
- * @type: One of %CHECK_IOVEC_ONLY, %READ, or %WRITE.
- * @uvector: Pointer to the userspace array.
- * @nr_segs: Number of elements in userspace array.
- * @fast_segs: Number of elements in @fast_pointer.
- * @fast_pointer: Pointer to (usually small on-stack) kernel array.
- * @ret_pointer: (output parameter) Pointer to a variable that will point to
- *     either @fast_pointer, a newly allocated kernel array, or NULL,
- *     depending on which array was used.
- *
- * This function copies an array of &struct iovec of @nr_segs from
- * userspace into the kernel and checks that each element is valid (e.g.
- * it does not point to a kernel address or cause overflow by being too
- * large, etc.).
- *
- * As an optimization, the caller may provide a pointer to a small
- * on-stack array in @fast_pointer, typically %UIO_FASTIOV elements long
- * (the size of this array, or 0 if unused, should be given in @fast_segs).
- *
- * @ret_pointer will always point to the array that was used, so the
- * caller must take care not to call kfree() on it e.g. in case the
- * @fast_pointer array was used and it was allocated on the stack.
- *
- * Return: The total number of bytes covered by the iovec array on success
- *   or a negative error code on error.
- */
-ssize_t rw_copy_check_uvector(int type, const struct iovec __user *uvector,
-		unsigned long nr_segs, unsigned long fast_segs,
-		struct iovec *fast_pointer, struct iovec **ret_pointer)
+static int copy_compat_iovec_from_user(struct iovec *iov,
+		const struct iovec __user *uvec, unsigned long nr_segs)
+{
+	const struct compat_iovec __user *uiov =
+		(const struct compat_iovec __user *)uvec;
+	int ret = -EFAULT, i;
+
+	if (!user_access_begin(uvec, nr_segs * sizeof(*uvec)))
+		return -EFAULT;
+
+	for (i = 0; i < nr_segs; i++) {
+		compat_uptr_t buf;
+		compat_ssize_t len;
+
+		unsafe_get_user(len, &uiov[i].iov_len, uaccess_end);
+		unsafe_get_user(buf, &uiov[i].iov_base, uaccess_end);
+
+		/* check for compat_size_t not fitting in compat_ssize_t .. */
+		if (len < 0) {
+			ret = -EINVAL;
+			goto uaccess_end;
+		}
+		iov[i].iov_base = compat_ptr(buf);
+		iov[i].iov_len = len;
+	}
+
+	ret = 0;
+uaccess_end:
+	user_access_end();
+	return ret;
+}
+
+static int copy_iovec_from_user(struct iovec *iov,
+		const struct iovec __user *uvec, unsigned long nr_segs)
 {
 	unsigned long seg;
-	ssize_t ret;
-	struct iovec *iov = fast_pointer;
 
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0) {
-		ret = 0;
-		goto out;
+	if (copy_from_user(iov, uvec, nr_segs * sizeof(*uvec)))
+		return -EFAULT;
+	for (seg = 0; seg < nr_segs; seg++) {
+		if ((ssize_t)iov[seg].iov_len < 0)
+			return -EINVAL;
 	}
 
+	return 0;
+}
+
+struct iovec *iovec_from_user(const struct iovec __user *uvec,
+		unsigned long nr_segs, unsigned long fast_segs,
+		struct iovec *fast_iov, bool compat)
+{
+	struct iovec *iov = fast_iov;
+	int ret;
+
 	/*
-	 * First get the "struct iovec" from user memory and
-	 * verify all the pointers
+	 * SuS says "The readv() function *may* fail if the iovcnt argument was
+	 * less than or equal to 0, or greater than {IOV_MAX}.  Linux has
+	 * traditionally returned zero for zero segments, so...
 	 */
-	if (nr_segs > UIO_MAXIOV) {
-		ret = -EINVAL;
-		goto out;
-	}
+	if (nr_segs == 0)
+		return iov;
+	if (nr_segs > UIO_MAXIOV)
+		return ERR_PTR(-EINVAL);
 	if (nr_segs > fast_segs) {
 		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL) {
-			ret = -ENOMEM;
-			goto out;
-		}
+		if (!iov)
+			return ERR_PTR(-ENOMEM);
 	}
-	if (copy_from_user(iov, uvector, nr_segs*sizeof(*uvector))) {
-		ret = -EFAULT;
-		goto out;
+
+	if (compat)
+		ret = copy_compat_iovec_from_user(iov, uvec, nr_segs);
+	else
+		ret = copy_iovec_from_user(iov, uvec, nr_segs);
+	if (ret) {
+		if (iov != fast_iov)
+			kfree(iov);
+		return ERR_PTR(ret);
+	}
+
+	return iov;
+}
+
+ssize_t __import_iovec(int type, const struct iovec __user *uvec,
+		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		 struct iov_iter *i, bool compat)
+{
+	ssize_t total_len = 0;
+	unsigned long seg;
+	struct iovec *iov;
+
+	iov = iovec_from_user(uvec, nr_segs, fast_segs, *iovp, compat);
+	if (IS_ERR(iov)) {
+		*iovp = NULL;
+		return PTR_ERR(iov);
 	}
 
 	/*
-	 * According to the Single Unix Specification we should return EINVAL
-	 * if an element length is < 0 when cast to ssize_t or if the
-	 * total length would overflow the ssize_t return value of the
-	 * system call.
+	 * According to the Single Unix Specification we should return EINVAL if
+	 * an element length is < 0 when cast to ssize_t or if the total length
+	 * would overflow the ssize_t return value of the system call.
 	 *
 	 * Linux caps all read/write calls to MAX_RW_COUNT, and avoids the
 	 * overflow case.
 	 */
-	ret = 0;
 	for (seg = 0; seg < nr_segs; seg++) {
-		void __user *buf = iov[seg].iov_base;
 		ssize_t len = (ssize_t)iov[seg].iov_len;
 
-		/* see if we we're about to use an invalid len or if
-		 * it's about to overflow ssize_t */
-		if (len < 0) {
-			ret = -EINVAL;
-			goto out;
+		if (!access_ok(iov[seg].iov_base, len)) {
+			if (iov != *iovp)
+				kfree(iov);
+			*iovp = NULL;
+			return -EFAULT;
 		}
-		if (type >= 0
-		    && unlikely(!access_ok(buf, len))) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - ret) {
-			len = MAX_RW_COUNT - ret;
+
+		if (len > MAX_RW_COUNT - total_len) {
+			len = MAX_RW_COUNT - total_len;
 			iov[seg].iov_len = len;
 		}
-		ret += len;
+		total_len += len;
 	}
-out:
-	*ret_pointer = iov;
-	return ret;
+
+	iov_iter_init(i, type, iov, nr_segs, total_len);
+	if (iov == *iovp)
+		*iovp = NULL;
+	else
+		*iovp = iov;
+	return total_len;
 }
 
 /**
@@ -1759,10 +1786,10 @@ out:
  *     &struct iov_iter iterator to access it.
  *
  * @type: One of %READ or %WRITE.
- * @uvector: Pointer to the userspace array.
+ * @uvec: Pointer to the userspace array.
  * @nr_segs: Number of elements in userspace array.
  * @fast_segs: Number of elements in @iov.
- * @iov: (input and output parameter) Pointer to pointer to (usually small
+ * @iovp: (input and output parameter) Pointer to pointer to (usually small
  *     on-stack) kernel array.
  * @i: Pointer to iterator that will be initialized on success.
  *
@@ -1775,120 +1802,21 @@ out:
  *
  * Return: Negative error code on error, bytes imported on success
  */
-ssize_t import_iovec(int type, const struct iovec __user * uvector,
+ssize_t import_iovec(int type, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs,
-		 struct iovec **iov, struct iov_iter *i)
+		 struct iovec **iovp, struct iov_iter *i)
 {
-	ssize_t n;
-	struct iovec *p;
-	n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
-				  *iov, &p);
-	if (n < 0) {
-		if (p != *iov)
-			kfree(p);
-		*iov = NULL;
-		return n;
-	}
-	iov_iter_init(i, type, p, nr_segs, n);
-	*iov = p == *iov ? NULL : p;
-	return n;
+	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, false);
 }
 EXPORT_SYMBOL(import_iovec);
 
 #ifdef CONFIG_COMPAT
-#include <linux/compat.h>
-
-ssize_t compat_rw_copy_check_uvector(int type,
-		const struct compat_iovec __user *uvector,
-		unsigned long nr_segs, unsigned long fast_segs,
-		struct iovec *fast_pointer, struct iovec **ret_pointer)
-{
-	compat_ssize_t tot_len;
-	struct iovec *iov = *ret_pointer = fast_pointer;
-	ssize_t ret = 0;
-	int seg;
-
-	/*
-	 * SuS says "The readv() function *may* fail if the iovcnt argument
-	 * was less than or equal to 0, or greater than {IOV_MAX}.  Linux has
-	 * traditionally returned zero for zero segments, so...
-	 */
-	if (nr_segs == 0)
-		goto out;
-
-	ret = -EINVAL;
-	if (nr_segs > UIO_MAXIOV)
-		goto out;
-	if (nr_segs > fast_segs) {
-		ret = -ENOMEM;
-		iov = kmalloc_array(nr_segs, sizeof(struct iovec), GFP_KERNEL);
-		if (iov == NULL)
-			goto out;
-	}
-	*ret_pointer = iov;
-
-	ret = -EFAULT;
-	if (!access_ok(uvector, nr_segs*sizeof(*uvector)))
-		goto out;
-
-	/*
-	 * Single unix specification:
-	 * We should -EINVAL if an element length is not >= 0 and fitting an
-	 * ssize_t.
-	 *
-	 * In Linux, the total length is limited to MAX_RW_COUNT, there is
-	 * no overflow possibility.
-	 */
-	tot_len = 0;
-	ret = -EINVAL;
-	for (seg = 0; seg < nr_segs; seg++) {
-		compat_uptr_t buf;
-		compat_ssize_t len;
-
-		if (__get_user(len, &uvector->iov_len) ||
-		   __get_user(buf, &uvector->iov_base)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len < 0)	/* size_t not fitting in compat_ssize_t .. */
-			goto out;
-		if (type >= 0 &&
-		    !access_ok(compat_ptr(buf), len)) {
-			ret = -EFAULT;
-			goto out;
-		}
-		if (len > MAX_RW_COUNT - tot_len)
-			len = MAX_RW_COUNT - tot_len;
-		tot_len += len;
-		iov->iov_base = compat_ptr(buf);
-		iov->iov_len = (compat_size_t) len;
-		uvector++;
-		iov++;
-	}
-	ret = tot_len;
-
-out:
-	return ret;
-}
-
-ssize_t compat_import_iovec(int type,
-		const struct compat_iovec __user * uvector,
-		unsigned nr_segs, unsigned fast_segs,
-		struct iovec **iov, struct iov_iter *i)
+ssize_t compat_import_iovec(int type, const struct compat_iovec __user *uvec,
+		unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
+		struct iov_iter *i)
 {
-	ssize_t n;
-	struct iovec *p;
-	n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs,
-				  *iov, &p);
-	if (n < 0) {
-		if (p != *iov)
-			kfree(p);
-		*iov = NULL;
-		return n;
-	}
-	iov_iter_init(i, type, p, nr_segs, n);
-	*iov = p == *iov ? NULL : p;
-	return n;
+	return __import_iovec(type, (const struct iovec __user *)uvec, nr_segs,
+			     fast_segs, iovp, i, true);
 }
 EXPORT_SYMBOL(compat_import_iovec);
 #endif
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 29c052099aff..5e728c20c2be 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -276,20 +276,17 @@ static ssize_t process_vm_rw(pid_t pid,
 	if (rc < 0)
 		return rc;
 	if (!iov_iter_count(&iter))
-		goto free_iovecs;
-
-	rc = rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt, UIO_FASTIOV,
-				   iovstack_r, &iov_r);
-	if (rc <= 0)
-		goto free_iovecs;
-
+		goto free_iov_l;
+	iov_r = iovec_from_user(rvec, riovcnt, UIO_FASTIOV, iovstack_r, false);
+	if (IS_ERR(iov_r)) {
+		rc = PTR_ERR(iov_r);
+		goto free_iov_l;
+	}
 	rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
-
-free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
+free_iov_l:
 	kfree(iov_l);
-
 	return rc;
 }
 
@@ -333,18 +330,17 @@ compat_process_vm_rw(compat_pid_t pid,
 	if (rc < 0)
 		return rc;
 	if (!iov_iter_count(&iter))
-		goto free_iovecs;
-	rc = compat_rw_copy_check_uvector(CHECK_IOVEC_ONLY, rvec, riovcnt,
-					  UIO_FASTIOV, iovstack_r,
-					  &iov_r);
-	if (rc <= 0)
-		goto free_iovecs;
-
+		goto free_iov_l;
+	iov_r = iovec_from_user((const struct iovec __user *)rvec, riovcnt,
+				UIO_FASTIOV, iovstack_r, true);
+	if (IS_ERR(iov_r)) {
+		rc = PTR_ERR(iov_r);
+		goto free_iov_l;
+	}
 	rc = process_vm_rw_core(pid, &iter, iov_r, riovcnt, flags, vm_write);
-
-free_iovecs:
 	if (iov_r != iovstack_r)
 		kfree(iov_r);
+free_iov_l:
 	kfree(iov_l);
 	return rc;
 }
-- 
cgit v1.2.3


From 89cd35c58bc2e36bfdc23dde67a429b08cf4ae03 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 25 Sep 2020 06:51:41 +0200
Subject: iov_iter: transparently handle compat iovecs in import_iovec

Use in compat_syscall to import either native or the compat iovecs, and
remove the now superflous compat_import_iovec.

This removes the need for special compat logic in most callers, and
the remaining ones can still be simplified by using __import_iovec
with a bool compat parameter.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 block/scsi_ioctl.c     | 12 ++----------
 drivers/scsi/sg.c      |  9 +--------
 fs/aio.c               |  8 ++------
 fs/io_uring.c          | 20 ++++++++------------
 fs/read_write.c        |  6 ++++--
 fs/splice.c            |  2 +-
 include/linux/uio.h    |  8 --------
 lib/iov_iter.c         | 14 ++------------
 mm/process_vm_access.c |  3 ++-
 net/compat.c           |  4 ++--
 security/keys/compat.c |  5 ++---
 11 files changed, 26 insertions(+), 65 deletions(-)

(limited to 'lib')

diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c
index ef722f04f88a..e08df86866ee 100644
--- a/block/scsi_ioctl.c
+++ b/block/scsi_ioctl.c
@@ -333,16 +333,8 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk,
 		struct iov_iter i;
 		struct iovec *iov = NULL;
 
-#ifdef CONFIG_COMPAT
-		if (in_compat_syscall())
-			ret = compat_import_iovec(rq_data_dir(rq),
-				   hdr->dxferp, hdr->iovec_count,
-				   0, &iov, &i);
-		else
-#endif
-			ret = import_iovec(rq_data_dir(rq),
-				   hdr->dxferp, hdr->iovec_count,
-				   0, &iov, &i);
+		ret = import_iovec(rq_data_dir(rq), hdr->dxferp,
+				   hdr->iovec_count, 0, &iov, &i);
 		if (ret < 0)
 			goto out_free_cdb;
 
diff --git a/drivers/scsi/sg.c b/drivers/scsi/sg.c
index 20472aaaf630..bfa8d77322d7 100644
--- a/drivers/scsi/sg.c
+++ b/drivers/scsi/sg.c
@@ -1820,14 +1820,7 @@ sg_start_req(Sg_request *srp, unsigned char *cmd)
 		struct iovec *iov = NULL;
 		struct iov_iter i;
 
-#ifdef CONFIG_COMPAT
-		if (in_compat_syscall())
-			res = compat_import_iovec(rw, hp->dxferp, iov_count,
-						  0, &iov, &i);
-		else
-#endif
-			res = import_iovec(rw, hp->dxferp, iov_count,
-					   0, &iov, &i);
+		res = import_iovec(rw, hp->dxferp, iov_count, 0, &iov, &i);
 		if (res < 0)
 			return res;
 
diff --git a/fs/aio.c b/fs/aio.c
index 5736bff48e9e..247f05533dd5 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -1489,12 +1489,8 @@ static ssize_t aio_setup_rw(int rw, const struct iocb *iocb,
 		*iovec = NULL;
 		return ret;
 	}
-#ifdef CONFIG_COMPAT
-	if (compat)
-		return compat_import_iovec(rw, buf, len, UIO_FASTIOV, iovec,
-				iter);
-#endif
-	return import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter);
+
+	return __import_iovec(rw, buf, len, UIO_FASTIOV, iovec, iter, compat);
 }
 
 static inline void aio_rw_done(struct kiocb *req, ssize_t ret)
diff --git a/fs/io_uring.c b/fs/io_uring.c
index 91e2cc8414f9..b75c699b5cb7 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2846,13 +2846,8 @@ static ssize_t __io_import_iovec(int rw, struct io_kiocb *req,
 		return ret;
 	}
 
-#ifdef CONFIG_COMPAT
-	if (req->ctx->compat)
-		return compat_import_iovec(rw, buf, sqe_len, UIO_FASTIOV,
-						iovec, iter);
-#endif
-
-	return import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter);
+	return __import_iovec(rw, buf, sqe_len, UIO_FASTIOV, iovec, iter,
+			      req->ctx->compat);
 }
 
 static ssize_t io_import_iovec(int rw, struct io_kiocb *req,
@@ -4166,8 +4161,9 @@ static int __io_recvmsg_copy_hdr(struct io_kiocb *req,
 				sr->len);
 		iomsg->iov = NULL;
 	} else {
-		ret = import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
-					&iomsg->iov, &iomsg->msg.msg_iter);
+		ret = __import_iovec(READ, uiov, iov_len, UIO_FASTIOV,
+				     &iomsg->iov, &iomsg->msg.msg_iter,
+				     false);
 		if (ret > 0)
 			ret = 0;
 	}
@@ -4207,9 +4203,9 @@ static int __io_compat_recvmsg_copy_hdr(struct io_kiocb *req,
 		sr->len = iomsg->iov[0].iov_len;
 		iomsg->iov = NULL;
 	} else {
-		ret = compat_import_iovec(READ, uiov, len, UIO_FASTIOV,
-						&iomsg->iov,
-						&iomsg->msg.msg_iter);
+		ret = __import_iovec(READ, (struct iovec __user *)uiov, len,
+				   UIO_FASTIOV, &iomsg->iov,
+				   &iomsg->msg.msg_iter, true);
 		if (ret < 0)
 			return ret;
 	}
diff --git a/fs/read_write.c b/fs/read_write.c
index e5e891a88442..0a68037580b4 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -1078,7 +1078,8 @@ static size_t compat_readv(struct file *file,
 	struct iov_iter iter;
 	ssize_t ret;
 
-	ret = compat_import_iovec(READ, vec, vlen, UIO_FASTIOV, &iov, &iter);
+	ret = import_iovec(READ, (const struct iovec __user *)vec, vlen,
+			   UIO_FASTIOV, &iov, &iter);
 	if (ret >= 0) {
 		ret = do_iter_read(file, &iter, pos, flags);
 		kfree(iov);
@@ -1186,7 +1187,8 @@ static size_t compat_writev(struct file *file,
 	struct iov_iter iter;
 	ssize_t ret;
 
-	ret = compat_import_iovec(WRITE, vec, vlen, UIO_FASTIOV, &iov, &iter);
+	ret = import_iovec(WRITE, (const struct iovec __user *)vec, vlen,
+			   UIO_FASTIOV, &iov, &iter);
 	if (ret >= 0) {
 		file_start_write(file);
 		ret = do_iter_write(file, &iter, pos, flags);
diff --git a/fs/splice.c b/fs/splice.c
index d7c8a7c4db07..132d42b9871f 100644
--- a/fs/splice.c
+++ b/fs/splice.c
@@ -1387,7 +1387,7 @@ COMPAT_SYSCALL_DEFINE4(vmsplice, int, fd, const struct compat_iovec __user *, io
 	if (error)
 		return error;
 
-	error = compat_import_iovec(type, iov32, nr_segs,
+	error = import_iovec(type, (struct iovec __user *)iov32, nr_segs,
 			     ARRAY_SIZE(iovstack), &iov, &iter);
 	if (error >= 0) {
 		error = do_vmsplice(f.file, &iter, flags);
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 92c11fe41c62..daedc61ad370 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -275,14 +275,6 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec,
 ssize_t __import_iovec(int type, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
 		 struct iov_iter *i, bool compat);
-
-#ifdef CONFIG_COMPAT
-struct compat_iovec;
-ssize_t compat_import_iovec(int type, const struct compat_iovec __user * uvector,
-		 unsigned nr_segs, unsigned fast_segs,
-		 struct iovec **iov, struct iov_iter *i);
-#endif
-
 int import_single_range(int type, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i);
 
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index e6cb51da3424..2e452ce7388b 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -1806,21 +1806,11 @@ ssize_t import_iovec(int type, const struct iovec __user *uvec,
 		 unsigned nr_segs, unsigned fast_segs,
 		 struct iovec **iovp, struct iov_iter *i)
 {
-	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i, false);
+	return __import_iovec(type, uvec, nr_segs, fast_segs, iovp, i,
+			      in_compat_syscall());
 }
 EXPORT_SYMBOL(import_iovec);
 
-#ifdef CONFIG_COMPAT
-ssize_t compat_import_iovec(int type, const struct compat_iovec __user *uvec,
-		unsigned nr_segs, unsigned fast_segs, struct iovec **iovp,
-		struct iov_iter *i)
-{
-	return __import_iovec(type, (const struct iovec __user *)uvec, nr_segs,
-			     fast_segs, iovp, i, true);
-}
-EXPORT_SYMBOL(compat_import_iovec);
-#endif
-
 int import_single_range(int rw, void __user *buf, size_t len,
 		 struct iovec *iov, struct iov_iter *i)
 {
diff --git a/mm/process_vm_access.c b/mm/process_vm_access.c
index 5e728c20c2be..3f2156aab442 100644
--- a/mm/process_vm_access.c
+++ b/mm/process_vm_access.c
@@ -326,7 +326,8 @@ compat_process_vm_rw(compat_pid_t pid,
 	if (flags != 0)
 		return -EINVAL;
 
-	rc = compat_import_iovec(dir, lvec, liovcnt, UIO_FASTIOV, &iov_l, &iter);
+	rc = import_iovec(dir, (const struct iovec __user *)lvec, liovcnt,
+			  UIO_FASTIOV, &iov_l, &iter);
 	if (rc < 0)
 		return rc;
 	if (!iov_iter_count(&iter))
diff --git a/net/compat.c b/net/compat.c
index 95ce707a30a3..ddd15af3a283 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -98,8 +98,8 @@ int get_compat_msghdr(struct msghdr *kmsg,
 	if (err)
 		return err;
 
-	err = compat_import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr),
-				   len, UIO_FASTIOV, iov, &kmsg->msg_iter);
+	err = import_iovec(save_addr ? READ : WRITE, compat_ptr(ptr), len,
+			   UIO_FASTIOV, iov, &kmsg->msg_iter);
 	return err < 0 ? err : 0;
 }
 
diff --git a/security/keys/compat.c b/security/keys/compat.c
index 6ee9d8f6a4a5..7ae531db031c 100644
--- a/security/keys/compat.c
+++ b/security/keys/compat.c
@@ -33,9 +33,8 @@ static long compat_keyctl_instantiate_key_iov(
 	if (!_payload_iov)
 		ioc = 0;
 
-	ret = compat_import_iovec(WRITE, _payload_iov, ioc,
-				  ARRAY_SIZE(iovstack), &iov,
-				  &from);
+	ret = import_iovec(WRITE, (const struct iovec __user *)_payload_iov,
+			   ioc, ARRAY_SIZE(iovstack), &iov, &from);
 	if (ret < 0)
 		return ret;
 
-- 
cgit v1.2.3


From 5d90e05c0e83ee5c67fdba1fa56e590103aac9fd Mon Sep 17 00:00:00 2001
From: Scott Branden <scott.branden@broadcom.com>
Date: Fri, 2 Oct 2020 10:38:28 -0700
Subject: test_firmware: Test partial read support

Add additional hooks to test_firmware to pass in support
for partial file read using request_firmware_into_buf():

	buf_size: size of buffer to request firmware into
	partial: indicates that a partial file request is being made
	file_offset: to indicate offset into file to request

Also update firmware selftests to use the new partial read test API.

Signed-off-by: Scott Branden <scott.branden@broadcom.com>
Co-developed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Kees Cook <keescook@chromium.org>
Link: https://lore.kernel.org/r/20201002173828.2099543-17-keescook@chromium.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
---
 lib/test_firmware.c                               | 154 ++++++++++++++++++++--
 tools/testing/selftests/firmware/fw_filesystem.sh |  91 +++++++++++++
 2 files changed, 233 insertions(+), 12 deletions(-)

(limited to 'lib')

diff --git a/lib/test_firmware.c b/lib/test_firmware.c
index 06c955057756..2baa275a6ddf 100644
--- a/lib/test_firmware.c
+++ b/lib/test_firmware.c
@@ -52,6 +52,9 @@ struct test_batched_req {
  * @name: the name of the firmware file to look for
  * @into_buf: when the into_buf is used if this is true
  *	request_firmware_into_buf() will be used instead.
+ * @buf_size: size of buf to allocate when into_buf is true
+ * @file_offset: file offset to request when calling request_firmware_into_buf
+ * @partial: partial read opt when calling request_firmware_into_buf
  * @sync_direct: when the sync trigger is used if this is true
  *	request_firmware_direct() will be used instead.
  * @send_uevent: whether or not to send a uevent for async requests
@@ -91,6 +94,9 @@ struct test_batched_req {
 struct test_config {
 	char *name;
 	bool into_buf;
+	size_t buf_size;
+	size_t file_offset;
+	bool partial;
 	bool sync_direct;
 	bool send_uevent;
 	u8 num_requests;
@@ -185,6 +191,9 @@ static int __test_firmware_config_init(void)
 	test_fw_config->num_requests = TEST_FIRMWARE_NUM_REQS;
 	test_fw_config->send_uevent = true;
 	test_fw_config->into_buf = false;
+	test_fw_config->buf_size = TEST_FIRMWARE_BUF_SIZE;
+	test_fw_config->file_offset = 0;
+	test_fw_config->partial = false;
 	test_fw_config->sync_direct = false;
 	test_fw_config->req_firmware = request_firmware;
 	test_fw_config->test_result = 0;
@@ -238,28 +247,35 @@ static ssize_t config_show(struct device *dev,
 			dev_name(dev));
 
 	if (test_fw_config->name)
-		len += scnprintf(buf+len, PAGE_SIZE - len,
+		len += scnprintf(buf + len, PAGE_SIZE - len,
 				"name:\t%s\n",
 				test_fw_config->name);
 	else
-		len += scnprintf(buf+len, PAGE_SIZE - len,
+		len += scnprintf(buf + len, PAGE_SIZE - len,
 				"name:\tEMTPY\n");
 
-	len += scnprintf(buf+len, PAGE_SIZE - len,
+	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"num_requests:\t%u\n", test_fw_config->num_requests);
 
-	len += scnprintf(buf+len, PAGE_SIZE - len,
+	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"send_uevent:\t\t%s\n",
 			test_fw_config->send_uevent ?
 			"FW_ACTION_HOTPLUG" :
 			"FW_ACTION_NOHOTPLUG");
-	len += scnprintf(buf+len, PAGE_SIZE - len,
+	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"into_buf:\t\t%s\n",
 			test_fw_config->into_buf ? "true" : "false");
-	len += scnprintf(buf+len, PAGE_SIZE - len,
+	len += scnprintf(buf + len, PAGE_SIZE - len,
+			"buf_size:\t%zu\n", test_fw_config->buf_size);
+	len += scnprintf(buf + len, PAGE_SIZE - len,
+			"file_offset:\t%zu\n", test_fw_config->file_offset);
+	len += scnprintf(buf + len, PAGE_SIZE - len,
+			"partial:\t\t%s\n",
+			test_fw_config->partial ? "true" : "false");
+	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"sync_direct:\t\t%s\n",
 			test_fw_config->sync_direct ? "true" : "false");
-	len += scnprintf(buf+len, PAGE_SIZE - len,
+	len += scnprintf(buf + len, PAGE_SIZE - len,
 			"read_fw_idx:\t%u\n", test_fw_config->read_fw_idx);
 
 	mutex_unlock(&test_fw_mutex);
@@ -317,6 +333,30 @@ static ssize_t test_dev_config_show_bool(char *buf, bool val)
 	return snprintf(buf, PAGE_SIZE, "%d\n", val);
 }
 
+static int test_dev_config_update_size_t(const char *buf,
+					 size_t size,
+					 size_t *cfg)
+{
+	int ret;
+	long new;
+
+	ret = kstrtol(buf, 10, &new);
+	if (ret)
+		return ret;
+
+	mutex_lock(&test_fw_mutex);
+	*(size_t *)cfg = new;
+	mutex_unlock(&test_fw_mutex);
+
+	/* Always return full write size even if we didn't consume all */
+	return size;
+}
+
+static ssize_t test_dev_config_show_size_t(char *buf, size_t val)
+{
+	return snprintf(buf, PAGE_SIZE, "%zu\n", val);
+}
+
 static ssize_t test_dev_config_show_int(char *buf, int val)
 {
 	return snprintf(buf, PAGE_SIZE, "%d\n", val);
@@ -402,6 +442,83 @@ static ssize_t config_into_buf_show(struct device *dev,
 }
 static DEVICE_ATTR_RW(config_into_buf);
 
+static ssize_t config_buf_size_store(struct device *dev,
+				     struct device_attribute *attr,
+				     const char *buf, size_t count)
+{
+	int rc;
+
+	mutex_lock(&test_fw_mutex);
+	if (test_fw_config->reqs) {
+		pr_err("Must call release_all_firmware prior to changing config\n");
+		rc = -EINVAL;
+		mutex_unlock(&test_fw_mutex);
+		goto out;
+	}
+	mutex_unlock(&test_fw_mutex);
+
+	rc = test_dev_config_update_size_t(buf, count,
+					   &test_fw_config->buf_size);
+
+out:
+	return rc;
+}
+
+static ssize_t config_buf_size_show(struct device *dev,
+				    struct device_attribute *attr,
+				    char *buf)
+{
+	return test_dev_config_show_size_t(buf, test_fw_config->buf_size);
+}
+static DEVICE_ATTR_RW(config_buf_size);
+
+static ssize_t config_file_offset_store(struct device *dev,
+					struct device_attribute *attr,
+					const char *buf, size_t count)
+{
+	int rc;
+
+	mutex_lock(&test_fw_mutex);
+	if (test_fw_config->reqs) {
+		pr_err("Must call release_all_firmware prior to changing config\n");
+		rc = -EINVAL;
+		mutex_unlock(&test_fw_mutex);
+		goto out;
+	}
+	mutex_unlock(&test_fw_mutex);
+
+	rc = test_dev_config_update_size_t(buf, count,
+					   &test_fw_config->file_offset);
+
+out:
+	return rc;
+}
+
+static ssize_t config_file_offset_show(struct device *dev,
+				       struct device_attribute *attr,
+				       char *buf)
+{
+	return test_dev_config_show_size_t(buf, test_fw_config->file_offset);
+}
+static DEVICE_ATTR_RW(config_file_offset);
+
+static ssize_t config_partial_store(struct device *dev,
+				    struct device_attribute *attr,
+				    const char *buf, size_t count)
+{
+	return test_dev_config_update_bool(buf,
+					   count,
+					   &test_fw_config->partial);
+}
+
+static ssize_t config_partial_show(struct device *dev,
+				   struct device_attribute *attr,
+				   char *buf)
+{
+	return test_dev_config_show_bool(buf, test_fw_config->partial);
+}
+static DEVICE_ATTR_RW(config_partial);
+
 static ssize_t config_sync_direct_store(struct device *dev,
 					struct device_attribute *attr,
 					const char *buf, size_t count)
@@ -659,11 +776,21 @@ static int test_fw_run_batch_request(void *data)
 		if (!test_buf)
 			return -ENOSPC;
 
-		req->rc = request_firmware_into_buf(&req->fw,
-						    req->name,
-						    req->dev,
-						    test_buf,
-						    TEST_FIRMWARE_BUF_SIZE);
+		if (test_fw_config->partial)
+			req->rc = request_partial_firmware_into_buf
+						(&req->fw,
+						 req->name,
+						 req->dev,
+						 test_buf,
+						 test_fw_config->buf_size,
+						 test_fw_config->file_offset);
+		else
+			req->rc = request_firmware_into_buf
+						(&req->fw,
+						 req->name,
+						 req->dev,
+						 test_buf,
+						 test_fw_config->buf_size);
 		if (!req->fw)
 			kfree(test_buf);
 	} else {
@@ -936,6 +1063,9 @@ static struct attribute *test_dev_attrs[] = {
 	TEST_FW_DEV_ATTR(config_name),
 	TEST_FW_DEV_ATTR(config_num_requests),
 	TEST_FW_DEV_ATTR(config_into_buf),
+	TEST_FW_DEV_ATTR(config_buf_size),
+	TEST_FW_DEV_ATTR(config_file_offset),
+	TEST_FW_DEV_ATTR(config_partial),
 	TEST_FW_DEV_ATTR(config_sync_direct),
 	TEST_FW_DEV_ATTR(config_send_uevent),
 	TEST_FW_DEV_ATTR(config_read_fw_idx),
diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh
index fcc281373b4d..c2a2a100114b 100755
--- a/tools/testing/selftests/firmware/fw_filesystem.sh
+++ b/tools/testing/selftests/firmware/fw_filesystem.sh
@@ -149,6 +149,26 @@ config_unset_into_buf()
 	echo 0 >  $DIR/config_into_buf
 }
 
+config_set_buf_size()
+{
+	echo $1 >  $DIR/config_buf_size
+}
+
+config_set_file_offset()
+{
+	echo $1 >  $DIR/config_file_offset
+}
+
+config_set_partial()
+{
+	echo 1 >  $DIR/config_partial
+}
+
+config_unset_partial()
+{
+	echo 0 >  $DIR/config_partial
+}
+
 config_set_sync_direct()
 {
 	echo 1 >  $DIR/config_sync_direct
@@ -207,6 +227,35 @@ read_firmwares()
 	done
 }
 
+read_partial_firmwares()
+{
+	if [ "$(cat $DIR/config_into_buf)" == "1" ]; then
+		fwfile="${FW_INTO_BUF}"
+	else
+		fwfile="${FW}"
+	fi
+
+	if [ "$1" = "xzonly" ]; then
+		fwfile="${fwfile}-orig"
+	fi
+
+	# Strip fwfile down to match partial offset and length
+	partial_data="$(cat $fwfile)"
+	partial_data="${partial_data:$2:$3}"
+
+	for i in $(seq 0 3); do
+		config_set_read_fw_idx $i
+
+		read_firmware="$(cat $DIR/read_firmware)"
+
+		# Verify the contents are what we expect.
+		if [ $read_firmware != $partial_data ]; then
+			echo "request #$i: partial firmware was not loaded" >&2
+			exit 1
+		fi
+	done
+}
+
 read_firmwares_expect_nofile()
 {
 	for i in $(seq 0 3); do
@@ -242,6 +291,21 @@ test_batched_request_firmware_into_buf_nofile()
 	echo "OK"
 }
 
+test_request_partial_firmware_into_buf_nofile()
+{
+	echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2 nofile: "
+	config_reset
+	config_set_name nope-test-firmware.bin
+	config_set_into_buf
+	config_set_partial
+	config_set_buf_size $2
+	config_set_file_offset $1
+	config_trigger_sync
+	read_firmwares_expect_nofile
+	release_all_firmware
+	echo "OK"
+}
+
 test_batched_request_firmware_direct_nofile()
 {
 	echo -n "Batched request_firmware_direct() nofile try #$1: "
@@ -356,6 +420,21 @@ test_request_firmware_nowait_custom()
 	echo "OK"
 }
 
+test_request_partial_firmware_into_buf()
+{
+	echo -n "Test request_partial_firmware_into_buf() off=$1 size=$2: "
+	config_reset
+	config_set_name $TEST_FIRMWARE_INTO_BUF_FILENAME
+	config_set_into_buf
+	config_set_partial
+	config_set_buf_size $2
+	config_set_file_offset $1
+	config_trigger_sync
+	read_partial_firmwares normal $1 $2
+	release_all_firmware
+	echo "OK"
+}
+
 # Only continue if batched request triggers are present on the
 # test-firmware driver
 test_config_present
@@ -383,6 +462,12 @@ for i in $(seq 1 5); do
 	test_request_firmware_nowait_custom $i normal
 done
 
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf 0 10
+test_request_partial_firmware_into_buf 0 5
+test_request_partial_firmware_into_buf 1 6
+test_request_partial_firmware_into_buf 2 10
+
 # Test for file not found, errors are expected, the failure would be
 # a hung task, which would require a hard reset.
 echo
@@ -407,6 +492,12 @@ for i in $(seq 1 5); do
 	test_request_firmware_nowait_custom_nofile $i
 done
 
+# Partial loads cannot use fallback, so do not repeat tests.
+test_request_partial_firmware_into_buf_nofile 0 10
+test_request_partial_firmware_into_buf_nofile 0 5
+test_request_partial_firmware_into_buf_nofile 1 6
+test_request_partial_firmware_into_buf_nofile 2 10
+
 test "$HAS_FW_LOADER_COMPRESS" != "yes" && exit 0
 
 # test with both files present
-- 
cgit v1.2.3


From 07da1223ec939982497db3caccd6215b55acc35c Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@nvidia.com>
Date: Sun, 4 Oct 2020 18:43:37 +0300
Subject: lib/scatterlist: Add support in dynamic allocation of SG table from
 pages

Extend __sg_alloc_table_from_pages to support dynamic allocation of
SG table from pages. It should be used by drivers that can't supply
all the pages at one time.

This function returns the last populated SGE in the table. Users should
pass it as an argument to the function from the second call and forward.
As before, nents will be equal to the number of populated SGEs (chunks).

With this new extension, drivers can benefit the optimization of merging
contiguous pages without a need to allocate all pages in advance and
hold them in a large buffer.

E.g. with the Infiniband driver that allocates a single page for hold the
pages. For 1TB memory registration, the temporary buffer would consume only
4KB, instead of 2GB.

Link: https://lore.kernel.org/r/20201004154340.1080481-2-leon@kernel.org
Signed-off-by: Maor Gottlieb <maorg@nvidia.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Leon Romanovsky <leonro@nvidia.com>
Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 drivers/gpu/drm/i915/gem/i915_gem_userptr.c |  12 +--
 drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c  |  15 ++--
 include/linux/scatterlist.h                 |  38 +++++----
 lib/scatterlist.c                           | 125 ++++++++++++++++++++++------
 tools/testing/scatterlist/main.c            |   9 +-
 5 files changed, 142 insertions(+), 57 deletions(-)

(limited to 'lib')

diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
index 12b30075134a..f2eaed6aca3d 100644
--- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
+++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c
@@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 	unsigned int max_segment = i915_sg_segment_size();
 	struct sg_table *st;
 	unsigned int sg_page_sizes;
+	struct scatterlist *sg;
 	int ret;
 
 	st = kmalloc(sizeof(*st), GFP_KERNEL);
@@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj,
 		return ERR_PTR(-ENOMEM);
 
 alloc_table:
-	ret = __sg_alloc_table_from_pages(st, pvec, num_pages,
-					  0, num_pages << PAGE_SHIFT,
-					  max_segment,
-					  GFP_KERNEL);
-	if (ret) {
+	sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0,
+					 num_pages << PAGE_SHIFT, max_segment,
+					 NULL, 0, GFP_KERNEL);
+	if (IS_ERR(sg)) {
 		kfree(st);
-		return ERR_PTR(ret);
+		return ERR_CAST(sg);
 	}
 
 	ret = i915_gem_gtt_prepare_pages(obj, st);
diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
index ab524ab3b0b4..f22acd398b1f 100644
--- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
+++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c
@@ -419,6 +419,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 	int ret = 0;
 	static size_t sgl_size;
 	static size_t sgt_size;
+	struct scatterlist *sg;
 
 	if (vmw_tt->mapped)
 		return 0;
@@ -441,13 +442,15 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt)
 		if (unlikely(ret != 0))
 			return ret;
 
-		ret = __sg_alloc_table_from_pages
-			(&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0,
-			 (unsigned long) vsgt->num_pages << PAGE_SHIFT,
-			 dma_get_max_seg_size(dev_priv->dev->dev),
-			 GFP_KERNEL);
-		if (unlikely(ret != 0))
+		sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages,
+				vsgt->num_pages, 0,
+				(unsigned long) vsgt->num_pages << PAGE_SHIFT,
+				dma_get_max_seg_size(dev_priv->dev->dev),
+				NULL, 0, GFP_KERNEL);
+		if (IS_ERR(sg)) {
+			ret = PTR_ERR(sg);
 			goto out_sg_alloc_fail;
+		}
 
 		if (vsgt->num_pages > vmw_tt->sgt.nents) {
 			uint64_t over_alloc =
diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h
index 45cf7b69d852..36c47e7e66a2 100644
--- a/include/linux/scatterlist.h
+++ b/include/linux/scatterlist.h
@@ -165,6 +165,22 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
 #define for_each_sgtable_dma_sg(sgt, sg, i)	\
 	for_each_sg((sgt)->sgl, sg, (sgt)->nents, i)
 
+static inline void __sg_chain(struct scatterlist *chain_sg,
+			      struct scatterlist *sgl)
+{
+	/*
+	 * offset and length are unused for chain entry. Clear them.
+	 */
+	chain_sg->offset = 0;
+	chain_sg->length = 0;
+
+	/*
+	 * Set lowest bit to indicate a link pointer, and make sure to clear
+	 * the termination bit if it happens to be set.
+	 */
+	chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END;
+}
+
 /**
  * sg_chain - Chain two sglists together
  * @prv:	First scatterlist
@@ -178,18 +194,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf,
 static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents,
 			    struct scatterlist *sgl)
 {
-	/*
-	 * offset and length are unused for chain entry.  Clear them.
-	 */
-	prv[prv_nents - 1].offset = 0;
-	prv[prv_nents - 1].length = 0;
-
-	/*
-	 * Set lowest bit to indicate a link pointer, and make sure to clear
-	 * the termination bit if it happens to be set.
-	 */
-	prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN)
-					& ~SG_END;
+	__sg_chain(&prv[prv_nents - 1], sgl);
 }
 
 /**
@@ -286,10 +291,11 @@ void sg_free_table(struct sg_table *);
 int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int,
 		     struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *);
 int sg_alloc_table(struct sg_table *, unsigned int, gfp_t);
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
-				unsigned int n_pages, unsigned int offset,
-				unsigned long size, unsigned int max_segment,
-				gfp_t gfp_mask);
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+		struct page **pages, unsigned int n_pages, unsigned int offset,
+		unsigned long size, unsigned int max_segment,
+		struct scatterlist *prv, unsigned int left_pages,
+		gfp_t gfp_mask);
 int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 			      unsigned int n_pages, unsigned int offset,
 			      unsigned long size, gfp_t gfp_mask);
diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 5d63a8857f36..e102fdfaa75b 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -365,6 +365,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask)
 }
 EXPORT_SYMBOL(sg_alloc_table);
 
+static struct scatterlist *get_next_sg(struct sg_table *table,
+				       struct scatterlist *cur,
+				       unsigned long needed_sges,
+				       gfp_t gfp_mask)
+{
+	struct scatterlist *new_sg, *next_sg;
+	unsigned int alloc_size;
+
+	if (cur) {
+		next_sg = sg_next(cur);
+		/* Check if last entry should be keeped for chainning */
+		if (!sg_is_last(next_sg) || needed_sges == 1)
+			return next_sg;
+	}
+
+	alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC);
+	new_sg = sg_kmalloc(alloc_size, gfp_mask);
+	if (!new_sg)
+		return ERR_PTR(-ENOMEM);
+	sg_init_table(new_sg, alloc_size);
+	if (cur) {
+		__sg_chain(next_sg, new_sg);
+		table->orig_nents += alloc_size - 1;
+	} else {
+		table->sgl = new_sg;
+		table->orig_nents = alloc_size;
+		table->nents = 0;
+	}
+	return new_sg;
+}
+
 /**
  * __sg_alloc_table_from_pages - Allocate and initialize an sg table from
  *			         an array of pages
@@ -374,29 +405,63 @@ EXPORT_SYMBOL(sg_alloc_table);
  * @offset:      Offset from start of the first page to the start of a buffer
  * @size:        Number of valid bytes in the buffer (after offset)
  * @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
+ * @prv:	 Last populated sge in sgt
+ * @left_pages:  Left pages caller have to set after this call
  * @gfp_mask:	 GFP allocation mask
  *
- *  Description:
- *    Allocate and initialize an sg table from a list of pages. Contiguous
- *    ranges of the pages are squashed into a single scatterlist node up to the
- *    maximum size specified in @max_segment. An user may provide an offset at a
- *    start and a size of valid data in a buffer specified by the page array.
- *    The returned sg table is released by sg_free_table.
+ * Description:
+ *    If @prv is NULL, allocate and initialize an sg table from a list of pages,
+ *    else reuse the scatterlist passed in at @prv.
+ *    Contiguous ranges of the pages are squashed into a single scatterlist
+ *    entry up to the maximum size specified in @max_segment.  A user may
+ *    provide an offset at a start and a size of valid data in a buffer
+ *    specified by the page array.
  *
  * Returns:
- *   0 on success, negative error on failure
+ *   Last SGE in sgt on success, PTR_ERR on otherwise.
+ *   The allocation in @sgt must be released by sg_free_table.
+ *
+ * Notes:
+ *   If this function returns non-0 (eg failure), the caller must call
+ *   sg_free_table() to cleanup any leftover allocations.
  */
-int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
-				unsigned int n_pages, unsigned int offset,
-				unsigned long size, unsigned int max_segment,
-				gfp_t gfp_mask)
+struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
+		struct page **pages, unsigned int n_pages, unsigned int offset,
+		unsigned long size, unsigned int max_segment,
+		struct scatterlist *prv, unsigned int left_pages,
+		gfp_t gfp_mask)
 {
-	unsigned int chunks, cur_page, seg_len, i;
-	int ret;
-	struct scatterlist *s;
+	unsigned int chunks, cur_page, seg_len, i, prv_len = 0;
+	unsigned int added_nents = 0;
+	struct scatterlist *s = prv;
 
 	if (WARN_ON(!max_segment || offset_in_page(max_segment)))
-		return -EINVAL;
+		return ERR_PTR(-EINVAL);
+
+	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	if (prv) {
+		unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE +
+				       prv->offset + prv->length) /
+				      PAGE_SIZE;
+
+		if (WARN_ON(offset))
+			return ERR_PTR(-EINVAL);
+
+		/* Merge contiguous pages into the last SG */
+		prv_len = prv->length;
+		while (n_pages && page_to_pfn(pages[0]) == paddr) {
+			if (prv->length + PAGE_SIZE > max_segment)
+				break;
+			prv->length += PAGE_SIZE;
+			paddr++;
+			pages++;
+			n_pages--;
+		}
+		if (!n_pages)
+			goto out;
+	}
 
 	/* compute number of contiguous chunks */
 	chunks = 1;
@@ -410,13 +475,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 		}
 	}
 
-	ret = sg_alloc_table(sgt, chunks, gfp_mask);
-	if (unlikely(ret))
-		return ret;
-
 	/* merging chunks and putting them into the scatterlist */
 	cur_page = 0;
-	for_each_sg(sgt->sgl, s, sgt->orig_nents, i) {
+	for (i = 0; i < chunks; i++) {
 		unsigned int j, chunk_size;
 
 		/* look for the end of the current chunk */
@@ -429,15 +490,30 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 				break;
 		}
 
+		/* Pass how many chunks might be left */
+		s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask);
+		if (IS_ERR(s)) {
+			/*
+			 * Adjust entry length to be as before function was
+			 * called.
+			 */
+			if (prv)
+				prv->length = prv_len;
+			return s;
+		}
 		chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset;
 		sg_set_page(s, pages[cur_page],
 			    min_t(unsigned long, size, chunk_size), offset);
+		added_nents++;
 		size -= chunk_size;
 		offset = 0;
 		cur_page = j;
 	}
-
-	return 0;
+	sgt->nents += added_nents;
+out:
+	if (!left_pages)
+		sg_mark_end(s);
+	return s;
 }
 EXPORT_SYMBOL(__sg_alloc_table_from_pages);
 
@@ -465,8 +541,9 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 			      unsigned int n_pages, unsigned int offset,
 			      unsigned long size, gfp_t gfp_mask)
 {
-	return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size,
-					   SCATTERLIST_MAX_SEGMENT, gfp_mask);
+	return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages,
+			offset, size, SCATTERLIST_MAX_SEGMENT,
+			NULL, 0, gfp_mask));
 }
 EXPORT_SYMBOL(sg_alloc_table_from_pages);
 
diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c
index c6db0a1989b2..b2c7e9f7b8d3 100644
--- a/tools/testing/scatterlist/main.c
+++ b/tools/testing/scatterlist/main.c
@@ -79,14 +79,13 @@ int main(void)
 	for (i = 0, test = tests; test->expected_segments; test++, i++) {
 		struct page *pages[MAX_PAGES];
 		struct sg_table st;
-		int ret;
+		struct scatterlist *sg;
 
 		set_pages(pages, test->pfn, test->num_pages);
 
-		ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages,
-						  0, test->size, test->max_seg,
-						  GFP_KERNEL);
-		assert(ret == test->alloc_ret);
+		sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0,
+				test->size, test->max_seg, NULL, 0, GFP_KERNEL);
+		assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret);
 
 		if (test->alloc_ret)
 			continue;
-- 
cgit v1.2.3


From ec6347bb43395cb92126788a1a5b25302543f815 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Mon, 5 Oct 2020 20:40:16 -0700
Subject: x86, powerpc: Rename memcpy_mcsafe() to copy_mc_to_{user, kernel}()

In reaction to a proposal to introduce a memcpy_mcsafe_fast()
implementation Linus points out that memcpy_mcsafe() is poorly named
relative to communicating the scope of the interface. Specifically what
addresses are valid to pass as source, destination, and what faults /
exceptions are handled.

Of particular concern is that even though x86 might be able to handle
the semantics of copy_mc_to_user() with its common copy_user_generic()
implementation other archs likely need / want an explicit path for this
case:

  On Fri, May 1, 2020 at 11:28 AM Linus Torvalds <torvalds@linux-foundation.org> wrote:
  >
  > On Thu, Apr 30, 2020 at 6:21 PM Dan Williams <dan.j.williams@intel.com> wrote:
  > >
  > > However now I see that copy_user_generic() works for the wrong reason.
  > > It works because the exception on the source address due to poison
  > > looks no different than a write fault on the user address to the
  > > caller, it's still just a short copy. So it makes copy_to_user() work
  > > for the wrong reason relative to the name.
  >
  > Right.
  >
  > And it won't work that way on other architectures. On x86, we have a
  > generic function that can take faults on either side, and we use it
  > for both cases (and for the "in_user" case too), but that's an
  > artifact of the architecture oddity.
  >
  > In fact, it's probably wrong even on x86 - because it can hide bugs -
  > but writing those things is painful enough that everybody prefers
  > having just one function.

Replace a single top-level memcpy_mcsafe() with either
copy_mc_to_user(), or copy_mc_to_kernel().

Introduce an x86 copy_mc_fragile() name as the rename for the
low-level x86 implementation formerly named memcpy_mcsafe(). It is used
as the slow / careful backend that is supplanted by a fast
copy_mc_generic() in a follow-on patch.

One side-effect of this reorganization is that separating copy_mc_64.S
to its own file means that perf no longer needs to track dependencies
for its memcpy_64.S benchmarks.

 [ bp: Massage a bit. ]

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Borislav Petkov <bp@suse.de>
Reviewed-by: Tony Luck <tony.luck@intel.com>
Acked-by: Michael Ellerman <mpe@ellerman.id.au>
Cc: <stable@vger.kernel.org>
Link: http://lore.kernel.org/r/CAHk-=wjSqtXAqfUJxFtWNwmguFASTgB0dz1dT3V-78Quiezqbg@mail.gmail.com
Link: https://lkml.kernel.org/r/160195561680.2163339.11574962055305783722.stgit@dwillia2-desk3.amr.corp.intel.com
---
 arch/powerpc/Kconfig                               |   2 +-
 arch/powerpc/include/asm/string.h                  |   2 -
 arch/powerpc/include/asm/uaccess.h                 |  40 ++--
 arch/powerpc/lib/Makefile                          |   2 +-
 arch/powerpc/lib/copy_mc_64.S                      | 242 +++++++++++++++++++++
 arch/powerpc/lib/memcpy_mcsafe_64.S                | 242 ---------------------
 arch/x86/Kconfig                                   |   2 +-
 arch/x86/Kconfig.debug                             |   2 +-
 arch/x86/include/asm/copy_mc_test.h                |  75 +++++++
 arch/x86/include/asm/mce.h                         |   9 +
 arch/x86/include/asm/mcsafe_test.h                 |  75 -------
 arch/x86/include/asm/string_64.h                   |  32 ---
 arch/x86/include/asm/uaccess.h                     |   9 +
 arch/x86/include/asm/uaccess_64.h                  |  20 --
 arch/x86/kernel/cpu/mce/core.c                     |   8 +-
 arch/x86/kernel/quirks.c                           |  10 +-
 arch/x86/lib/Makefile                              |   1 +
 arch/x86/lib/copy_mc.c                             |  82 +++++++
 arch/x86/lib/copy_mc_64.S                          | 127 +++++++++++
 arch/x86/lib/memcpy_64.S                           | 115 ----------
 arch/x86/lib/usercopy_64.c                         |  21 --
 drivers/md/dm-writecache.c                         |  15 +-
 drivers/nvdimm/claim.c                             |   2 +-
 drivers/nvdimm/pmem.c                              |   6 +-
 include/linux/string.h                             |   9 +-
 include/linux/uaccess.h                            |  13 ++
 include/linux/uio.h                                |  10 +-
 lib/Kconfig                                        |   7 +-
 lib/iov_iter.c                                     |  48 ++--
 tools/arch/x86/include/asm/mcsafe_test.h           |  13 --
 tools/arch/x86/lib/memcpy_64.S                     | 115 ----------
 tools/objtool/check.c                              |   4 +-
 tools/perf/bench/Build                             |   1 -
 tools/perf/bench/mem-memcpy-x86-64-lib.c           |  24 --
 tools/testing/nvdimm/test/nfit.c                   |  49 +++--
 .../testing/selftests/powerpc/copyloops/.gitignore |   2 +-
 tools/testing/selftests/powerpc/copyloops/Makefile |   6 +-
 .../selftests/powerpc/copyloops/copy_mc_64.S       |   1 +
 .../selftests/powerpc/copyloops/memcpy_mcsafe_64.S |   1 -
 39 files changed, 673 insertions(+), 771 deletions(-)
 create mode 100644 arch/powerpc/lib/copy_mc_64.S
 delete mode 100644 arch/powerpc/lib/memcpy_mcsafe_64.S
 create mode 100644 arch/x86/include/asm/copy_mc_test.h
 delete mode 100644 arch/x86/include/asm/mcsafe_test.h
 create mode 100644 arch/x86/lib/copy_mc.c
 create mode 100644 arch/x86/lib/copy_mc_64.S
 delete mode 100644 tools/arch/x86/include/asm/mcsafe_test.h
 delete mode 100644 tools/perf/bench/mem-memcpy-x86-64-lib.c
 create mode 120000 tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
 delete mode 120000 tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S

(limited to 'lib')

diff --git a/arch/powerpc/Kconfig b/arch/powerpc/Kconfig
index 1f48bbfb3ce9..76473eda3426 100644
--- a/arch/powerpc/Kconfig
+++ b/arch/powerpc/Kconfig
@@ -136,7 +136,7 @@ config PPC
 	select ARCH_HAS_STRICT_KERNEL_RWX	if (PPC32 && !HIBERNATION)
 	select ARCH_HAS_TICK_BROADCAST		if GENERIC_CLOCKEVENTS_BROADCAST
 	select ARCH_HAS_UACCESS_FLUSHCACHE
-	select ARCH_HAS_UACCESS_MCSAFE		if PPC64
+	select ARCH_HAS_COPY_MC			if PPC64
 	select ARCH_HAS_UBSAN_SANITIZE_ALL
 	select ARCH_HAVE_NMI_SAFE_CMPXCHG
 	select ARCH_KEEP_MEMBLOCK
diff --git a/arch/powerpc/include/asm/string.h b/arch/powerpc/include/asm/string.h
index 283552cd0e58..2aa0e31e6884 100644
--- a/arch/powerpc/include/asm/string.h
+++ b/arch/powerpc/include/asm/string.h
@@ -53,9 +53,7 @@ void *__memmove(void *to, const void *from, __kernel_size_t n);
 #ifndef CONFIG_KASAN
 #define __HAVE_ARCH_MEMSET32
 #define __HAVE_ARCH_MEMSET64
-#define __HAVE_ARCH_MEMCPY_MCSAFE
 
-extern int memcpy_mcsafe(void *dst, const void *src, __kernel_size_t sz);
 extern void *__memset16(uint16_t *, uint16_t v, __kernel_size_t);
 extern void *__memset32(uint32_t *, uint32_t v, __kernel_size_t);
 extern void *__memset64(uint64_t *, uint64_t v, __kernel_size_t);
diff --git a/arch/powerpc/include/asm/uaccess.h b/arch/powerpc/include/asm/uaccess.h
index 00699903f1ef..20a35373cafc 100644
--- a/arch/powerpc/include/asm/uaccess.h
+++ b/arch/powerpc/include/asm/uaccess.h
@@ -435,6 +435,32 @@ do {								\
 extern unsigned long __copy_tofrom_user(void __user *to,
 		const void __user *from, unsigned long size);
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_generic(void *to, const void *from, unsigned long size);
+
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned long size)
+{
+	return copy_mc_generic(to, from, size);
+}
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+static inline unsigned long __must_check
+copy_mc_to_user(void __user *to, const void *from, unsigned long n)
+{
+	if (likely(check_copy_size(from, n, true))) {
+		if (access_ok(to, n)) {
+			allow_write_to_user(to, n);
+			n = copy_mc_generic((void *)to, from, n);
+			prevent_write_to_user(to, n);
+		}
+	}
+
+	return n;
+}
+#endif
+
 #ifdef __powerpc64__
 static inline unsigned long
 raw_copy_in_user(void __user *to, const void __user *from, unsigned long n)
@@ -523,20 +549,6 @@ raw_copy_to_user(void __user *to, const void *from, unsigned long n)
 	return ret;
 }
 
-static __always_inline unsigned long __must_check
-copy_to_user_mcsafe(void __user *to, const void *from, unsigned long n)
-{
-	if (likely(check_copy_size(from, n, true))) {
-		if (access_ok(to, n)) {
-			allow_write_to_user(to, n);
-			n = memcpy_mcsafe((void *)to, from, n);
-			prevent_write_to_user(to, n);
-		}
-	}
-
-	return n;
-}
-
 unsigned long __arch_clear_user(void __user *addr, unsigned long size);
 
 static inline unsigned long clear_user(void __user *addr, unsigned long size)
diff --git a/arch/powerpc/lib/Makefile b/arch/powerpc/lib/Makefile
index d66a645503eb..69a91b571845 100644
--- a/arch/powerpc/lib/Makefile
+++ b/arch/powerpc/lib/Makefile
@@ -39,7 +39,7 @@ obj-$(CONFIG_PPC_BOOK3S_64) += copyuser_power7.o copypage_power7.o \
 			       memcpy_power7.o
 
 obj64-y	+= copypage_64.o copyuser_64.o mem_64.o hweight_64.o \
-	   memcpy_64.o memcpy_mcsafe_64.o
+	   memcpy_64.o copy_mc_64.o
 
 ifndef CONFIG_PPC_QUEUED_SPINLOCKS
 obj64-$(CONFIG_SMP)	+= locks.o
diff --git a/arch/powerpc/lib/copy_mc_64.S b/arch/powerpc/lib/copy_mc_64.S
new file mode 100644
index 000000000000..88d46c471493
--- /dev/null
+++ b/arch/powerpc/lib/copy_mc_64.S
@@ -0,0 +1,242 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * Copyright (C) IBM Corporation, 2011
+ * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
+ * Author - Balbir Singh <bsingharora@gmail.com>
+ */
+#include <asm/ppc_asm.h>
+#include <asm/errno.h>
+#include <asm/export.h>
+
+	.macro err1
+100:
+	EX_TABLE(100b,.Ldo_err1)
+	.endm
+
+	.macro err2
+200:
+	EX_TABLE(200b,.Ldo_err2)
+	.endm
+
+	.macro err3
+300:	EX_TABLE(300b,.Ldone)
+	.endm
+
+.Ldo_err2:
+	ld	r22,STK_REG(R22)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r14,STK_REG(R14)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+.Ldo_err1:
+	/* Do a byte by byte copy to get the exact remaining size */
+	mtctr	r7
+46:
+err3;	lbz	r0,0(r4)
+	addi	r4,r4,1
+err3;	stb	r0,0(r3)
+	addi	r3,r3,1
+	bdnz	46b
+	li	r3,0
+	blr
+
+.Ldone:
+	mfctr	r3
+	blr
+
+
+_GLOBAL(copy_mc_generic)
+	mr	r7,r5
+	cmpldi	r5,16
+	blt	.Lshort_copy
+
+.Lcopy:
+	/* Get the source 8B aligned */
+	neg	r6,r4
+	mtocrf	0x01,r6
+	clrldi	r6,r6,(64-3)
+
+	bf	cr7*4+3,1f
+err1;	lbz	r0,0(r4)
+	addi	r4,r4,1
+err1;	stb	r0,0(r3)
+	addi	r3,r3,1
+	subi	r7,r7,1
+
+1:	bf	cr7*4+2,2f
+err1;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+	subi	r7,r7,2
+
+2:	bf	cr7*4+1,3f
+err1;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+	subi	r7,r7,4
+
+3:	sub	r5,r5,r6
+	cmpldi	r5,128
+
+	mflr	r0
+	stdu	r1,-STACKFRAMESIZE(r1)
+	std	r14,STK_REG(R14)(r1)
+	std	r15,STK_REG(R15)(r1)
+	std	r16,STK_REG(R16)(r1)
+	std	r17,STK_REG(R17)(r1)
+	std	r18,STK_REG(R18)(r1)
+	std	r19,STK_REG(R19)(r1)
+	std	r20,STK_REG(R20)(r1)
+	std	r21,STK_REG(R21)(r1)
+	std	r22,STK_REG(R22)(r1)
+	std	r0,STACKFRAMESIZE+16(r1)
+
+	blt	5f
+	srdi	r6,r5,7
+	mtctr	r6
+
+	/* Now do cacheline (128B) sized loads and stores. */
+	.align	5
+4:
+err2;	ld	r0,0(r4)
+err2;	ld	r6,8(r4)
+err2;	ld	r8,16(r4)
+err2;	ld	r9,24(r4)
+err2;	ld	r10,32(r4)
+err2;	ld	r11,40(r4)
+err2;	ld	r12,48(r4)
+err2;	ld	r14,56(r4)
+err2;	ld	r15,64(r4)
+err2;	ld	r16,72(r4)
+err2;	ld	r17,80(r4)
+err2;	ld	r18,88(r4)
+err2;	ld	r19,96(r4)
+err2;	ld	r20,104(r4)
+err2;	ld	r21,112(r4)
+err2;	ld	r22,120(r4)
+	addi	r4,r4,128
+err2;	std	r0,0(r3)
+err2;	std	r6,8(r3)
+err2;	std	r8,16(r3)
+err2;	std	r9,24(r3)
+err2;	std	r10,32(r3)
+err2;	std	r11,40(r3)
+err2;	std	r12,48(r3)
+err2;	std	r14,56(r3)
+err2;	std	r15,64(r3)
+err2;	std	r16,72(r3)
+err2;	std	r17,80(r3)
+err2;	std	r18,88(r3)
+err2;	std	r19,96(r3)
+err2;	std	r20,104(r3)
+err2;	std	r21,112(r3)
+err2;	std	r22,120(r3)
+	addi	r3,r3,128
+	subi	r7,r7,128
+	bdnz	4b
+
+	clrldi	r5,r5,(64-7)
+
+	/* Up to 127B to go */
+5:	srdi	r6,r5,4
+	mtocrf	0x01,r6
+
+6:	bf	cr7*4+1,7f
+err2;	ld	r0,0(r4)
+err2;	ld	r6,8(r4)
+err2;	ld	r8,16(r4)
+err2;	ld	r9,24(r4)
+err2;	ld	r10,32(r4)
+err2;	ld	r11,40(r4)
+err2;	ld	r12,48(r4)
+err2;	ld	r14,56(r4)
+	addi	r4,r4,64
+err2;	std	r0,0(r3)
+err2;	std	r6,8(r3)
+err2;	std	r8,16(r3)
+err2;	std	r9,24(r3)
+err2;	std	r10,32(r3)
+err2;	std	r11,40(r3)
+err2;	std	r12,48(r3)
+err2;	std	r14,56(r3)
+	addi	r3,r3,64
+	subi	r7,r7,64
+
+7:	ld	r14,STK_REG(R14)(r1)
+	ld	r15,STK_REG(R15)(r1)
+	ld	r16,STK_REG(R16)(r1)
+	ld	r17,STK_REG(R17)(r1)
+	ld	r18,STK_REG(R18)(r1)
+	ld	r19,STK_REG(R19)(r1)
+	ld	r20,STK_REG(R20)(r1)
+	ld	r21,STK_REG(R21)(r1)
+	ld	r22,STK_REG(R22)(r1)
+	addi	r1,r1,STACKFRAMESIZE
+
+	/* Up to 63B to go */
+	bf	cr7*4+2,8f
+err1;	ld	r0,0(r4)
+err1;	ld	r6,8(r4)
+err1;	ld	r8,16(r4)
+err1;	ld	r9,24(r4)
+	addi	r4,r4,32
+err1;	std	r0,0(r3)
+err1;	std	r6,8(r3)
+err1;	std	r8,16(r3)
+err1;	std	r9,24(r3)
+	addi	r3,r3,32
+	subi	r7,r7,32
+
+	/* Up to 31B to go */
+8:	bf	cr7*4+3,9f
+err1;	ld	r0,0(r4)
+err1;	ld	r6,8(r4)
+	addi	r4,r4,16
+err1;	std	r0,0(r3)
+err1;	std	r6,8(r3)
+	addi	r3,r3,16
+	subi	r7,r7,16
+
+9:	clrldi	r5,r5,(64-4)
+
+	/* Up to 15B to go */
+.Lshort_copy:
+	mtocrf	0x01,r5
+	bf	cr7*4+0,12f
+err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
+err1;	lwz	r6,4(r4)
+	addi	r4,r4,8
+err1;	stw	r0,0(r3)
+err1;	stw	r6,4(r3)
+	addi	r3,r3,8
+	subi	r7,r7,8
+
+12:	bf	cr7*4+1,13f
+err1;	lwz	r0,0(r4)
+	addi	r4,r4,4
+err1;	stw	r0,0(r3)
+	addi	r3,r3,4
+	subi	r7,r7,4
+
+13:	bf	cr7*4+2,14f
+err1;	lhz	r0,0(r4)
+	addi	r4,r4,2
+err1;	sth	r0,0(r3)
+	addi	r3,r3,2
+	subi	r7,r7,2
+
+14:	bf	cr7*4+3,15f
+err1;	lbz	r0,0(r4)
+err1;	stb	r0,0(r3)
+
+15:	li	r3,0
+	blr
+
+EXPORT_SYMBOL_GPL(copy_mc_generic);
diff --git a/arch/powerpc/lib/memcpy_mcsafe_64.S b/arch/powerpc/lib/memcpy_mcsafe_64.S
deleted file mode 100644
index cb882d9a6d8a..000000000000
--- a/arch/powerpc/lib/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1,242 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/*
- * Copyright (C) IBM Corporation, 2011
- * Derived from copyuser_power7.s by Anton Blanchard <anton@au.ibm.com>
- * Author - Balbir Singh <bsingharora@gmail.com>
- */
-#include <asm/ppc_asm.h>
-#include <asm/errno.h>
-#include <asm/export.h>
-
-	.macro err1
-100:
-	EX_TABLE(100b,.Ldo_err1)
-	.endm
-
-	.macro err2
-200:
-	EX_TABLE(200b,.Ldo_err2)
-	.endm
-
-	.macro err3
-300:	EX_TABLE(300b,.Ldone)
-	.endm
-
-.Ldo_err2:
-	ld	r22,STK_REG(R22)(r1)
-	ld	r21,STK_REG(R21)(r1)
-	ld	r20,STK_REG(R20)(r1)
-	ld	r19,STK_REG(R19)(r1)
-	ld	r18,STK_REG(R18)(r1)
-	ld	r17,STK_REG(R17)(r1)
-	ld	r16,STK_REG(R16)(r1)
-	ld	r15,STK_REG(R15)(r1)
-	ld	r14,STK_REG(R14)(r1)
-	addi	r1,r1,STACKFRAMESIZE
-.Ldo_err1:
-	/* Do a byte by byte copy to get the exact remaining size */
-	mtctr	r7
-46:
-err3;	lbz	r0,0(r4)
-	addi	r4,r4,1
-err3;	stb	r0,0(r3)
-	addi	r3,r3,1
-	bdnz	46b
-	li	r3,0
-	blr
-
-.Ldone:
-	mfctr	r3
-	blr
-
-
-_GLOBAL(memcpy_mcsafe)
-	mr	r7,r5
-	cmpldi	r5,16
-	blt	.Lshort_copy
-
-.Lcopy:
-	/* Get the source 8B aligned */
-	neg	r6,r4
-	mtocrf	0x01,r6
-	clrldi	r6,r6,(64-3)
-
-	bf	cr7*4+3,1f
-err1;	lbz	r0,0(r4)
-	addi	r4,r4,1
-err1;	stb	r0,0(r3)
-	addi	r3,r3,1
-	subi	r7,r7,1
-
-1:	bf	cr7*4+2,2f
-err1;	lhz	r0,0(r4)
-	addi	r4,r4,2
-err1;	sth	r0,0(r3)
-	addi	r3,r3,2
-	subi	r7,r7,2
-
-2:	bf	cr7*4+1,3f
-err1;	lwz	r0,0(r4)
-	addi	r4,r4,4
-err1;	stw	r0,0(r3)
-	addi	r3,r3,4
-	subi	r7,r7,4
-
-3:	sub	r5,r5,r6
-	cmpldi	r5,128
-
-	mflr	r0
-	stdu	r1,-STACKFRAMESIZE(r1)
-	std	r14,STK_REG(R14)(r1)
-	std	r15,STK_REG(R15)(r1)
-	std	r16,STK_REG(R16)(r1)
-	std	r17,STK_REG(R17)(r1)
-	std	r18,STK_REG(R18)(r1)
-	std	r19,STK_REG(R19)(r1)
-	std	r20,STK_REG(R20)(r1)
-	std	r21,STK_REG(R21)(r1)
-	std	r22,STK_REG(R22)(r1)
-	std	r0,STACKFRAMESIZE+16(r1)
-
-	blt	5f
-	srdi	r6,r5,7
-	mtctr	r6
-
-	/* Now do cacheline (128B) sized loads and stores. */
-	.align	5
-4:
-err2;	ld	r0,0(r4)
-err2;	ld	r6,8(r4)
-err2;	ld	r8,16(r4)
-err2;	ld	r9,24(r4)
-err2;	ld	r10,32(r4)
-err2;	ld	r11,40(r4)
-err2;	ld	r12,48(r4)
-err2;	ld	r14,56(r4)
-err2;	ld	r15,64(r4)
-err2;	ld	r16,72(r4)
-err2;	ld	r17,80(r4)
-err2;	ld	r18,88(r4)
-err2;	ld	r19,96(r4)
-err2;	ld	r20,104(r4)
-err2;	ld	r21,112(r4)
-err2;	ld	r22,120(r4)
-	addi	r4,r4,128
-err2;	std	r0,0(r3)
-err2;	std	r6,8(r3)
-err2;	std	r8,16(r3)
-err2;	std	r9,24(r3)
-err2;	std	r10,32(r3)
-err2;	std	r11,40(r3)
-err2;	std	r12,48(r3)
-err2;	std	r14,56(r3)
-err2;	std	r15,64(r3)
-err2;	std	r16,72(r3)
-err2;	std	r17,80(r3)
-err2;	std	r18,88(r3)
-err2;	std	r19,96(r3)
-err2;	std	r20,104(r3)
-err2;	std	r21,112(r3)
-err2;	std	r22,120(r3)
-	addi	r3,r3,128
-	subi	r7,r7,128
-	bdnz	4b
-
-	clrldi	r5,r5,(64-7)
-
-	/* Up to 127B to go */
-5:	srdi	r6,r5,4
-	mtocrf	0x01,r6
-
-6:	bf	cr7*4+1,7f
-err2;	ld	r0,0(r4)
-err2;	ld	r6,8(r4)
-err2;	ld	r8,16(r4)
-err2;	ld	r9,24(r4)
-err2;	ld	r10,32(r4)
-err2;	ld	r11,40(r4)
-err2;	ld	r12,48(r4)
-err2;	ld	r14,56(r4)
-	addi	r4,r4,64
-err2;	std	r0,0(r3)
-err2;	std	r6,8(r3)
-err2;	std	r8,16(r3)
-err2;	std	r9,24(r3)
-err2;	std	r10,32(r3)
-err2;	std	r11,40(r3)
-err2;	std	r12,48(r3)
-err2;	std	r14,56(r3)
-	addi	r3,r3,64
-	subi	r7,r7,64
-
-7:	ld	r14,STK_REG(R14)(r1)
-	ld	r15,STK_REG(R15)(r1)
-	ld	r16,STK_REG(R16)(r1)
-	ld	r17,STK_REG(R17)(r1)
-	ld	r18,STK_REG(R18)(r1)
-	ld	r19,STK_REG(R19)(r1)
-	ld	r20,STK_REG(R20)(r1)
-	ld	r21,STK_REG(R21)(r1)
-	ld	r22,STK_REG(R22)(r1)
-	addi	r1,r1,STACKFRAMESIZE
-
-	/* Up to 63B to go */
-	bf	cr7*4+2,8f
-err1;	ld	r0,0(r4)
-err1;	ld	r6,8(r4)
-err1;	ld	r8,16(r4)
-err1;	ld	r9,24(r4)
-	addi	r4,r4,32
-err1;	std	r0,0(r3)
-err1;	std	r6,8(r3)
-err1;	std	r8,16(r3)
-err1;	std	r9,24(r3)
-	addi	r3,r3,32
-	subi	r7,r7,32
-
-	/* Up to 31B to go */
-8:	bf	cr7*4+3,9f
-err1;	ld	r0,0(r4)
-err1;	ld	r6,8(r4)
-	addi	r4,r4,16
-err1;	std	r0,0(r3)
-err1;	std	r6,8(r3)
-	addi	r3,r3,16
-	subi	r7,r7,16
-
-9:	clrldi	r5,r5,(64-4)
-
-	/* Up to 15B to go */
-.Lshort_copy:
-	mtocrf	0x01,r5
-	bf	cr7*4+0,12f
-err1;	lwz	r0,0(r4)	/* Less chance of a reject with word ops */
-err1;	lwz	r6,4(r4)
-	addi	r4,r4,8
-err1;	stw	r0,0(r3)
-err1;	stw	r6,4(r3)
-	addi	r3,r3,8
-	subi	r7,r7,8
-
-12:	bf	cr7*4+1,13f
-err1;	lwz	r0,0(r4)
-	addi	r4,r4,4
-err1;	stw	r0,0(r3)
-	addi	r3,r3,4
-	subi	r7,r7,4
-
-13:	bf	cr7*4+2,14f
-err1;	lhz	r0,0(r4)
-	addi	r4,r4,2
-err1;	sth	r0,0(r3)
-	addi	r3,r3,2
-	subi	r7,r7,2
-
-14:	bf	cr7*4+3,15f
-err1;	lbz	r0,0(r4)
-err1;	stb	r0,0(r3)
-
-15:	li	r3,0
-	blr
-
-EXPORT_SYMBOL_GPL(memcpy_mcsafe);
diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 7101ac64bb20..e876b3a087f9 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -75,7 +75,7 @@ config X86
 	select ARCH_HAS_PTE_DEVMAP		if X86_64
 	select ARCH_HAS_PTE_SPECIAL
 	select ARCH_HAS_UACCESS_FLUSHCACHE	if X86_64
-	select ARCH_HAS_UACCESS_MCSAFE		if X86_64 && X86_MCE
+	select ARCH_HAS_COPY_MC			if X86_64
 	select ARCH_HAS_SET_MEMORY
 	select ARCH_HAS_SET_DIRECT_MAP
 	select ARCH_HAS_STRICT_KERNEL_RWX
diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug
index ee1d3c5834c6..27b5e2bc6a01 100644
--- a/arch/x86/Kconfig.debug
+++ b/arch/x86/Kconfig.debug
@@ -62,7 +62,7 @@ config EARLY_PRINTK_USB_XDBC
 	  You should normally say N here, unless you want to debug early
 	  crashes or need a very simple printk logging facility.
 
-config MCSAFE_TEST
+config COPY_MC_TEST
 	def_bool n
 
 config EFI_PGT_DUMP
diff --git a/arch/x86/include/asm/copy_mc_test.h b/arch/x86/include/asm/copy_mc_test.h
new file mode 100644
index 000000000000..e4991ba96726
--- /dev/null
+++ b/arch/x86/include/asm/copy_mc_test.h
@@ -0,0 +1,75 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _COPY_MC_TEST_H_
+#define _COPY_MC_TEST_H_
+
+#ifndef __ASSEMBLY__
+#ifdef CONFIG_COPY_MC_TEST
+extern unsigned long copy_mc_test_src;
+extern unsigned long copy_mc_test_dst;
+
+static inline void copy_mc_inject_src(void *addr)
+{
+	if (addr)
+		copy_mc_test_src = (unsigned long) addr;
+	else
+		copy_mc_test_src = ~0UL;
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+	if (addr)
+		copy_mc_test_dst = (unsigned long) addr;
+	else
+		copy_mc_test_dst = ~0UL;
+}
+#else /* CONFIG_COPY_MC_TEST */
+static inline void copy_mc_inject_src(void *addr)
+{
+}
+
+static inline void copy_mc_inject_dst(void *addr)
+{
+}
+#endif /* CONFIG_COPY_MC_TEST */
+
+#else /* __ASSEMBLY__ */
+#include <asm/export.h>
+
+#ifdef CONFIG_COPY_MC_TEST
+.macro COPY_MC_TEST_CTL
+	.pushsection .data
+	.align 8
+	.globl copy_mc_test_src
+	copy_mc_test_src:
+		.quad 0
+	EXPORT_SYMBOL_GPL(copy_mc_test_src)
+	.globl copy_mc_test_dst
+	copy_mc_test_dst:
+		.quad 0
+	EXPORT_SYMBOL_GPL(copy_mc_test_dst)
+	.popsection
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+	leaq \count(\reg), %r9
+	cmp copy_mc_test_src, %r9
+	ja \target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+	leaq \count(\reg), %r9
+	cmp copy_mc_test_dst, %r9
+	ja \target
+.endm
+#else
+.macro COPY_MC_TEST_CTL
+.endm
+
+.macro COPY_MC_TEST_SRC reg count target
+.endm
+
+.macro COPY_MC_TEST_DST reg count target
+.endm
+#endif /* CONFIG_COPY_MC_TEST */
+#endif /* __ASSEMBLY__ */
+#endif /* _COPY_MC_TEST_H_ */
diff --git a/arch/x86/include/asm/mce.h b/arch/x86/include/asm/mce.h
index 109af5c7f515..ba2062d6df92 100644
--- a/arch/x86/include/asm/mce.h
+++ b/arch/x86/include/asm/mce.h
@@ -174,6 +174,15 @@ extern void mce_unregister_decode_chain(struct notifier_block *nb);
 
 extern int mce_p5_enabled;
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+extern void enable_copy_mc_fragile(void);
+unsigned long __must_check copy_mc_fragile(void *dst, const void *src, unsigned cnt);
+#else
+static inline void enable_copy_mc_fragile(void)
+{
+}
+#endif
+
 #ifdef CONFIG_X86_MCE
 int mcheck_init(void);
 void mcheck_cpu_init(struct cpuinfo_x86 *c);
diff --git a/arch/x86/include/asm/mcsafe_test.h b/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index eb59804b6201..000000000000
--- a/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,75 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-#ifndef __ASSEMBLY__
-#ifdef CONFIG_MCSAFE_TEST
-extern unsigned long mcsafe_test_src;
-extern unsigned long mcsafe_test_dst;
-
-static inline void mcsafe_inject_src(void *addr)
-{
-	if (addr)
-		mcsafe_test_src = (unsigned long) addr;
-	else
-		mcsafe_test_src = ~0UL;
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
-	if (addr)
-		mcsafe_test_dst = (unsigned long) addr;
-	else
-		mcsafe_test_dst = ~0UL;
-}
-#else /* CONFIG_MCSAFE_TEST */
-static inline void mcsafe_inject_src(void *addr)
-{
-}
-
-static inline void mcsafe_inject_dst(void *addr)
-{
-}
-#endif /* CONFIG_MCSAFE_TEST */
-
-#else /* __ASSEMBLY__ */
-#include <asm/export.h>
-
-#ifdef CONFIG_MCSAFE_TEST
-.macro MCSAFE_TEST_CTL
-	.pushsection .data
-	.align 8
-	.globl mcsafe_test_src
-	mcsafe_test_src:
-		.quad 0
-	EXPORT_SYMBOL_GPL(mcsafe_test_src)
-	.globl mcsafe_test_dst
-	mcsafe_test_dst:
-		.quad 0
-	EXPORT_SYMBOL_GPL(mcsafe_test_dst)
-	.popsection
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-	leaq \count(\reg), %r9
-	cmp mcsafe_test_src, %r9
-	ja \target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-	leaq \count(\reg), %r9
-	cmp mcsafe_test_dst, %r9
-	ja \target
-.endm
-#else
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* CONFIG_MCSAFE_TEST */
-#endif /* __ASSEMBLY__ */
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/arch/x86/include/asm/string_64.h b/arch/x86/include/asm/string_64.h
index 75314c3dbe47..6e450827f677 100644
--- a/arch/x86/include/asm/string_64.h
+++ b/arch/x86/include/asm/string_64.h
@@ -82,38 +82,6 @@ int strcmp(const char *cs, const char *ct);
 
 #endif
 
-#define __HAVE_ARCH_MEMCPY_MCSAFE 1
-__must_check unsigned long __memcpy_mcsafe(void *dst, const void *src,
-		size_t cnt);
-DECLARE_STATIC_KEY_FALSE(mcsafe_key);
-
-/**
- * memcpy_mcsafe - copy memory with indication if a machine check happened
- *
- * @dst:	destination address
- * @src:	source address
- * @cnt:	number of bytes to copy
- *
- * Low level memory copy function that catches machine checks
- * We only call into the "safe" function on systems that can
- * actually do machine check recovery. Everyone else can just
- * use memcpy().
- *
- * Return 0 for success, or number of bytes not copied if there was an
- * exception.
- */
-static __always_inline __must_check unsigned long
-memcpy_mcsafe(void *dst, const void *src, size_t cnt)
-{
-#ifdef CONFIG_X86_MCE
-	if (static_branch_unlikely(&mcsafe_key))
-		return __memcpy_mcsafe(dst, src, cnt);
-	else
-#endif
-		memcpy(dst, src, cnt);
-	return 0;
-}
-
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 #define __HAVE_ARCH_MEMCPY_FLUSHCACHE 1
 void __memcpy_flushcache(void *dst, const void *src, size_t cnt);
diff --git a/arch/x86/include/asm/uaccess.h b/arch/x86/include/asm/uaccess.h
index ecefaffd15d4..eff7fb847149 100644
--- a/arch/x86/include/asm/uaccess.h
+++ b/arch/x86/include/asm/uaccess.h
@@ -455,6 +455,15 @@ extern __must_check long strnlen_user(const char __user *str, long n);
 unsigned long __must_check clear_user(void __user *mem, unsigned long len);
 unsigned long __must_check __clear_user(void __user *mem, unsigned long len);
 
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+unsigned long __must_check
+copy_mc_to_kernel(void *to, const void *from, unsigned len);
+#define copy_mc_to_kernel copy_mc_to_kernel
+
+unsigned long __must_check
+copy_mc_to_user(void *to, const void *from, unsigned len);
+#endif
+
 /*
  * movsl can be slow when source and dest are not both 8-byte aligned
  */
diff --git a/arch/x86/include/asm/uaccess_64.h b/arch/x86/include/asm/uaccess_64.h
index bc10e3dc64fe..e7265a552f4f 100644
--- a/arch/x86/include/asm/uaccess_64.h
+++ b/arch/x86/include/asm/uaccess_64.h
@@ -46,22 +46,6 @@ copy_user_generic(void *to, const void *from, unsigned len)
 	return ret;
 }
 
-static __always_inline __must_check unsigned long
-copy_to_user_mcsafe(void *to, const void *from, unsigned len)
-{
-	unsigned long ret;
-
-	__uaccess_begin();
-	/*
-	 * Note, __memcpy_mcsafe() is explicitly used since it can
-	 * handle exceptions / faults.  memcpy_mcsafe() may fall back to
-	 * memcpy() which lacks this handling.
-	 */
-	ret = __memcpy_mcsafe(to, from, len);
-	__uaccess_end();
-	return ret;
-}
-
 static __always_inline __must_check unsigned long
 raw_copy_from_user(void *dst, const void __user *src, unsigned long size)
 {
@@ -102,8 +86,4 @@ __copy_from_user_flushcache(void *dst, const void __user *src, unsigned size)
 	kasan_check_write(dst, size);
 	return __copy_user_flushcache(dst, src, size);
 }
-
-unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len);
-
 #endif /* _ASM_X86_UACCESS_64_H */
diff --git a/arch/x86/kernel/cpu/mce/core.c b/arch/x86/kernel/cpu/mce/core.c
index 11b6697be010..b5b70f4b351d 100644
--- a/arch/x86/kernel/cpu/mce/core.c
+++ b/arch/x86/kernel/cpu/mce/core.c
@@ -40,7 +40,6 @@
 #include <linux/debugfs.h>
 #include <linux/irq_work.h>
 #include <linux/export.h>
-#include <linux/jump_label.h>
 #include <linux/set_memory.h>
 #include <linux/sync_core.h>
 #include <linux/task_work.h>
@@ -2124,7 +2123,7 @@ void mce_disable_bank(int bank)
 	and older.
  * mce=nobootlog Don't log MCEs from before booting.
  * mce=bios_cmci_threshold Don't program the CMCI threshold
- * mce=recovery force enable memcpy_mcsafe()
+ * mce=recovery force enable copy_mc_fragile()
  */
 static int __init mcheck_enable(char *str)
 {
@@ -2732,13 +2731,10 @@ static void __init mcheck_debugfs_init(void)
 static void __init mcheck_debugfs_init(void) { }
 #endif
 
-DEFINE_STATIC_KEY_FALSE(mcsafe_key);
-EXPORT_SYMBOL_GPL(mcsafe_key);
-
 static int __init mcheck_late_init(void)
 {
 	if (mca_cfg.recovery)
-		static_branch_inc(&mcsafe_key);
+		enable_copy_mc_fragile();
 
 	mcheck_debugfs_init();
 
diff --git a/arch/x86/kernel/quirks.c b/arch/x86/kernel/quirks.c
index 1b10717c9321..6d0df6a58873 100644
--- a/arch/x86/kernel/quirks.c
+++ b/arch/x86/kernel/quirks.c
@@ -8,6 +8,7 @@
 
 #include <asm/hpet.h>
 #include <asm/setup.h>
+#include <asm/mce.h>
 
 #if defined(CONFIG_X86_IO_APIC) && defined(CONFIG_SMP) && defined(CONFIG_PCI)
 
@@ -624,10 +625,6 @@ static void amd_disable_seq_and_redirect_scrub(struct pci_dev *dev)
 DECLARE_PCI_FIXUP_FINAL(PCI_VENDOR_ID_AMD, PCI_DEVICE_ID_AMD_16H_NB_F3,
 			amd_disable_seq_and_redirect_scrub);
 
-#if defined(CONFIG_X86_64) && defined(CONFIG_X86_MCE)
-#include <linux/jump_label.h>
-#include <asm/string_64.h>
-
 /* Ivy Bridge, Haswell, Broadwell */
 static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
 {
@@ -636,7 +633,7 @@ static void quirk_intel_brickland_xeon_ras_cap(struct pci_dev *pdev)
 	pci_read_config_dword(pdev, 0x84, &capid0);
 
 	if (capid0 & 0x10)
-		static_branch_inc(&mcsafe_key);
+		enable_copy_mc_fragile();
 }
 
 /* Skylake */
@@ -653,7 +650,7 @@ static void quirk_intel_purley_xeon_ras_cap(struct pci_dev *pdev)
 	 * enabled, so memory machine check recovery is also enabled.
 	 */
 	if ((capid0 & 0xc0) == 0xc0 || (capid5 & 0x1e0))
-		static_branch_inc(&mcsafe_key);
+		enable_copy_mc_fragile();
 
 }
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x0ec3, quirk_intel_brickland_xeon_ras_cap);
@@ -661,7 +658,6 @@ DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2fc0, quirk_intel_brickland_xeon_
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x6fc0, quirk_intel_brickland_xeon_ras_cap);
 DECLARE_PCI_FIXUP_EARLY(PCI_VENDOR_ID_INTEL, 0x2083, quirk_intel_purley_xeon_ras_cap);
 #endif
-#endif
 
 bool x86_apple_machine;
 EXPORT_SYMBOL(x86_apple_machine);
diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile
index d46fff11f06f..f7d23c9c22b2 100644
--- a/arch/x86/lib/Makefile
+++ b/arch/x86/lib/Makefile
@@ -44,6 +44,7 @@ obj-$(CONFIG_SMP) += msr-smp.o cache-smp.o
 lib-y := delay.o misc.o cmdline.o cpu.o
 lib-y += usercopy_$(BITS).o usercopy.o getuser.o putuser.o
 lib-y += memcpy_$(BITS).o
+lib-$(CONFIG_ARCH_HAS_COPY_MC) += copy_mc.o copy_mc_64.o
 lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o insn-eval.o
 lib-$(CONFIG_RANDOMIZE_BASE) += kaslr.o
 lib-$(CONFIG_FUNCTION_ERROR_INJECTION)	+= error-inject.o
diff --git a/arch/x86/lib/copy_mc.c b/arch/x86/lib/copy_mc.c
new file mode 100644
index 000000000000..2633635530b7
--- /dev/null
+++ b/arch/x86/lib/copy_mc.c
@@ -0,0 +1,82 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/jump_label.h>
+#include <linux/uaccess.h>
+#include <linux/export.h>
+#include <linux/string.h>
+#include <linux/types.h>
+
+#include <asm/mce.h>
+
+#ifdef CONFIG_X86_MCE
+/*
+ * See COPY_MC_TEST for self-test of the copy_mc_fragile()
+ * implementation.
+ */
+static DEFINE_STATIC_KEY_FALSE(copy_mc_fragile_key);
+
+void enable_copy_mc_fragile(void)
+{
+	static_branch_inc(&copy_mc_fragile_key);
+}
+#define copy_mc_fragile_enabled (static_branch_unlikely(&copy_mc_fragile_key))
+
+/*
+ * Similar to copy_user_handle_tail, probe for the write fault point, or
+ * source exception point.
+ */
+__visible notrace unsigned long
+copy_mc_fragile_handle_tail(char *to, char *from, unsigned len)
+{
+	for (; len; --len, to++, from++)
+		if (copy_mc_fragile(to, from, 1))
+			break;
+	return len;
+}
+#else
+/*
+ * No point in doing careful copying, or consulting a static key when
+ * there is no #MC handler in the CONFIG_X86_MCE=n case.
+ */
+void enable_copy_mc_fragile(void)
+{
+}
+#define copy_mc_fragile_enabled (0)
+#endif
+
+/**
+ * copy_mc_to_kernel - memory copy that handles source exceptions
+ *
+ * @dst:	destination address
+ * @src:	source address
+ * @len:	number of bytes to copy
+ *
+ * Call into the 'fragile' version on systems that have trouble
+ * actually do machine check recovery. Everyone else can just
+ * use memcpy().
+ *
+ * Return 0 for success, or number of bytes not copied if there was an
+ * exception.
+ */
+unsigned long __must_check copy_mc_to_kernel(void *dst, const void *src, unsigned len)
+{
+	if (copy_mc_fragile_enabled)
+		return copy_mc_fragile(dst, src, len);
+	memcpy(dst, src, len);
+	return 0;
+}
+EXPORT_SYMBOL_GPL(copy_mc_to_kernel);
+
+unsigned long __must_check copy_mc_to_user(void *dst, const void *src, unsigned len)
+{
+	unsigned long ret;
+
+	if (!copy_mc_fragile_enabled)
+		return copy_user_generic(dst, src, len);
+
+	__uaccess_begin();
+	ret = copy_mc_fragile(dst, src, len);
+	__uaccess_end();
+	return ret;
+}
diff --git a/arch/x86/lib/copy_mc_64.S b/arch/x86/lib/copy_mc_64.S
new file mode 100644
index 000000000000..c3b613c4544a
--- /dev/null
+++ b/arch/x86/lib/copy_mc_64.S
@@ -0,0 +1,127 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Copyright(c) 2016-2020 Intel Corporation. All rights reserved. */
+
+#include <linux/linkage.h>
+#include <asm/copy_mc_test.h>
+#include <asm/export.h>
+#include <asm/asm.h>
+
+#ifndef CONFIG_UML
+
+#ifdef CONFIG_X86_MCE
+COPY_MC_TEST_CTL
+
+/*
+ * copy_mc_fragile - copy memory with indication if an exception / fault happened
+ *
+ * The 'fragile' version is opted into by platform quirks and takes
+ * pains to avoid unrecoverable corner cases like 'fast-string'
+ * instruction sequences, and consuming poison across a cacheline
+ * boundary. The non-fragile version is equivalent to memcpy()
+ * regardless of CPU machine-check-recovery capability.
+ */
+SYM_FUNC_START(copy_mc_fragile)
+	cmpl $8, %edx
+	/* Less than 8 bytes? Go to byte copy loop */
+	jb .L_no_whole_words
+
+	/* Check for bad alignment of source */
+	testl $7, %esi
+	/* Already aligned */
+	jz .L_8byte_aligned
+
+	/* Copy one byte at a time until source is 8-byte aligned */
+	movl %esi, %ecx
+	andl $7, %ecx
+	subl $8, %ecx
+	negl %ecx
+	subl %ecx, %edx
+.L_read_leading_bytes:
+	movb (%rsi), %al
+	COPY_MC_TEST_SRC %rsi 1 .E_leading_bytes
+	COPY_MC_TEST_DST %rdi 1 .E_leading_bytes
+.L_write_leading_bytes:
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_read_leading_bytes
+
+.L_8byte_aligned:
+	movl %edx, %ecx
+	andl $7, %edx
+	shrl $3, %ecx
+	jz .L_no_whole_words
+
+.L_read_words:
+	movq (%rsi), %r8
+	COPY_MC_TEST_SRC %rsi 8 .E_read_words
+	COPY_MC_TEST_DST %rdi 8 .E_write_words
+.L_write_words:
+	movq %r8, (%rdi)
+	addq $8, %rsi
+	addq $8, %rdi
+	decl %ecx
+	jnz .L_read_words
+
+	/* Any trailing bytes? */
+.L_no_whole_words:
+	andl %edx, %edx
+	jz .L_done_memcpy_trap
+
+	/* Copy trailing bytes */
+	movl %edx, %ecx
+.L_read_trailing_bytes:
+	movb (%rsi), %al
+	COPY_MC_TEST_SRC %rsi 1 .E_trailing_bytes
+	COPY_MC_TEST_DST %rdi 1 .E_trailing_bytes
+.L_write_trailing_bytes:
+	movb %al, (%rdi)
+	incq %rsi
+	incq %rdi
+	decl %ecx
+	jnz .L_read_trailing_bytes
+
+	/* Copy successful. Return zero */
+.L_done_memcpy_trap:
+	xorl %eax, %eax
+.L_done:
+	ret
+SYM_FUNC_END(copy_mc_fragile)
+EXPORT_SYMBOL_GPL(copy_mc_fragile)
+
+	.section .fixup, "ax"
+	/*
+	 * Return number of bytes not copied for any failure. Note that
+	 * there is no "tail" handling since the source buffer is 8-byte
+	 * aligned and poison is cacheline aligned.
+	 */
+.E_read_words:
+	shll	$3, %ecx
+.E_leading_bytes:
+	addl	%edx, %ecx
+.E_trailing_bytes:
+	mov	%ecx, %eax
+	jmp	.L_done
+
+	/*
+	 * For write fault handling, given the destination is unaligned,
+	 * we handle faults on multi-byte writes with a byte-by-byte
+	 * copy up to the write-protected page.
+	 */
+.E_write_words:
+	shll	$3, %ecx
+	addl	%edx, %ecx
+	movl	%ecx, %edx
+	jmp copy_mc_fragile_handle_tail
+
+	.previous
+
+	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
+	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
+	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
+	_ASM_EXTABLE(.L_write_words, .E_write_words)
+	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
+#endif /* CONFIG_X86_MCE */
+#endif /* !CONFIG_UML */
diff --git a/arch/x86/lib/memcpy_64.S b/arch/x86/lib/memcpy_64.S
index bbcc05bcefad..037faac46b0c 100644
--- a/arch/x86/lib/memcpy_64.S
+++ b/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 
@@ -187,117 +186,3 @@ SYM_FUNC_START_LOCAL(memcpy_orig)
 SYM_FUNC_END(memcpy_orig)
 
 .popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
-	cmpl $8, %edx
-	/* Less than 8 bytes? Go to byte copy loop */
-	jb .L_no_whole_words
-
-	/* Check for bad alignment of source */
-	testl $7, %esi
-	/* Already aligned */
-	jz .L_8byte_aligned
-
-	/* Copy one byte at a time until source is 8-byte aligned */
-	movl %esi, %ecx
-	andl $7, %ecx
-	subl $8, %ecx
-	negl %ecx
-	subl %ecx, %edx
-.L_read_leading_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
-	movl %edx, %ecx
-	andl $7, %edx
-	shrl $3, %ecx
-	jz .L_no_whole_words
-
-.L_read_words:
-	movq (%rsi), %r8
-	MCSAFE_TEST_SRC %rsi 8 .E_read_words
-	MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
-	movq %r8, (%rdi)
-	addq $8, %rsi
-	addq $8, %rdi
-	decl %ecx
-	jnz .L_read_words
-
-	/* Any trailing bytes? */
-.L_no_whole_words:
-	andl %edx, %edx
-	jz .L_done_memcpy_trap
-
-	/* Copy trailing bytes */
-	movl %edx, %ecx
-.L_read_trailing_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_trailing_bytes
-
-	/* Copy successful. Return zero */
-.L_done_memcpy_trap:
-	xorl %eax, %eax
-.L_done:
-	ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
-	.section .fixup, "ax"
-	/*
-	 * Return number of bytes not copied for any failure. Note that
-	 * there is no "tail" handling since the source buffer is 8-byte
-	 * aligned and poison is cacheline aligned.
-	 */
-.E_read_words:
-	shll	$3, %ecx
-.E_leading_bytes:
-	addl	%edx, %ecx
-.E_trailing_bytes:
-	mov	%ecx, %eax
-	jmp	.L_done
-
-	/*
-	 * For write fault handling, given the destination is unaligned,
-	 * we handle faults on multi-byte writes with a byte-by-byte
-	 * copy up to the write-protected page.
-	 */
-.E_write_words:
-	shll	$3, %ecx
-	addl	%edx, %ecx
-	movl	%ecx, %edx
-	jmp mcsafe_handle_tail
-
-	.previous
-
-	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
-	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
-	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE(.L_write_words, .E_write_words)
-	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/arch/x86/lib/usercopy_64.c b/arch/x86/lib/usercopy_64.c
index b0dfac3d3df7..5f1d4a9ebd5a 100644
--- a/arch/x86/lib/usercopy_64.c
+++ b/arch/x86/lib/usercopy_64.c
@@ -56,27 +56,6 @@ unsigned long clear_user(void __user *to, unsigned long n)
 }
 EXPORT_SYMBOL(clear_user);
 
-/*
- * Similar to copy_user_handle_tail, probe for the write fault point,
- * but reuse __memcpy_mcsafe in case a new read error is encountered.
- * clac() is handled in _copy_to_iter_mcsafe().
- */
-__visible notrace unsigned long
-mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
-	for (; len; --len, to++, from++) {
-		/*
-		 * Call the assembly routine back directly since
-		 * memcpy_mcsafe() may silently fallback to memcpy.
-		 */
-		unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
-		if (rem)
-			break;
-	}
-	return len;
-}
-
 #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE
 /**
  * clean_cache_range - write back a cache range with CLWB
diff --git a/drivers/md/dm-writecache.c b/drivers/md/dm-writecache.c
index 86dbe0c8b45c..9fc18fadacc6 100644
--- a/drivers/md/dm-writecache.c
+++ b/drivers/md/dm-writecache.c
@@ -49,7 +49,7 @@ do {								\
 #define pmem_assign(dest, src)	((dest) = (src))
 #endif
 
-#if defined(__HAVE_ARCH_MEMCPY_MCSAFE) && defined(DM_WRITECACHE_HAS_PMEM)
+#if IS_ENABLED(CONFIG_ARCH_HAS_COPY_MC) && defined(DM_WRITECACHE_HAS_PMEM)
 #define DM_WRITECACHE_HANDLE_HARDWARE_ERRORS
 #endif
 
@@ -984,7 +984,8 @@ static void writecache_resume(struct dm_target *ti)
 	}
 	wc->freelist_size = 0;
 
-	r = memcpy_mcsafe(&sb_seq_count, &sb(wc)->seq_count, sizeof(uint64_t));
+	r = copy_mc_to_kernel(&sb_seq_count, &sb(wc)->seq_count,
+			      sizeof(uint64_t));
 	if (r) {
 		writecache_error(wc, r, "hardware memory error when reading superblock: %d", r);
 		sb_seq_count = cpu_to_le64(0);
@@ -1000,7 +1001,8 @@ static void writecache_resume(struct dm_target *ti)
 			e->seq_count = -1;
 			continue;
 		}
-		r = memcpy_mcsafe(&wme, memory_entry(wc, e), sizeof(struct wc_memory_entry));
+		r = copy_mc_to_kernel(&wme, memory_entry(wc, e),
+				      sizeof(struct wc_memory_entry));
 		if (r) {
 			writecache_error(wc, r, "hardware memory error when reading metadata entry %lu: %d",
 					 (unsigned long)b, r);
@@ -1198,7 +1200,7 @@ static void bio_copy_block(struct dm_writecache *wc, struct bio *bio, void *data
 
 		if (rw == READ) {
 			int r;
-			r = memcpy_mcsafe(buf, data, size);
+			r = copy_mc_to_kernel(buf, data, size);
 			flush_dcache_page(bio_page(bio));
 			if (unlikely(r)) {
 				writecache_error(wc, r, "hardware memory error when reading data: %d", r);
@@ -2341,7 +2343,7 @@ invalid_optional:
 		}
 	}
 
-	r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+	r = copy_mc_to_kernel(&s, sb(wc), sizeof(struct wc_memory_superblock));
 	if (r) {
 		ti->error = "Hardware memory error when reading superblock";
 		goto bad;
@@ -2352,7 +2354,8 @@ invalid_optional:
 			ti->error = "Unable to initialize device";
 			goto bad;
 		}
-		r = memcpy_mcsafe(&s, sb(wc), sizeof(struct wc_memory_superblock));
+		r = copy_mc_to_kernel(&s, sb(wc),
+				      sizeof(struct wc_memory_superblock));
 		if (r) {
 			ti->error = "Hardware memory error when reading superblock";
 			goto bad;
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 45964acba944..22d865ba6353 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -268,7 +268,7 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 	if (rw == READ) {
 		if (unlikely(is_bad_pmem(&nsio->bb, sector, sz_align)))
 			return -EIO;
-		if (memcpy_mcsafe(buf, nsio->addr + offset, size) != 0)
+		if (copy_mc_to_kernel(buf, nsio->addr + offset, size) != 0)
 			return -EIO;
 		return 0;
 	}
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index fab29b514372..5c6939e004e2 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -125,7 +125,7 @@ static blk_status_t read_pmem(struct page *page, unsigned int off,
 	while (len) {
 		mem = kmap_atomic(page);
 		chunk = min_t(unsigned int, len, PAGE_SIZE - off);
-		rem = memcpy_mcsafe(mem + off, pmem_addr, chunk);
+		rem = copy_mc_to_kernel(mem + off, pmem_addr, chunk);
 		kunmap_atomic(mem);
 		if (rem)
 			return BLK_STS_IOERR;
@@ -304,7 +304,7 @@ static long pmem_dax_direct_access(struct dax_device *dax_dev,
 
 /*
  * Use the 'no check' versions of copy_from_iter_flushcache() and
- * copy_to_iter_mcsafe() to bypass HARDENED_USERCOPY overhead. Bounds
+ * copy_mc_to_iter() to bypass HARDENED_USERCOPY overhead. Bounds
  * checking, both file offset and device offset, is handled by
  * dax_iomap_actor()
  */
@@ -317,7 +317,7 @@ static size_t pmem_copy_from_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 static size_t pmem_copy_to_iter(struct dax_device *dax_dev, pgoff_t pgoff,
 		void *addr, size_t bytes, struct iov_iter *i)
 {
-	return _copy_to_iter_mcsafe(addr, bytes, i);
+	return _copy_mc_to_iter(addr, bytes, i);
 }
 
 static const struct dax_operations pmem_dax_ops = {
diff --git a/include/linux/string.h b/include/linux/string.h
index 9b7a0632e87a..b1f3894a0a3e 100644
--- a/include/linux/string.h
+++ b/include/linux/string.h
@@ -161,20 +161,13 @@ extern int bcmp(const void *,const void *,__kernel_size_t);
 #ifndef __HAVE_ARCH_MEMCHR
 extern void * memchr(const void *,int,__kernel_size_t);
 #endif
-#ifndef __HAVE_ARCH_MEMCPY_MCSAFE
-static inline __must_check unsigned long memcpy_mcsafe(void *dst,
-		const void *src, size_t cnt)
-{
-	memcpy(dst, src, cnt);
-	return 0;
-}
-#endif
 #ifndef __HAVE_ARCH_MEMCPY_FLUSHCACHE
 static inline void memcpy_flushcache(void *dst, const void *src, size_t cnt)
 {
 	memcpy(dst, src, cnt);
 }
 #endif
+
 void *memchr_inv(const void *s, int c, size_t n);
 char *strreplace(char *s, char old, char new);
 
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 94b285411659..1ae36bc8db35 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -179,6 +179,19 @@ copy_in_user(void __user *to, const void __user *from, unsigned long n)
 }
 #endif
 
+#ifndef copy_mc_to_kernel
+/*
+ * Without arch opt-in this generic copy_mc_to_kernel() will not handle
+ * #MC (or arch equivalent) during source read.
+ */
+static inline unsigned long __must_check
+copy_mc_to_kernel(void *dst, const void *src, size_t cnt)
+{
+	memcpy(dst, src, cnt);
+	return 0;
+}
+#endif
+
 static __always_inline void pagefault_disabled_inc(void)
 {
 	current->pagefault_disabled++;
diff --git a/include/linux/uio.h b/include/linux/uio.h
index 3835a8a8e9ea..f14410c678bd 100644
--- a/include/linux/uio.h
+++ b/include/linux/uio.h
@@ -185,10 +185,10 @@ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i);
 #define _copy_from_iter_flushcache _copy_from_iter_nocache
 #endif
 
-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i);
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i);
 #else
-#define _copy_to_iter_mcsafe _copy_to_iter
+#define _copy_mc_to_iter _copy_to_iter
 #endif
 
 static __always_inline __must_check
@@ -201,12 +201,12 @@ size_t copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i)
 }
 
 static __always_inline __must_check
-size_t copy_to_iter_mcsafe(void *addr, size_t bytes, struct iov_iter *i)
+size_t copy_mc_to_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
 	if (unlikely(!check_copy_size(addr, bytes, true)))
 		return 0;
 	else
-		return _copy_to_iter_mcsafe(addr, bytes, i);
+		return _copy_mc_to_iter(addr, bytes, i);
 }
 
 size_t iov_iter_zero(size_t bytes, struct iov_iter *);
diff --git a/lib/Kconfig b/lib/Kconfig
index b4b98a03ff98..b46a9fd122c8 100644
--- a/lib/Kconfig
+++ b/lib/Kconfig
@@ -635,7 +635,12 @@ config UACCESS_MEMCPY
 config ARCH_HAS_UACCESS_FLUSHCACHE
 	bool
 
-config ARCH_HAS_UACCESS_MCSAFE
+# arch has a concept of a recoverable synchronous exception due to a
+# memory-read error like x86 machine-check or ARM data-abort, and
+# implements copy_mc_to_{user,kernel} to abort and report
+# 'bytes-transferred' if that exception fires when accessing the source
+# buffer.
+config ARCH_HAS_COPY_MC
 	bool
 
 # Temporary. Goes away when all archs are cleaned up
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 5e40786c8f12..d13304a034f5 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -637,30 +637,30 @@ size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 }
 EXPORT_SYMBOL(_copy_to_iter);
 
-#ifdef CONFIG_ARCH_HAS_UACCESS_MCSAFE
-static int copyout_mcsafe(void __user *to, const void *from, size_t n)
+#ifdef CONFIG_ARCH_HAS_COPY_MC
+static int copyout_mc(void __user *to, const void *from, size_t n)
 {
 	if (access_ok(to, n)) {
 		instrument_copy_to_user(to, from, n);
-		n = copy_to_user_mcsafe((__force void *) to, from, n);
+		n = copy_mc_to_user((__force void *) to, from, n);
 	}
 	return n;
 }
 
-static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset,
+static unsigned long copy_mc_to_page(struct page *page, size_t offset,
 		const char *from, size_t len)
 {
 	unsigned long ret;
 	char *to;
 
 	to = kmap_atomic(page);
-	ret = memcpy_mcsafe(to + offset, from, len);
+	ret = copy_mc_to_kernel(to + offset, from, len);
 	kunmap_atomic(to);
 
 	return ret;
 }
 
-static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
+static size_t copy_mc_pipe_to_iter(const void *addr, size_t bytes,
 				struct iov_iter *i)
 {
 	struct pipe_inode_info *pipe = i->pipe;
@@ -678,7 +678,7 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
 		size_t chunk = min_t(size_t, n, PAGE_SIZE - off);
 		unsigned long rem;
 
-		rem = memcpy_mcsafe_to_page(pipe->bufs[i_head & p_mask].page,
+		rem = copy_mc_to_page(pipe->bufs[i_head & p_mask].page,
 					    off, addr, chunk);
 		i->head = i_head;
 		i->iov_offset = off + chunk - rem;
@@ -695,18 +695,17 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
 }
 
 /**
- * _copy_to_iter_mcsafe - copy to user with source-read error exception handling
+ * _copy_mc_to_iter - copy to iter with source memory error exception handling
  * @addr: source kernel address
  * @bytes: total transfer length
  * @iter: destination iterator
  *
- * The pmem driver arranges for filesystem-dax to use this facility via
- * dax_copy_to_iter() for protecting read/write to persistent memory.
- * Unless / until an architecture can guarantee identical performance
- * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a
- * performance regression to switch more users to the mcsafe version.
+ * The pmem driver deploys this for the dax operation
+ * (dax_copy_to_iter()) for dax reads (bypass page-cache and the
+ * block-layer). Upon #MC read(2) aborts and returns EIO or the bytes
+ * successfully copied.
  *
- * Otherwise, the main differences between this and typical _copy_to_iter().
+ * The main differences between this and typical _copy_to_iter().
  *
  * * Typical tail/residue handling after a fault retries the copy
  *   byte-by-byte until the fault happens again. Re-triggering machine
@@ -717,23 +716,22 @@ static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes,
  * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies.
  *   Compare to copy_to_iter() where only ITER_IOVEC attempts might return
  *   a short copy.
- *
- * See MCSAFE_TEST for self-test.
  */
-size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
+size_t _copy_mc_to_iter(const void *addr, size_t bytes, struct iov_iter *i)
 {
 	const char *from = addr;
 	unsigned long rem, curr_addr, s_addr = (unsigned long) addr;
 
 	if (unlikely(iov_iter_is_pipe(i)))
-		return copy_pipe_to_iter_mcsafe(addr, bytes, i);
+		return copy_mc_pipe_to_iter(addr, bytes, i);
 	if (iter_is_iovec(i))
 		might_fault();
 	iterate_and_advance(i, bytes, v,
-		copyout_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len),
+		copyout_mc(v.iov_base, (from += v.iov_len) - v.iov_len,
+			   v.iov_len),
 		({
-		rem = memcpy_mcsafe_to_page(v.bv_page, v.bv_offset,
-                               (from += v.bv_len) - v.bv_len, v.bv_len);
+		rem = copy_mc_to_page(v.bv_page, v.bv_offset,
+				      (from += v.bv_len) - v.bv_len, v.bv_len);
 		if (rem) {
 			curr_addr = (unsigned long) from;
 			bytes = curr_addr - s_addr - rem;
@@ -741,8 +739,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 		}
 		}),
 		({
-		rem = memcpy_mcsafe(v.iov_base, (from += v.iov_len) - v.iov_len,
-				v.iov_len);
+		rem = copy_mc_to_kernel(v.iov_base, (from += v.iov_len)
+					- v.iov_len, v.iov_len);
 		if (rem) {
 			curr_addr = (unsigned long) from;
 			bytes = curr_addr - s_addr - rem;
@@ -753,8 +751,8 @@ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i)
 
 	return bytes;
 }
-EXPORT_SYMBOL_GPL(_copy_to_iter_mcsafe);
-#endif /* CONFIG_ARCH_HAS_UACCESS_MCSAFE */
+EXPORT_SYMBOL_GPL(_copy_mc_to_iter);
+#endif /* CONFIG_ARCH_HAS_COPY_MC */
 
 size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i)
 {
diff --git a/tools/arch/x86/include/asm/mcsafe_test.h b/tools/arch/x86/include/asm/mcsafe_test.h
deleted file mode 100644
index 2ccd588fbad4..000000000000
--- a/tools/arch/x86/include/asm/mcsafe_test.h
+++ /dev/null
@@ -1,13 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-#ifndef _MCSAFE_TEST_H_
-#define _MCSAFE_TEST_H_
-
-.macro MCSAFE_TEST_CTL
-.endm
-
-.macro MCSAFE_TEST_SRC reg count target
-.endm
-
-.macro MCSAFE_TEST_DST reg count target
-.endm
-#endif /* _MCSAFE_TEST_H_ */
diff --git a/tools/arch/x86/lib/memcpy_64.S b/tools/arch/x86/lib/memcpy_64.S
index 45f8e1b02241..0b5b8ae56bd9 100644
--- a/tools/arch/x86/lib/memcpy_64.S
+++ b/tools/arch/x86/lib/memcpy_64.S
@@ -4,7 +4,6 @@
 #include <linux/linkage.h>
 #include <asm/errno.h>
 #include <asm/cpufeatures.h>
-#include <asm/mcsafe_test.h>
 #include <asm/alternative-asm.h>
 #include <asm/export.h>
 
@@ -187,117 +186,3 @@ SYM_FUNC_START(memcpy_orig)
 SYM_FUNC_END(memcpy_orig)
 
 .popsection
-
-#ifndef CONFIG_UML
-
-MCSAFE_TEST_CTL
-
-/*
- * __memcpy_mcsafe - memory copy with machine check exception handling
- * Note that we only catch machine checks when reading the source addresses.
- * Writes to target are posted and don't generate machine checks.
- */
-SYM_FUNC_START(__memcpy_mcsafe)
-	cmpl $8, %edx
-	/* Less than 8 bytes? Go to byte copy loop */
-	jb .L_no_whole_words
-
-	/* Check for bad alignment of source */
-	testl $7, %esi
-	/* Already aligned */
-	jz .L_8byte_aligned
-
-	/* Copy one byte at a time until source is 8-byte aligned */
-	movl %esi, %ecx
-	andl $7, %ecx
-	subl $8, %ecx
-	negl %ecx
-	subl %ecx, %edx
-.L_read_leading_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_leading_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_leading_bytes
-.L_write_leading_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_leading_bytes
-
-.L_8byte_aligned:
-	movl %edx, %ecx
-	andl $7, %edx
-	shrl $3, %ecx
-	jz .L_no_whole_words
-
-.L_read_words:
-	movq (%rsi), %r8
-	MCSAFE_TEST_SRC %rsi 8 .E_read_words
-	MCSAFE_TEST_DST %rdi 8 .E_write_words
-.L_write_words:
-	movq %r8, (%rdi)
-	addq $8, %rsi
-	addq $8, %rdi
-	decl %ecx
-	jnz .L_read_words
-
-	/* Any trailing bytes? */
-.L_no_whole_words:
-	andl %edx, %edx
-	jz .L_done_memcpy_trap
-
-	/* Copy trailing bytes */
-	movl %edx, %ecx
-.L_read_trailing_bytes:
-	movb (%rsi), %al
-	MCSAFE_TEST_SRC %rsi 1 .E_trailing_bytes
-	MCSAFE_TEST_DST %rdi 1 .E_trailing_bytes
-.L_write_trailing_bytes:
-	movb %al, (%rdi)
-	incq %rsi
-	incq %rdi
-	decl %ecx
-	jnz .L_read_trailing_bytes
-
-	/* Copy successful. Return zero */
-.L_done_memcpy_trap:
-	xorl %eax, %eax
-.L_done:
-	ret
-SYM_FUNC_END(__memcpy_mcsafe)
-EXPORT_SYMBOL_GPL(__memcpy_mcsafe)
-
-	.section .fixup, "ax"
-	/*
-	 * Return number of bytes not copied for any failure. Note that
-	 * there is no "tail" handling since the source buffer is 8-byte
-	 * aligned and poison is cacheline aligned.
-	 */
-.E_read_words:
-	shll	$3, %ecx
-.E_leading_bytes:
-	addl	%edx, %ecx
-.E_trailing_bytes:
-	mov	%ecx, %eax
-	jmp	.L_done
-
-	/*
-	 * For write fault handling, given the destination is unaligned,
-	 * we handle faults on multi-byte writes with a byte-by-byte
-	 * copy up to the write-protected page.
-	 */
-.E_write_words:
-	shll	$3, %ecx
-	addl	%edx, %ecx
-	movl	%ecx, %edx
-	jmp mcsafe_handle_tail
-
-	.previous
-
-	_ASM_EXTABLE_FAULT(.L_read_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE_FAULT(.L_read_words, .E_read_words)
-	_ASM_EXTABLE_FAULT(.L_read_trailing_bytes, .E_trailing_bytes)
-	_ASM_EXTABLE(.L_write_leading_bytes, .E_leading_bytes)
-	_ASM_EXTABLE(.L_write_words, .E_write_words)
-	_ASM_EXTABLE(.L_write_trailing_bytes, .E_trailing_bytes)
-#endif
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index e034a8f24f46..893f021fec63 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -548,8 +548,8 @@ static const char *uaccess_safe_builtin[] = {
 	"__ubsan_handle_shift_out_of_bounds",
 	/* misc */
 	"csum_partial_copy_generic",
-	"__memcpy_mcsafe",
-	"mcsafe_handle_tail",
+	"copy_mc_fragile",
+	"copy_mc_fragile_handle_tail",
 	"ftrace_likely_update", /* CONFIG_TRACE_BRANCH_PROFILING */
 	NULL
 };
diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build
index dd68a40a790c..878db6a59a41 100644
--- a/tools/perf/bench/Build
+++ b/tools/perf/bench/Build
@@ -13,7 +13,6 @@ perf-y += synthesize.o
 perf-y += kallsyms-parse.o
 perf-y += find-bit-bench.o
 
-perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-lib.o
 perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o
 perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o
 
diff --git a/tools/perf/bench/mem-memcpy-x86-64-lib.c b/tools/perf/bench/mem-memcpy-x86-64-lib.c
deleted file mode 100644
index 4130734dde84..000000000000
--- a/tools/perf/bench/mem-memcpy-x86-64-lib.c
+++ /dev/null
@@ -1,24 +0,0 @@
-/*
- * From code in arch/x86/lib/usercopy_64.c, copied to keep tools/ copy
- * of the kernel's arch/x86/lib/memcpy_64.s used in 'perf bench mem memcpy'
- * happy.
- */
-#include <linux/types.h>
-
-unsigned long __memcpy_mcsafe(void *dst, const void *src, size_t cnt);
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len);
-
-unsigned long mcsafe_handle_tail(char *to, char *from, unsigned len)
-{
-	for (; len; --len, to++, from++) {
-		/*
-		 * Call the assembly routine back directly since
-		 * memcpy_mcsafe() may silently fallback to memcpy.
-		 */
-		unsigned long rem = __memcpy_mcsafe(to, from, 1);
-
-		if (rem)
-			break;
-	}
-	return len;
-}
diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c
index a1a5dc645b40..2ac0fff6dad8 100644
--- a/tools/testing/nvdimm/test/nfit.c
+++ b/tools/testing/nvdimm/test/nfit.c
@@ -23,7 +23,8 @@
 #include "nfit_test.h"
 #include "../watermark.h"
 
-#include <asm/mcsafe_test.h>
+#include <asm/copy_mc_test.h>
+#include <asm/mce.h>
 
 /*
  * Generate an NFIT table to describe the following topology:
@@ -3283,7 +3284,7 @@ static struct platform_driver nfit_test_driver = {
 	.id_table = nfit_test_id,
 };
 
-static char mcsafe_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
+static char copy_mc_buf[PAGE_SIZE] __attribute__((__aligned__(PAGE_SIZE)));
 
 enum INJECT {
 	INJECT_NONE,
@@ -3291,7 +3292,7 @@ enum INJECT {
 	INJECT_DST,
 };
 
-static void mcsafe_test_init(char *dst, char *src, size_t size)
+static void copy_mc_test_init(char *dst, char *src, size_t size)
 {
 	size_t i;
 
@@ -3300,7 +3301,7 @@ static void mcsafe_test_init(char *dst, char *src, size_t size)
 		src[i] = (char) i;
 }
 
-static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
+static bool copy_mc_test_validate(unsigned char *dst, unsigned char *src,
 		size_t size, unsigned long rem)
 {
 	size_t i;
@@ -3321,12 +3322,12 @@ static bool mcsafe_test_validate(unsigned char *dst, unsigned char *src,
 	return true;
 }
 
-void mcsafe_test(void)
+void copy_mc_test(void)
 {
 	char *inject_desc[] = { "none", "source", "destination" };
 	enum INJECT inj;
 
-	if (IS_ENABLED(CONFIG_MCSAFE_TEST)) {
+	if (IS_ENABLED(CONFIG_COPY_MC_TEST)) {
 		pr_info("%s: run...\n", __func__);
 	} else {
 		pr_info("%s: disabled, skip.\n", __func__);
@@ -3344,31 +3345,31 @@ void mcsafe_test(void)
 
 			switch (inj) {
 			case INJECT_NONE:
-				mcsafe_inject_src(NULL);
-				mcsafe_inject_dst(NULL);
-				dst = &mcsafe_buf[2048];
-				src = &mcsafe_buf[1024 - i];
+				copy_mc_inject_src(NULL);
+				copy_mc_inject_dst(NULL);
+				dst = &copy_mc_buf[2048];
+				src = &copy_mc_buf[1024 - i];
 				expect = 0;
 				break;
 			case INJECT_SRC:
-				mcsafe_inject_src(&mcsafe_buf[1024]);
-				mcsafe_inject_dst(NULL);
-				dst = &mcsafe_buf[2048];
-				src = &mcsafe_buf[1024 - i];
+				copy_mc_inject_src(&copy_mc_buf[1024]);
+				copy_mc_inject_dst(NULL);
+				dst = &copy_mc_buf[2048];
+				src = &copy_mc_buf[1024 - i];
 				expect = 512 - i;
 				break;
 			case INJECT_DST:
-				mcsafe_inject_src(NULL);
-				mcsafe_inject_dst(&mcsafe_buf[2048]);
-				dst = &mcsafe_buf[2048 - i];
-				src = &mcsafe_buf[1024];
+				copy_mc_inject_src(NULL);
+				copy_mc_inject_dst(&copy_mc_buf[2048]);
+				dst = &copy_mc_buf[2048 - i];
+				src = &copy_mc_buf[1024];
 				expect = 512 - i;
 				break;
 			}
 
-			mcsafe_test_init(dst, src, 512);
-			rem = __memcpy_mcsafe(dst, src, 512);
-			valid = mcsafe_test_validate(dst, src, 512, expect);
+			copy_mc_test_init(dst, src, 512);
+			rem = copy_mc_fragile(dst, src, 512);
+			valid = copy_mc_test_validate(dst, src, 512, expect);
 			if (rem == expect && valid)
 				continue;
 			pr_info("%s: copy(%#lx, %#lx, %d) off: %d rem: %ld %s expect: %ld\n",
@@ -3380,8 +3381,8 @@ void mcsafe_test(void)
 		}
 	}
 
-	mcsafe_inject_src(NULL);
-	mcsafe_inject_dst(NULL);
+	copy_mc_inject_src(NULL);
+	copy_mc_inject_dst(NULL);
 }
 
 static __init int nfit_test_init(void)
@@ -3392,7 +3393,7 @@ static __init int nfit_test_init(void)
 	libnvdimm_test();
 	acpi_nfit_test();
 	device_dax_test();
-	mcsafe_test();
+	copy_mc_test();
 	dax_pmem_test();
 	dax_pmem_core_test();
 #ifdef CONFIG_DEV_DAX_PMEM_COMPAT
diff --git a/tools/testing/selftests/powerpc/copyloops/.gitignore b/tools/testing/selftests/powerpc/copyloops/.gitignore
index ddaf140b8255..994b11af765c 100644
--- a/tools/testing/selftests/powerpc/copyloops/.gitignore
+++ b/tools/testing/selftests/powerpc/copyloops/.gitignore
@@ -12,4 +12,4 @@ memcpy_p7_t1
 copyuser_64_exc_t0
 copyuser_64_exc_t1
 copyuser_64_exc_t2
-memcpy_mcsafe_64
+copy_mc_64
diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile
index 0917983a1c78..3095b1f1c02b 100644
--- a/tools/testing/selftests/powerpc/copyloops/Makefile
+++ b/tools/testing/selftests/powerpc/copyloops/Makefile
@@ -12,7 +12,7 @@ ASFLAGS = $(CFLAGS) -Wa,-mpower4
 TEST_GEN_PROGS := copyuser_64_t0 copyuser_64_t1 copyuser_64_t2 \
 		copyuser_p7_t0 copyuser_p7_t1 \
 		memcpy_64_t0 memcpy_64_t1 memcpy_64_t2 \
-		memcpy_p7_t0 memcpy_p7_t1 memcpy_mcsafe_64 \
+		memcpy_p7_t0 memcpy_p7_t1 copy_mc_64 \
 		copyuser_64_exc_t0 copyuser_64_exc_t1 copyuser_64_exc_t2
 
 EXTRA_SOURCES := validate.c ../harness.c stubs.S
@@ -45,9 +45,9 @@ $(OUTPUT)/memcpy_p7_t%:	memcpy_power7.S $(EXTRA_SOURCES)
 		-D SELFTEST_CASE=$(subst memcpy_p7_t,,$(notdir $@)) \
 		-o $@ $^
 
-$(OUTPUT)/memcpy_mcsafe_64: memcpy_mcsafe_64.S $(EXTRA_SOURCES)
+$(OUTPUT)/copy_mc_64: copy_mc_64.S $(EXTRA_SOURCES)
 	$(CC) $(CPPFLAGS) $(CFLAGS) \
-		-D COPY_LOOP=test_memcpy_mcsafe \
+		-D COPY_LOOP=test_copy_mc_generic \
 		-o $@ $^
 
 $(OUTPUT)/copyuser_64_exc_t%: copyuser_64.S exc_validate.c ../harness.c \
diff --git a/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
new file mode 120000
index 000000000000..dcbe06d500fb
--- /dev/null
+++ b/tools/testing/selftests/powerpc/copyloops/copy_mc_64.S
@@ -0,0 +1 @@
+../../../../../arch/powerpc/lib/copy_mc_64.S
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S b/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
deleted file mode 120000
index f0feef3062f6..000000000000
--- a/tools/testing/selftests/powerpc/copyloops/memcpy_mcsafe_64.S
+++ /dev/null
@@ -1 +0,0 @@
-../../../../../arch/powerpc/lib/memcpy_mcsafe_64.S
\ No newline at end of file
-- 
cgit v1.2.3


From bdbb4e29df8b790db50cb73ce25d23543329f05f Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 5 Oct 2020 15:07:38 -0700
Subject: netlink: add mask validation

We don't have good validation policy for existing unsigned int attrs
which serve as flags (for new ones we could use NLA_BITFIELD32).
With increased use of policy dumping having the validation be
expressed as part of the policy is important. Add validation
policy in form of a mask of supported/valid bits.

Support u64 in the uAPI to be future-proof, but really for now
the embedded mask member can only hold 32 bits, so anything with
bit 32+ set will always fail validation.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h        | 10 ++++++++++
 include/uapi/linux/netlink.h |  2 ++
 lib/nlattr.c                 | 36 ++++++++++++++++++++++++++++++++++++
 net/netlink/policy.c         |  8 ++++++++
 4 files changed, 56 insertions(+)

(limited to 'lib')

diff --git a/include/net/netlink.h b/include/net/netlink.h
index c5aa46f379bc..2b9e41075f19 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -200,6 +200,7 @@ enum nla_policy_validation {
 	NLA_VALIDATE_RANGE_WARN_TOO_LONG,
 	NLA_VALIDATE_MIN,
 	NLA_VALIDATE_MAX,
+	NLA_VALIDATE_MASK,
 	NLA_VALIDATE_RANGE_PTR,
 	NLA_VALIDATE_FUNCTION,
 };
@@ -317,6 +318,7 @@ struct nla_policy {
 	u16		len;
 	union {
 		const u32 bitfield32_valid;
+		const u32 mask;
 		const char *reject_message;
 		const struct nla_policy *nested_policy;
 		struct netlink_range_validation *range;
@@ -368,6 +370,8 @@ struct nla_policy {
 	(tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64)
 
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
+#define NLA_ENSURE_UINT_TYPE(tp)			\
+	(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp)) + tp)
 #define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp)		\
 	(__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) ||	\
 		      tp == NLA_MSECS ||		\
@@ -416,6 +420,12 @@ struct nla_policy {
 	.max = _max,					\
 }
 
+#define NLA_POLICY_MASK(tp, _mask) {			\
+	.type = NLA_ENSURE_UINT_TYPE(tp),		\
+	.validation_type = NLA_VALIDATE_MASK,		\
+	.mask = _mask,					\
+}
+
 #define NLA_POLICY_VALIDATE_FN(tp, fn, ...) {		\
 	.type = NLA_ENSURE_NO_VALIDATION_PTR(tp),	\
 	.validation_type = NLA_VALIDATE_FUNCTION,	\
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index eac8a6a648ea..d02e472ba54c 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -331,6 +331,7 @@ enum netlink_attribute_type {
  *	the index, if limited inside the nesting (U32)
  * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the
  *	bitfield32 type (U32)
+ * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64)
  * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
  */
 enum netlink_policy_type_attr {
@@ -346,6 +347,7 @@ enum netlink_policy_type_attr {
 	NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE,
 	NL_POLICY_TYPE_ATTR_BITFIELD32_MASK,
 	NL_POLICY_TYPE_ATTR_PAD,
+	NL_POLICY_TYPE_ATTR_MASK,
 
 	/* keep last */
 	__NL_POLICY_TYPE_ATTR_MAX,
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 80ff9fe83696..9c99f5daa4d2 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -323,6 +323,37 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 	}
 }
 
+static int nla_validate_mask(const struct nla_policy *pt,
+			     const struct nlattr *nla,
+			     struct netlink_ext_ack *extack)
+{
+	u64 value;
+
+	switch (pt->type) {
+	case NLA_U8:
+		value = nla_get_u8(nla);
+		break;
+	case NLA_U16:
+		value = nla_get_u16(nla);
+		break;
+	case NLA_U32:
+		value = nla_get_u32(nla);
+		break;
+	case NLA_U64:
+		value = nla_get_u64(nla);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (value & ~(u64)pt->mask) {
+		NL_SET_ERR_MSG_ATTR(extack, nla, "reserved bit set");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int validate_nla(const struct nlattr *nla, int maxtype,
 			const struct nla_policy *policy, unsigned int validate,
 			struct netlink_ext_ack *extack, unsigned int depth)
@@ -503,6 +534,11 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		if (err)
 			return err;
 		break;
+	case NLA_VALIDATE_MASK:
+		err = nla_validate_mask(pt, nla, extack);
+		if (err)
+			return err;
+		break;
 	case NLA_VALIDATE_FUNCTION:
 		if (pt->validate) {
 			err = pt->validate(nla, extack);
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index cf23c0151721..ee26d01328ee 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -263,6 +263,14 @@ send_attribute:
 		else
 			type = NL_ATTR_TYPE_U64;
 
+		if (pt->validation_type == NLA_VALIDATE_MASK) {
+			if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MASK,
+					      pt->mask,
+					      NL_POLICY_TYPE_ATTR_PAD))
+				goto nla_put_failure;
+			break;
+		}
+
 		nla_get_range_unsigned(pt, &range);
 
 		if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_U,
-- 
cgit v1.2.3


From 2b0d3d3e4fcfb19d10f9a82910b8f0f05c56ee3e Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Thu, 1 Oct 2020 23:48:41 +0800
Subject: percpu_ref: reduce memory footprint of percpu_ref in fast path

'struct percpu_ref' is often embedded into one user structure, and the
instance is usually referenced in fast path, however actually only
'percpu_count_ptr' is needed in fast path.

So move other fields into one new structure of 'percpu_ref_data', and
allocate it dynamically via kzalloc(), then memory footprint of
'percpu_ref' in fast path is reduced a lot and becomes suitable to put
into hot cacheline of user structure.

Signed-off-by: Ming Lei <ming.lei@redhat.com>
Tested-by: Veronika Kabatova <vkabatov@redhat.com>
Reviewed-by: Christoph Hellwig <hch@lst.de>
Acked-by: Tejun Heo <tj@kernel.org>
Cc: Sagi Grimberg <sagi@grimberg.me>
Cc: Tejun Heo <tj@kernel.org>
Cc: Christoph Hellwig <hch@lst.de>
Cc: Jens Axboe <axboe@kernel.dk>
Cc: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/infiniband/sw/rdmavt/mr.c |   2 +-
 include/linux/percpu-refcount.h   |  52 +++++++--------
 lib/percpu-refcount.c             | 131 ++++++++++++++++++++++++++++----------
 3 files changed, 123 insertions(+), 62 deletions(-)

(limited to 'lib')

diff --git a/drivers/infiniband/sw/rdmavt/mr.c b/drivers/infiniband/sw/rdmavt/mr.c
index 2f7c25fea44a..8490fdb9c91e 100644
--- a/drivers/infiniband/sw/rdmavt/mr.c
+++ b/drivers/infiniband/sw/rdmavt/mr.c
@@ -499,7 +499,7 @@ static int rvt_check_refs(struct rvt_mregion *mr, const char *t)
 		rvt_pr_err(rdi,
 			   "%s timeout mr %p pd %p lkey %x refcount %ld\n",
 			   t, mr, mr->pd, mr->lkey,
-			   atomic_long_read(&mr->refcount.count));
+			   atomic_long_read(&mr->refcount.data->count));
 		rvt_get_mr(mr);
 		return -EBUSY;
 	}
diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h
index 87d8a38bdea1..16c35a728b4c 100644
--- a/include/linux/percpu-refcount.h
+++ b/include/linux/percpu-refcount.h
@@ -92,18 +92,30 @@ enum {
 	PERCPU_REF_ALLOW_REINIT	= 1 << 2,
 };
 
-struct percpu_ref {
+struct percpu_ref_data {
 	atomic_long_t		count;
-	/*
-	 * The low bit of the pointer indicates whether the ref is in percpu
-	 * mode; if set, then get/put will manipulate the atomic_t.
-	 */
-	unsigned long		percpu_count_ptr;
 	percpu_ref_func_t	*release;
 	percpu_ref_func_t	*confirm_switch;
 	bool			force_atomic:1;
 	bool			allow_reinit:1;
 	struct rcu_head		rcu;
+	struct percpu_ref	*ref;
+};
+
+struct percpu_ref {
+	/*
+	 * The low bit of the pointer indicates whether the ref is in percpu
+	 * mode; if set, then get/put will manipulate the atomic_t.
+	 */
+	unsigned long		percpu_count_ptr;
+
+	/*
+	 * 'percpu_ref' is often embedded into user structure, and only
+	 * 'percpu_count_ptr' is required in fast path, move other fields
+	 * into 'percpu_ref_data', so we can reduce memory footprint in
+	 * fast path.
+	 */
+	struct percpu_ref_data  *data;
 };
 
 int __must_check percpu_ref_init(struct percpu_ref *ref,
@@ -118,6 +130,7 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 				 percpu_ref_func_t *confirm_kill);
 void percpu_ref_resurrect(struct percpu_ref *ref);
 void percpu_ref_reinit(struct percpu_ref *ref);
+bool percpu_ref_is_zero(struct percpu_ref *ref);
 
 /**
  * percpu_ref_kill - drop the initial ref
@@ -191,7 +204,7 @@ static inline void percpu_ref_get_many(struct percpu_ref *ref, unsigned long nr)
 	if (__ref_is_percpu(ref, &percpu_count))
 		this_cpu_add(*percpu_count, nr);
 	else
-		atomic_long_add(nr, &ref->count);
+		atomic_long_add(nr, &ref->data->count);
 
 	rcu_read_unlock();
 }
@@ -231,7 +244,7 @@ static inline bool percpu_ref_tryget_many(struct percpu_ref *ref,
 		this_cpu_add(*percpu_count, nr);
 		ret = true;
 	} else {
-		ret = atomic_long_add_unless(&ref->count, nr, 0);
+		ret = atomic_long_add_unless(&ref->data->count, nr, 0);
 	}
 
 	rcu_read_unlock();
@@ -279,7 +292,7 @@ static inline bool percpu_ref_tryget_live(struct percpu_ref *ref)
 		this_cpu_inc(*percpu_count);
 		ret = true;
 	} else if (!(ref->percpu_count_ptr & __PERCPU_REF_DEAD)) {
-		ret = atomic_long_inc_not_zero(&ref->count);
+		ret = atomic_long_inc_not_zero(&ref->data->count);
 	}
 
 	rcu_read_unlock();
@@ -305,8 +318,8 @@ static inline void percpu_ref_put_many(struct percpu_ref *ref, unsigned long nr)
 
 	if (__ref_is_percpu(ref, &percpu_count))
 		this_cpu_sub(*percpu_count, nr);
-	else if (unlikely(atomic_long_sub_and_test(nr, &ref->count)))
-		ref->release(ref);
+	else if (unlikely(atomic_long_sub_and_test(nr, &ref->data->count)))
+		ref->data->release(ref);
 
 	rcu_read_unlock();
 }
@@ -339,21 +352,4 @@ static inline bool percpu_ref_is_dying(struct percpu_ref *ref)
 	return ref->percpu_count_ptr & __PERCPU_REF_DEAD;
 }
 
-/**
- * percpu_ref_is_zero - test whether a percpu refcount reached zero
- * @ref: percpu_ref to test
- *
- * Returns %true if @ref reached zero.
- *
- * This function is safe to call as long as @ref is between init and exit.
- */
-static inline bool percpu_ref_is_zero(struct percpu_ref *ref)
-{
-	unsigned long __percpu *percpu_count;
-
-	if (__ref_is_percpu(ref, &percpu_count))
-		return false;
-	return !atomic_long_read(&ref->count);
-}
-
 #endif
diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index 0ba686b8fe57..b6350d13538a 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -4,6 +4,7 @@
 #include <linux/kernel.h>
 #include <linux/sched.h>
 #include <linux/wait.h>
+#include <linux/slab.h>
 #include <linux/percpu-refcount.h>
 
 /*
@@ -64,18 +65,25 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
 	size_t align = max_t(size_t, 1 << __PERCPU_REF_FLAG_BITS,
 			     __alignof__(unsigned long));
 	unsigned long start_count = 0;
+	struct percpu_ref_data *data;
 
 	ref->percpu_count_ptr = (unsigned long)
 		__alloc_percpu_gfp(sizeof(unsigned long), align, gfp);
 	if (!ref->percpu_count_ptr)
 		return -ENOMEM;
 
-	ref->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
-	ref->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT;
+	data = kzalloc(sizeof(*ref->data), gfp);
+	if (!data) {
+		free_percpu((void __percpu *)ref->percpu_count_ptr);
+		return -ENOMEM;
+	}
+
+	data->force_atomic = flags & PERCPU_REF_INIT_ATOMIC;
+	data->allow_reinit = flags & PERCPU_REF_ALLOW_REINIT;
 
 	if (flags & (PERCPU_REF_INIT_ATOMIC | PERCPU_REF_INIT_DEAD)) {
 		ref->percpu_count_ptr |= __PERCPU_REF_ATOMIC;
-		ref->allow_reinit = true;
+		data->allow_reinit = true;
 	} else {
 		start_count += PERCPU_COUNT_BIAS;
 	}
@@ -85,14 +93,28 @@ int percpu_ref_init(struct percpu_ref *ref, percpu_ref_func_t *release,
 	else
 		start_count++;
 
-	atomic_long_set(&ref->count, start_count);
+	atomic_long_set(&data->count, start_count);
 
-	ref->release = release;
-	ref->confirm_switch = NULL;
+	data->release = release;
+	data->confirm_switch = NULL;
+	data->ref = ref;
+	ref->data = data;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(percpu_ref_init);
 
+static void __percpu_ref_exit(struct percpu_ref *ref)
+{
+	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
+
+	if (percpu_count) {
+		/* non-NULL confirm_switch indicates switching in progress */
+		WARN_ON_ONCE(ref->data->confirm_switch);
+		free_percpu(percpu_count);
+		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
+	}
+}
+
 /**
  * percpu_ref_exit - undo percpu_ref_init()
  * @ref: percpu_ref to exit
@@ -105,27 +127,36 @@ EXPORT_SYMBOL_GPL(percpu_ref_init);
  */
 void percpu_ref_exit(struct percpu_ref *ref)
 {
-	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
+	struct percpu_ref_data *data = ref->data;
+	unsigned long flags;
 
-	if (percpu_count) {
-		/* non-NULL confirm_switch indicates switching in progress */
-		WARN_ON_ONCE(ref->confirm_switch);
-		free_percpu(percpu_count);
-		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
-	}
+	__percpu_ref_exit(ref);
+
+	if (!data)
+		return;
+
+	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+	ref->percpu_count_ptr |= atomic_long_read(&ref->data->count) <<
+		__PERCPU_REF_FLAG_BITS;
+	ref->data = NULL;
+	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
+
+	kfree(data);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_exit);
 
 static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
 {
-	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
+	struct percpu_ref_data *data = container_of(rcu,
+			struct percpu_ref_data, rcu);
+	struct percpu_ref *ref = data->ref;
 
-	ref->confirm_switch(ref);
-	ref->confirm_switch = NULL;
+	data->confirm_switch(ref);
+	data->confirm_switch = NULL;
 	wake_up_all(&percpu_ref_switch_waitq);
 
-	if (!ref->allow_reinit)
-		percpu_ref_exit(ref);
+	if (!data->allow_reinit)
+		__percpu_ref_exit(ref);
 
 	/* drop ref from percpu_ref_switch_to_atomic() */
 	percpu_ref_put(ref);
@@ -133,7 +164,9 @@ static void percpu_ref_call_confirm_rcu(struct rcu_head *rcu)
 
 static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
 {
-	struct percpu_ref *ref = container_of(rcu, struct percpu_ref, rcu);
+	struct percpu_ref_data *data = container_of(rcu,
+			struct percpu_ref_data, rcu);
+	struct percpu_ref *ref = data->ref;
 	unsigned long __percpu *percpu_count = percpu_count_ptr(ref);
 	unsigned long count = 0;
 	int cpu;
@@ -142,7 +175,7 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
 		count += *per_cpu_ptr(percpu_count, cpu);
 
 	pr_debug("global %lu percpu %lu\n",
-		 atomic_long_read(&ref->count), count);
+		 atomic_long_read(&data->count), count);
 
 	/*
 	 * It's crucial that we sum the percpu counters _before_ adding the sum
@@ -156,11 +189,11 @@ static void percpu_ref_switch_to_atomic_rcu(struct rcu_head *rcu)
 	 * reaching 0 before we add the percpu counts. But doing it at the same
 	 * time is equivalent and saves us atomic operations:
 	 */
-	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &ref->count);
+	atomic_long_add((long)count - PERCPU_COUNT_BIAS, &data->count);
 
-	WARN_ONCE(atomic_long_read(&ref->count) <= 0,
+	WARN_ONCE(atomic_long_read(&data->count) <= 0,
 		  "percpu ref (%ps) <= 0 (%ld) after switching to atomic",
-		  ref->release, atomic_long_read(&ref->count));
+		  data->release, atomic_long_read(&data->count));
 
 	/* @ref is viewed as dead on all CPUs, send out switch confirmation */
 	percpu_ref_call_confirm_rcu(rcu);
@@ -186,10 +219,11 @@ static void __percpu_ref_switch_to_atomic(struct percpu_ref *ref,
 	 * Non-NULL ->confirm_switch is used to indicate that switching is
 	 * in progress.  Use noop one if unspecified.
 	 */
-	ref->confirm_switch = confirm_switch ?: percpu_ref_noop_confirm_switch;
+	ref->data->confirm_switch = confirm_switch ?:
+		percpu_ref_noop_confirm_switch;
 
 	percpu_ref_get(ref);	/* put after confirmation */
-	call_rcu(&ref->rcu, percpu_ref_switch_to_atomic_rcu);
+	call_rcu(&ref->data->rcu, percpu_ref_switch_to_atomic_rcu);
 }
 
 static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
@@ -202,10 +236,10 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
 	if (!(ref->percpu_count_ptr & __PERCPU_REF_ATOMIC))
 		return;
 
-	if (WARN_ON_ONCE(!ref->allow_reinit))
+	if (WARN_ON_ONCE(!ref->data->allow_reinit))
 		return;
 
-	atomic_long_add(PERCPU_COUNT_BIAS, &ref->count);
+	atomic_long_add(PERCPU_COUNT_BIAS, &ref->data->count);
 
 	/*
 	 * Restore per-cpu operation.  smp_store_release() is paired
@@ -223,6 +257,8 @@ static void __percpu_ref_switch_to_percpu(struct percpu_ref *ref)
 static void __percpu_ref_switch_mode(struct percpu_ref *ref,
 				     percpu_ref_func_t *confirm_switch)
 {
+	struct percpu_ref_data *data = ref->data;
+
 	lockdep_assert_held(&percpu_ref_switch_lock);
 
 	/*
@@ -230,10 +266,10 @@ static void __percpu_ref_switch_mode(struct percpu_ref *ref,
 	 * its completion.  If the caller ensures that ATOMIC switching
 	 * isn't in progress, this function can be called from any context.
 	 */
-	wait_event_lock_irq(percpu_ref_switch_waitq, !ref->confirm_switch,
+	wait_event_lock_irq(percpu_ref_switch_waitq, !data->confirm_switch,
 			    percpu_ref_switch_lock);
 
-	if (ref->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
+	if (data->force_atomic || (ref->percpu_count_ptr & __PERCPU_REF_DEAD))
 		__percpu_ref_switch_to_atomic(ref, confirm_switch);
 	else
 		__percpu_ref_switch_to_percpu(ref);
@@ -266,7 +302,7 @@ void percpu_ref_switch_to_atomic(struct percpu_ref *ref,
 
 	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
 
-	ref->force_atomic = true;
+	ref->data->force_atomic = true;
 	__percpu_ref_switch_mode(ref, confirm_switch);
 
 	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
@@ -284,7 +320,7 @@ EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic);
 void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref)
 {
 	percpu_ref_switch_to_atomic(ref, NULL);
-	wait_event(percpu_ref_switch_waitq, !ref->confirm_switch);
+	wait_event(percpu_ref_switch_waitq, !ref->data->confirm_switch);
 }
 EXPORT_SYMBOL_GPL(percpu_ref_switch_to_atomic_sync);
 
@@ -312,7 +348,7 @@ void percpu_ref_switch_to_percpu(struct percpu_ref *ref)
 
 	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
 
-	ref->force_atomic = false;
+	ref->data->force_atomic = false;
 	__percpu_ref_switch_mode(ref, NULL);
 
 	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
@@ -344,7 +380,8 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
 
 	WARN_ONCE(ref->percpu_count_ptr & __PERCPU_REF_DEAD,
-		  "%s called more than once on %ps!", __func__, ref->release);
+		  "%s called more than once on %ps!", __func__,
+		  ref->data->release);
 
 	ref->percpu_count_ptr |= __PERCPU_REF_DEAD;
 	__percpu_ref_switch_mode(ref, confirm_kill);
@@ -354,6 +391,34 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref,
 }
 EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm);
 
+/**
+ * percpu_ref_is_zero - test whether a percpu refcount reached zero
+ * @ref: percpu_ref to test
+ *
+ * Returns %true if @ref reached zero.
+ *
+ * This function is safe to call as long as @ref is between init and exit.
+ */
+bool percpu_ref_is_zero(struct percpu_ref *ref)
+{
+	unsigned long __percpu *percpu_count;
+	unsigned long count, flags;
+
+	if (__ref_is_percpu(ref, &percpu_count))
+		return false;
+
+	/* protect us from being destroyed */
+	spin_lock_irqsave(&percpu_ref_switch_lock, flags);
+	if (ref->data)
+		count = atomic_long_read(&ref->data->count);
+	else
+		count = ref->percpu_count_ptr >> __PERCPU_REF_FLAG_BITS;
+	spin_unlock_irqrestore(&percpu_ref_switch_lock, flags);
+
+	return count == 0;
+}
+EXPORT_SYMBOL_GPL(percpu_ref_is_zero);
+
 /**
  * percpu_ref_reinit - re-initialize a percpu refcount
  * @ref: perpcu_ref to re-initialize
-- 
cgit v1.2.3


From dd841a749d1ded8e2e5facc4242ee0b6779fc0cb Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 14 Jun 2020 06:07:10 -0400
Subject: radix tree test suite: Fix compilation

Introducing local_lock broke compilation; fix it all up.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/radix-tree.h                  | 1 +
 lib/radix-tree.c                            | 1 -
 tools/testing/radix-tree/linux/kernel.h     | 1 +
 tools/testing/radix-tree/linux/local_lock.h | 8 ++++++++
 tools/testing/radix-tree/test.h             | 4 ----
 5 files changed, 10 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/radix-tree/linux/local_lock.h

(limited to 'lib')

diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h
index c2a9f7c90727..5c85059a92ba 100644
--- a/include/linux/radix-tree.h
+++ b/include/linux/radix-tree.h
@@ -11,6 +11,7 @@
 #include <linux/bitops.h>
 #include <linux/kernel.h>
 #include <linux/list.h>
+#include <linux/percpu.h>
 #include <linux/preempt.h>
 #include <linux/rcupdate.h>
 #include <linux/spinlock.h>
diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e4a3a4397f2..0f10485d46b6 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -20,7 +20,6 @@
 #include <linux/kernel.h>
 #include <linux/kmemleak.h>
 #include <linux/percpu.h>
-#include <linux/local_lock.h>
 #include <linux/preempt.h>		/* in_interrupt() */
 #include <linux/radix-tree.h>
 #include <linux/rcupdate.h>
diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h
index 4568248222ae..39867fd80c8f 100644
--- a/tools/testing/radix-tree/linux/kernel.h
+++ b/tools/testing/radix-tree/linux/kernel.h
@@ -22,4 +22,5 @@
 #define __releases(x)
 #define __must_hold(x)
 
+#define EXPORT_PER_CPU_SYMBOL_GPL(x)
 #endif /* _KERNEL_H */
diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/radix-tree/linux/local_lock.h
new file mode 100644
index 000000000000..b3cf8b233ca4
--- /dev/null
+++ b/tools/testing/radix-tree/linux/local_lock.h
@@ -0,0 +1,8 @@
+#ifndef _LINUX_LOCAL_LOCK
+#define _LINUX_LOCAL_LOCK
+typedef struct { } local_lock_t;
+
+static inline void local_lock(local_lock_t *lock) { }
+static inline void local_unlock(local_lock_t *lock) { }
+#define INIT_LOCAL_LOCK(x) { }
+#endif
diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h
index 34dab4d18744..7ef7067e942c 100644
--- a/tools/testing/radix-tree/test.h
+++ b/tools/testing/radix-tree/test.h
@@ -56,8 +56,4 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag);
 unsigned long node_maxindex(struct radix_tree_node *);
 unsigned long shift_maxindex(unsigned int shift);
 int radix_tree_cpu_dead(unsigned int cpu);
-struct radix_tree_preload {
-	unsigned nr;
-	struct radix_tree_node *nodes;
-};
 extern struct radix_tree_preload radix_tree_preloads;
-- 
cgit v1.2.3


From a219b856a2b993da234108307be772448f22b0ce Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 2 Apr 2020 14:26:13 -0400
Subject: ida: Free allocated bitmap in error path

If a bitmap needs to be allocated, and then by the time the thread
is scheduled to be run again all the indices which would satisfy the
allocation have been allocated then we would leak the allocation.  Almost
impossible to hit in practice, but a trivial fix.  Found by Coverity.

Fixes: f32f004cddf8 ("ida: Convert to XArray")
Reported-by: coverity-bot <keescook+coverity-bot@chromium.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 lib/idr.c                           |  1 +
 tools/testing/radix-tree/idr-test.c | 29 +++++++++++++++++++++++++++++
 2 files changed, 30 insertions(+)

(limited to 'lib')

diff --git a/lib/idr.c b/lib/idr.c
index c2cf2c52bbde..4d2eef0259d2 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -470,6 +470,7 @@ alloc:
 	goto retry;
 nospc:
 	xas_unlock_irqrestore(&xas, flags);
+	kfree(alloc);
 	return -ENOSPC;
 }
 EXPORT_SYMBOL(ida_alloc_range);
diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c
index 8995092d541e..3b796dd5e577 100644
--- a/tools/testing/radix-tree/idr-test.c
+++ b/tools/testing/radix-tree/idr-test.c
@@ -523,8 +523,27 @@ static void *ida_random_fn(void *arg)
 	return NULL;
 }
 
+static void *ida_leak_fn(void *arg)
+{
+	struct ida *ida = arg;
+	time_t s = time(NULL);
+	int i, ret;
+
+	rcu_register_thread();
+
+	do for (i = 0; i < 1000; i++) {
+		ret = ida_alloc_range(ida, 128, 128, GFP_KERNEL);
+		if (ret >= 0)
+			ida_free(ida, 128);
+	} while (time(NULL) < s + 2);
+
+	rcu_unregister_thread();
+	return NULL;
+}
+
 void ida_thread_tests(void)
 {
+	DEFINE_IDA(ida);
 	pthread_t threads[20];
 	int i;
 
@@ -536,6 +555,16 @@ void ida_thread_tests(void)
 
 	while (i--)
 		pthread_join(threads[i], NULL);
+
+	for (i = 0; i < ARRAY_SIZE(threads); i++)
+		if (pthread_create(&threads[i], NULL, ida_leak_fn, &ida)) {
+			perror("creating ida thread");
+			exit(1);
+		}
+
+	while (i--)
+		pthread_join(threads[i], NULL);
+	assert(ida_is_empty(&ida));
 }
 
 void ida_tests(void)
-- 
cgit v1.2.3


From 062b735912b9f3aa3e14cd02b5ede08cf8bc093f Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 31 Mar 2020 14:23:59 -0400
Subject: XArray: Test two more things about xa_cmpxchg

1. If we xa_cmpxchg() an entry in, it marks the index as not free.
2. If we xa_cmpxchg() NULL in, it marks the index as free.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 lib/test_xarray.c | 3 +++
 1 file changed, 3 insertions(+)

(limited to 'lib')

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index d4f97925dbd8..9fc3da430aba 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -393,6 +393,9 @@ static noinline void check_cmpxchg(struct xarray *xa)
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE);
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL);
 	XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL);
+	XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY);
+	XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE);
+	XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY);
 	xa_erase_index(xa, 12345678);
 	xa_erase_index(xa, 5);
 	XA_BUG_ON(xa, !xa_empty(xa));
-- 
cgit v1.2.3


From 04e9e9bb8470bea74eafad1cafd552f3f06c32d9 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Sun, 14 Jun 2020 21:52:04 -0400
Subject: XArray: Test marked multiorder iterations

Demonstrate that starting a marked iteration partway through a marked
multi-order entry works.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 lib/test_xarray.c | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

(limited to 'lib')

diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 9fc3da430aba..1122c4453c87 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -289,6 +289,27 @@ static noinline void check_xa_mark_2(struct xarray *xa)
 	xa_destroy(xa);
 }
 
+static noinline void check_xa_mark_3(struct xarray *xa)
+{
+#ifdef CONFIG_XARRAY_MULTI
+	XA_STATE(xas, xa, 0x41);
+	void *entry;
+	int count = 0;
+
+	xa_store_order(xa, 0x40, 2, xa_mk_index(0x40), GFP_KERNEL);
+	xa_set_mark(xa, 0x41, XA_MARK_0);
+
+	rcu_read_lock();
+	xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) {
+		count++;
+		XA_BUG_ON(xa, entry != xa_mk_index(0x40));
+	}
+	XA_BUG_ON(xa, count != 1);
+	rcu_read_unlock();
+	xa_destroy(xa);
+#endif
+}
+
 static noinline void check_xa_mark(struct xarray *xa)
 {
 	unsigned long index;
@@ -297,6 +318,7 @@ static noinline void check_xa_mark(struct xarray *xa)
 		check_xa_mark_1(xa, index);
 
 	check_xa_mark_2(xa);
+	check_xa_mark_3(xa);
 }
 
 static noinline void check_xa_shrink(struct xarray *xa)
-- 
cgit v1.2.3


From 7ea6bf2e6c409e245a31b9ac6a4ba24949ad442f Mon Sep 17 00:00:00 2001
From: Ming Lei <ming.lei@redhat.com>
Date: Fri, 9 Oct 2020 12:03:56 +0800
Subject: percpu_ref: don't refer to ref->data if it isn't allocated

We can't check ref->data->confirm_switch directly in __percpu_ref_exit(), since
ref->data may not be allocated in one not-initialized refcount.

Fixes: 2b0d3d3e4fcf ("percpu_ref: reduce memory footprint of percpu_ref in fast path")
Reported-by: syzbot+fd15ff734dace9e16437@syzkaller.appspotmail.com
Signed-off-by: Ming Lei <ming.lei@redhat.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 lib/percpu-refcount.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c
index b6350d13538a..e59eda07305e 100644
--- a/lib/percpu-refcount.c
+++ b/lib/percpu-refcount.c
@@ -109,7 +109,7 @@ static void __percpu_ref_exit(struct percpu_ref *ref)
 
 	if (percpu_count) {
 		/* non-NULL confirm_switch indicates switching in progress */
-		WARN_ON_ONCE(ref->data->confirm_switch);
+		WARN_ON_ONCE(ref->data && ref->data->confirm_switch);
 		free_percpu(percpu_count);
 		ref->percpu_count_ptr = __PERCPU_REF_ATOMIC_DEAD;
 	}
-- 
cgit v1.2.3


From aac35468ca20a3a0e75a24c13c0e31610727f120 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Tue, 4 Aug 2020 13:47:42 -0700
Subject: kunit: test: create a single centralized executor for all tests

Add a centralized executor to dispatch tests rather than relying on
late_initcall to schedule each test suite separately. Centralized
execution is for built-in tests only; modules will execute tests when
loaded.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Co-developed-by: Iurii Zaikin <yzaikin@google.com>
Signed-off-by: Iurii Zaikin <yzaikin@google.com>
Co-developed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h | 67 +++++++++++++++++++++++++++++++++++-----------------
 lib/kunit/Makefile   |  3 ++-
 lib/kunit/executor.c | 28 ++++++++++++++++++++++
 lib/kunit/test.c     |  2 +-
 4 files changed, 76 insertions(+), 24 deletions(-)
 create mode 100644 lib/kunit/executor.c

(limited to 'lib')

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 59f3144f009a..d2d261f58259 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -233,7 +233,7 @@ size_t kunit_suite_num_test_cases(struct kunit_suite *suite);
 unsigned int kunit_test_case_num(struct kunit_suite *suite,
 				 struct kunit_case *test_case);
 
-int __kunit_test_suites_init(struct kunit_suite **suites);
+int __kunit_test_suites_init(struct kunit_suite * const * const suites);
 
 void __kunit_test_suites_exit(struct kunit_suite **suites);
 
@@ -246,34 +246,57 @@ void __kunit_test_suites_exit(struct kunit_suite **suites);
  * Registers @suites_list with the test framework. See &struct kunit_suite for
  * more information.
  *
- * When builtin, KUnit tests are all run as late_initcalls; this means
- * that they cannot test anything where tests must run at a different init
- * phase. One significant restriction resulting from this is that KUnit
- * cannot reliably test anything that is initialize in the late_init phase;
- * another is that KUnit is useless to test things that need to be run in
- * an earlier init phase.
- *
- * An alternative is to build the tests as a module.  Because modules
- * do not support multiple late_initcall()s, we need to initialize an
- * array of suites for a module.
- *
- * TODO(brendanhiggins@google.com): Don't run all KUnit tests as
- * late_initcalls.  I have some future work planned to dispatch all KUnit
- * tests from the same place, and at the very least to do so after
- * everything else is definitely initialized.
+ * If a test suite is built-in, module_init() gets translated into
+ * an initcall which we don't want as the idea is that for builtins
+ * the executor will manage execution.  So ensure we do not define
+ * module_{init|exit} functions for the builtin case when registering
+ * suites via kunit_test_suites() below.
  */
-#define kunit_test_suites(suites_list...)				\
-	static struct kunit_suite *suites[] = {suites_list, NULL};	\
-	static int kunit_test_suites_init(void)				\
+#ifdef MODULE
+#define kunit_test_suites_for_module(__suites)				\
+	static int __init kunit_test_suites_init(void)			\
 	{								\
-		return __kunit_test_suites_init(suites);		\
+		return __kunit_test_suites_init(__suites);		\
 	}								\
-	late_initcall(kunit_test_suites_init);				\
+	module_init(kunit_test_suites_init);				\
+									\
 	static void __exit kunit_test_suites_exit(void)			\
 	{								\
-		return __kunit_test_suites_exit(suites);		\
+		return __kunit_test_suites_exit(__suites);		\
 	}								\
 	module_exit(kunit_test_suites_exit)
+#else
+#define kunit_test_suites_for_module(__suites)
+#endif /* MODULE */
+
+#define __kunit_test_suites(unique_array, unique_suites, ...)		       \
+	static struct kunit_suite *unique_array[] = { __VA_ARGS__, NULL };     \
+	kunit_test_suites_for_module(unique_array);			       \
+	static struct kunit_suite **unique_suites			       \
+	__used __section(.kunit_test_suites) = unique_array
+
+/**
+ * kunit_test_suites() - used to register one or more &struct kunit_suite
+ *			 with KUnit.
+ *
+ * @suites: a statically allocated list of &struct kunit_suite.
+ *
+ * Registers @suites with the test framework. See &struct kunit_suite for
+ * more information.
+ *
+ * When builtin,  KUnit tests are all run via executor; this is done
+ * by placing the array of struct kunit_suite * in the .kunit_test_suites
+ * ELF section.
+ *
+ * An alternative is to build the tests as a module.  Because modules do not
+ * support multiple initcall()s, we need to initialize an array of suites for a
+ * module.
+ *
+ */
+#define kunit_test_suites(...)						\
+	__kunit_test_suites(__UNIQUE_ID(array),				\
+			    __UNIQUE_ID(suites),			\
+			    __VA_ARGS__)
 
 #define kunit_test_suite(suite)	kunit_test_suites(&suite)
 
diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile
index 724b94311ca3..c49f4ffb6273 100644
--- a/lib/kunit/Makefile
+++ b/lib/kunit/Makefile
@@ -3,7 +3,8 @@ obj-$(CONFIG_KUNIT) +=			kunit.o
 kunit-objs +=				test.o \
 					string-stream.o \
 					assert.o \
-					try-catch.o
+					try-catch.o \
+					executor.o
 
 ifeq ($(CONFIG_KUNIT_DEBUGFS),y)
 kunit-objs +=				debugfs.o
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
new file mode 100644
index 000000000000..7015e7328dce
--- /dev/null
+++ b/lib/kunit/executor.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <kunit/test.h>
+
+/*
+ * These symbols point to the .kunit_test_suites section and are defined in
+ * include/asm-generic/vmlinux.lds.h, and consequently must be extern.
+ */
+extern struct kunit_suite * const * const __kunit_suites_start[];
+extern struct kunit_suite * const * const __kunit_suites_end[];
+
+#if IS_BUILTIN(CONFIG_KUNIT)
+
+static int kunit_run_all_tests(void)
+{
+	struct kunit_suite * const * const *suites;
+
+	for (suites = __kunit_suites_start;
+	     suites < __kunit_suites_end;
+	     suites++)
+			__kunit_test_suites_init(*suites);
+
+	return 0;
+}
+
+late_initcall(kunit_run_all_tests);
+
+#endif /* IS_BUILTIN(CONFIG_KUNIT) */
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index c36037200310..3fd89f91937f 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -381,7 +381,7 @@ static void kunit_init_suite(struct kunit_suite *suite)
 	kunit_debugfs_create_suite(suite);
 }
 
-int __kunit_test_suites_init(struct kunit_suite **suites)
+int __kunit_test_suites_init(struct kunit_suite * const * const suites)
 {
 	unsigned int i;
 
-- 
cgit v1.2.3


From 8c0d884986ba22f1020be9c02e41c030890ee8f2 Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Tue, 4 Aug 2020 13:47:43 -0700
Subject: init: main: add KUnit to kernel init

Although we have not seen any actual examples where KUnit doesn't work
because it runs in the late init phase of the kernel, it has been a
concern for some time that this could potentially be an issue in the
future. So, remove KUnit from init calls entirely, instead call directly
from kernel_init() so that KUnit runs after late init.

Co-developed-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 include/kunit/test.h | 9 +++++++++
 init/main.c          | 4 ++++
 lib/kunit/executor.c | 4 +---
 3 files changed, 14 insertions(+), 3 deletions(-)

(limited to 'lib')

diff --git a/include/kunit/test.h b/include/kunit/test.h
index d2d261f58259..7ea24466e49c 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -237,6 +237,15 @@ int __kunit_test_suites_init(struct kunit_suite * const * const suites);
 
 void __kunit_test_suites_exit(struct kunit_suite **suites);
 
+#if IS_BUILTIN(CONFIG_KUNIT)
+int kunit_run_all_tests(void);
+#else
+static inline int kunit_run_all_tests(void)
+{
+	return 0;
+}
+#endif /* IS_BUILTIN(CONFIG_KUNIT) */
+
 /**
  * kunit_test_suites() - used to register one or more &struct kunit_suite
  *			 with KUnit.
diff --git a/init/main.c b/init/main.c
index ae78fb68d231..232c8df465ee 100644
--- a/init/main.c
+++ b/init/main.c
@@ -107,6 +107,8 @@
 #define CREATE_TRACE_POINTS
 #include <trace/events/initcall.h>
 
+#include <kunit/test.h>
+
 static int kernel_init(void *);
 
 extern void init_IRQ(void);
@@ -1511,6 +1513,8 @@ static noinline void __init kernel_init_freeable(void)
 
 	do_basic_setup();
 
+	kunit_run_all_tests();
+
 	console_on_rootfs();
 
 	/*
diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 7015e7328dce..4aab7f70a88c 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -11,7 +11,7 @@ extern struct kunit_suite * const * const __kunit_suites_end[];
 
 #if IS_BUILTIN(CONFIG_KUNIT)
 
-static int kunit_run_all_tests(void)
+int kunit_run_all_tests(void)
 {
 	struct kunit_suite * const * const *suites;
 
@@ -23,6 +23,4 @@ static int kunit_run_all_tests(void)
 	return 0;
 }
 
-late_initcall(kunit_run_all_tests);
-
 #endif /* IS_BUILTIN(CONFIG_KUNIT) */
-- 
cgit v1.2.3


From 45dcbb6f5ef78b0a9c1b91bea2f6f227642a65aa Mon Sep 17 00:00:00 2001
From: Brendan Higgins <brendanhiggins@google.com>
Date: Tue, 4 Aug 2020 13:47:44 -0700
Subject: kunit: test: add test plan to KUnit TAP format

TAP 14 allows an optional test plan to be emitted before the start of
the start of testing[1]; this is valuable because it makes it possible
for a test harness to detect whether the number of tests run matches the
number of tests expected to be run, ensuring that no tests silently
failed.

Link[1]: https://github.com/isaacs/testanything.github.io/blob/tap14/tap-version-14-specification.md#the-plan
Signed-off-by: Brendan Higgins <brendanhiggins@google.com>
Reviewed-by: Stephen Boyd <sboyd@kernel.org>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/kunit/executor.c                               | 17 +++++
 lib/kunit/test.c                                   | 11 ----
 tools/testing/kunit/kunit_parser.py                | 76 ++++++++++++++++++----
 .../test_data/test_is_test_passed-all_passed.log   |  1 +
 .../kunit/test_data/test_is_test_passed-crash.log  |  1 +
 .../test_data/test_is_test_passed-failure.log      |  1 +
 6 files changed, 82 insertions(+), 25 deletions(-)

(limited to 'lib')

diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c
index 4aab7f70a88c..a95742a4ece7 100644
--- a/lib/kunit/executor.c
+++ b/lib/kunit/executor.c
@@ -11,10 +11,27 @@ extern struct kunit_suite * const * const __kunit_suites_end[];
 
 #if IS_BUILTIN(CONFIG_KUNIT)
 
+static void kunit_print_tap_header(void)
+{
+	struct kunit_suite * const * const *suites, * const *subsuite;
+	int num_of_suites = 0;
+
+	for (suites = __kunit_suites_start;
+	     suites < __kunit_suites_end;
+	     suites++)
+		for (subsuite = *suites; *subsuite != NULL; subsuite++)
+			num_of_suites++;
+
+	pr_info("TAP version 14\n");
+	pr_info("1..%d\n", num_of_suites);
+}
+
 int kunit_run_all_tests(void)
 {
 	struct kunit_suite * const * const *suites;
 
+	kunit_print_tap_header();
+
 	for (suites = __kunit_suites_start;
 	     suites < __kunit_suites_end;
 	     suites++)
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index 3fd89f91937f..de07876b6601 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -20,16 +20,6 @@ static void kunit_set_failure(struct kunit *test)
 	WRITE_ONCE(test->success, false);
 }
 
-static void kunit_print_tap_version(void)
-{
-	static bool kunit_has_printed_tap_version;
-
-	if (!kunit_has_printed_tap_version) {
-		pr_info("TAP version 14\n");
-		kunit_has_printed_tap_version = true;
-	}
-}
-
 /*
  * Append formatted message to log, size of which is limited to
  * KUNIT_LOG_SIZE bytes (including null terminating byte).
@@ -69,7 +59,6 @@ EXPORT_SYMBOL_GPL(kunit_suite_num_test_cases);
 
 static void kunit_print_subtest_start(struct kunit_suite *suite)
 {
-	kunit_print_tap_version();
 	kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "# Subtest: %s",
 		  suite->name);
 	kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "1..%zd",
diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py
index f13e0c0d6663..8019e3dd4c32 100644
--- a/tools/testing/kunit/kunit_parser.py
+++ b/tools/testing/kunit/kunit_parser.py
@@ -45,10 +45,11 @@ class TestStatus(Enum):
 	FAILURE = auto()
 	TEST_CRASHED = auto()
 	NO_TESTS = auto()
+	FAILURE_TO_PARSE_TESTS = auto()
 
 kunit_start_re = re.compile(r'TAP version [0-9]+$')
 kunit_end_re = re.compile('(List of all partitions:|'
-			  'Kernel panic - not syncing: VFS:|reboot: System halted)')
+			  'Kernel panic - not syncing: VFS:)')
 
 def isolate_kunit_output(kernel_output):
 	started = False
@@ -109,7 +110,7 @@ OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text'])
 
 OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$')
 
-OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$')
+OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$')
 
 def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool:
 	save_non_diagnositic(lines, test_case)
@@ -197,7 +198,9 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus:
 	else:
 		return TestStatus.SUCCESS
 
-def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool:
+def parse_ok_not_ok_test_suite(lines: List[str],
+			       test_suite: TestSuite,
+			       expected_suite_index: int) -> bool:
 	consume_non_diagnositic(lines)
 	if not lines:
 		test_suite.status = TestStatus.TEST_CRASHED
@@ -210,6 +213,12 @@ def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool:
 			test_suite.status = TestStatus.SUCCESS
 		else:
 			test_suite.status = TestStatus.FAILURE
+		suite_index = int(match.group(2))
+		if suite_index != expected_suite_index:
+			print_with_timestamp(
+				red('[ERROR] ') + 'expected_suite_index ' +
+				str(expected_suite_index) + ', but got ' +
+				str(suite_index))
 		return True
 	else:
 		return False
@@ -222,7 +231,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus:
 	max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases)
 	return max_status(max_test_case_status, test_suite.status)
 
-def parse_test_suite(lines: List[str]) -> TestSuite:
+def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite:
 	if not lines:
 		return None
 	consume_non_diagnositic(lines)
@@ -241,7 +250,7 @@ def parse_test_suite(lines: List[str]) -> TestSuite:
 			break
 		test_suite.cases.append(test_case)
 		expected_test_case_num -= 1
-	if parse_ok_not_ok_test_suite(lines, test_suite):
+	if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index):
 		test_suite.status = bubble_up_test_case_errors(test_suite)
 		return test_suite
 	elif not lines:
@@ -261,6 +270,17 @@ def parse_tap_header(lines: List[str]) -> bool:
 	else:
 		return False
 
+TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)')
+
+def parse_test_plan(lines: List[str]) -> int:
+	consume_non_diagnositic(lines)
+	match = TEST_PLAN.match(lines[0])
+	if match:
+		lines.pop(0)
+		return int(match.group(1))
+	else:
+		return None
+
 def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus:
 	return bubble_up_errors(lambda x: x.status, test_suite_list)
 
@@ -268,20 +288,33 @@ def parse_test_result(lines: List[str]) -> TestResult:
 	consume_non_diagnositic(lines)
 	if not lines or not parse_tap_header(lines):
 		return TestResult(TestStatus.NO_TESTS, [], lines)
+	expected_test_suite_num = parse_test_plan(lines)
+	if not expected_test_suite_num:
+		return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines)
 	test_suites = []
-	test_suite = parse_test_suite(lines)
-	while test_suite:
-		test_suites.append(test_suite)
-		test_suite = parse_test_suite(lines)
-	return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+	for i in range(1, expected_test_suite_num + 1):
+		test_suite = parse_test_suite(lines, i)
+		if test_suite:
+			test_suites.append(test_suite)
+		else:
+			print_with_timestamp(
+				red('[ERROR] ') + ' expected ' +
+				str(expected_test_suite_num) +
+				' test suites, but got ' + str(i - 2))
+			break
+	test_suite = parse_test_suite(lines, -1)
+	if test_suite:
+		print_with_timestamp(red('[ERROR] ') +
+			'got unexpected test suite: ' + test_suite.name)
+	if test_suites:
+		return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines)
+	else:
+		return TestResult(TestStatus.NO_TESTS, [], lines)
 
-def parse_run_tests(kernel_output) -> TestResult:
+def print_and_count_results(test_result: TestResult) -> None:
 	total_tests = 0
 	failed_tests = 0
 	crashed_tests = 0
-	test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
-	if test_result.status == TestStatus.NO_TESTS:
-		print_with_timestamp(red('[ERROR] ') + 'no kunit output detected')
 	for test_suite in test_result.suites:
 		if test_suite.status == TestStatus.SUCCESS:
 			print_suite_divider(green('[PASSED] ') + test_suite.name)
@@ -303,6 +336,21 @@ def parse_run_tests(kernel_output) -> TestResult:
 				print_with_timestamp(red('[FAILED] ') + test_case.name)
 				print_log(map(yellow, test_case.log))
 				print_with_timestamp('')
+	return total_tests, failed_tests, crashed_tests
+
+def parse_run_tests(kernel_output) -> TestResult:
+	total_tests = 0
+	failed_tests = 0
+	crashed_tests = 0
+	test_result = parse_test_result(list(isolate_kunit_output(kernel_output)))
+	if test_result.status == TestStatus.NO_TESTS:
+		print(red('[ERROR] ') + yellow('no tests run!'))
+	elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS:
+		print(red('[ERROR] ') + yellow('could not parse test results!'))
+	else:
+		(total_tests,
+		 failed_tests,
+		 crashed_tests) = print_and_count_results(test_result)
 	print_with_timestamp(DIVIDER)
 	fmt = green if test_result.status == TestStatus.SUCCESS else red
 	print_with_timestamp(
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
index 62ebc0288355..bc0dc8fe35b7 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log
@@ -1,4 +1,5 @@
 TAP version 14
+1..2
 	# Subtest: sysctl_test
 	1..8
 	# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
index 0b249870c8be..4d97f6708c4a 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-crash.log
@@ -1,6 +1,7 @@
 printk: console [tty0] enabled
 printk: console [mc-1] enabled
 TAP version 14
+1..2
 	# Subtest: sysctl_test
 	1..8
 	# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
diff --git a/tools/testing/kunit/test_data/test_is_test_passed-failure.log b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
index 9e89d32d5667..7a416497e3be 100644
--- a/tools/testing/kunit/test_data/test_is_test_passed-failure.log
+++ b/tools/testing/kunit/test_data/test_is_test_passed-failure.log
@@ -1,4 +1,5 @@
 TAP version 14
+1..2
 	# Subtest: sysctl_test
 	1..8
 	# sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed
-- 
cgit v1.2.3


From 44f3625bc61653ea3bde9960298faf2f5518fda5 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 8 Oct 2020 12:45:17 +0200
Subject: netlink: export policy in extended ACK

Add a new attribute NLMSGERR_ATTR_POLICY to the extended ACK
to advertise the policy, e.g. if an attribute was out of range,
you'll know the range that's permissible.

Add new NL_SET_ERR_MSG_ATTR_POL() and NL_SET_ERR_MSG_ATTR_POL()
macros to set this, since realistically it's only useful to do
this when the bad attribute (offset) is also returned.

Use it in lib/nlattr.c which practically does all the policy
validation.

v2:
 - add and use netlink_policy_dump_attr_size_estimate()
v3:
 - remove redundant break
v4:
 - really remove redundant break ... sorry

Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/netlink.h      | 30 ++++++++++++++--------
 include/net/netlink.h        |  4 +++
 include/uapi/linux/netlink.h |  2 ++
 lib/nlattr.c                 | 35 +++++++++++++------------
 net/netlink/af_netlink.c     |  5 ++++
 net/netlink/policy.c         | 61 ++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 110 insertions(+), 27 deletions(-)

(limited to 'lib')

diff --git a/include/linux/netlink.h b/include/linux/netlink.h
index e3e49f0e5c13..666cd0390699 100644
--- a/include/linux/netlink.h
+++ b/include/linux/netlink.h
@@ -68,12 +68,14 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg)
  * @_msg: message string to report - don't access directly, use
  *	%NL_SET_ERR_MSG
  * @bad_attr: attribute with error
+ * @policy: policy for a bad attribute
  * @cookie: cookie data to return to userspace (for success)
  * @cookie_len: actual cookie data length
  */
 struct netlink_ext_ack {
 	const char *_msg;
 	const struct nlattr *bad_attr;
+	const struct nla_policy *policy;
 	u8 cookie[NETLINK_MAX_COOKIE_LEN];
 	u8 cookie_len;
 };
@@ -95,21 +97,29 @@ struct netlink_ext_ack {
 #define NL_SET_ERR_MSG_MOD(extack, msg)			\
 	NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg)
 
-#define NL_SET_BAD_ATTR(extack, attr) do {		\
-	if ((extack))					\
+#define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do {	\
+	if ((extack)) {					\
 		(extack)->bad_attr = (attr);		\
+		(extack)->policy = (pol);		\
+	}						\
 } while (0)
 
-#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do {	\
-	static const char __msg[] = msg;		\
-	struct netlink_ext_ack *__extack = (extack);	\
-							\
-	if (__extack) {					\
-		__extack->_msg = __msg;			\
-		__extack->bad_attr = (attr);		\
-	}						\
+#define NL_SET_BAD_ATTR(extack, attr) NL_SET_BAD_ATTR_POLICY(extack, attr, NULL)
+
+#define NL_SET_ERR_MSG_ATTR_POL(extack, attr, pol, msg) do {	\
+	static const char __msg[] = msg;			\
+	struct netlink_ext_ack *__extack = (extack);		\
+								\
+	if (__extack) {						\
+		__extack->_msg = __msg;				\
+		__extack->bad_attr = (attr);			\
+		__extack->policy = (pol);			\
+	}							\
 } while (0)
 
+#define NL_SET_ERR_MSG_ATTR(extack, attr, msg)		\
+	NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg)
+
 static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack,
 					    u64 cookie)
 {
diff --git a/include/net/netlink.h b/include/net/netlink.h
index 2b9e41075f19..7356f41d23ba 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1957,6 +1957,10 @@ int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state,
 bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state);
 int netlink_policy_dump_write(struct sk_buff *skb,
 			      struct netlink_policy_dump_state *state);
+int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt);
+int netlink_policy_dump_write_attr(struct sk_buff *skb,
+				   const struct nla_policy *pt,
+				   int nestattr);
 void netlink_policy_dump_free(struct netlink_policy_dump_state *state);
 
 #endif
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index d02e472ba54c..c3816ff7bfc3 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -129,6 +129,7 @@ struct nlmsgerr {
  * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to
  *	be used - in the success case - to identify a created
  *	object or operation or similar (binary)
+ * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute
  * @__NLMSGERR_ATTR_MAX: number of attributes
  * @NLMSGERR_ATTR_MAX: highest attribute number
  */
@@ -137,6 +138,7 @@ enum nlmsgerr_attrs {
 	NLMSGERR_ATTR_MSG,
 	NLMSGERR_ATTR_OFFS,
 	NLMSGERR_ATTR_COOKIE,
+	NLMSGERR_ATTR_POLICY,
 
 	__NLMSGERR_ATTR_MAX,
 	NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 9c99f5daa4d2..74019c8ebf6b 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -96,8 +96,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
 			continue;
 
 		if (nla_len(entry) < NLA_HDRLEN) {
-			NL_SET_ERR_MSG_ATTR(extack, entry,
-					    "Array element too short");
+			NL_SET_ERR_MSG_ATTR_POL(extack, entry, policy,
+						"Array element too short");
 			return -ERANGE;
 		}
 
@@ -195,8 +195,8 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt,
 		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
 				    current->comm, pt->type);
 		if (validate & NL_VALIDATE_STRICT_ATTRS) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "invalid attribute length");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"invalid attribute length");
 			return -EINVAL;
 		}
 
@@ -208,11 +208,11 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt,
 		bool binary = pt->type == NLA_BINARY;
 
 		if (binary)
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "binary attribute size out of range");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"binary attribute size out of range");
 		else
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "integer out of range");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"integer out of range");
 
 		return -ERANGE;
 	}
@@ -291,8 +291,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt,
 	nla_get_range_signed(pt, &range);
 
 	if (value < range.min || value > range.max) {
-		NL_SET_ERR_MSG_ATTR(extack, nla,
-				    "integer out of range");
+		NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+					"integer out of range");
 		return -ERANGE;
 	}
 
@@ -377,8 +377,8 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
 				    current->comm, type);
 		if (validate & NL_VALIDATE_STRICT_ATTRS) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "invalid attribute length");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"invalid attribute length");
 			return -EINVAL;
 		}
 	}
@@ -386,14 +386,14 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 	if (validate & NL_VALIDATE_NESTED) {
 		if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) &&
 		    !(nla->nla_type & NLA_F_NESTED)) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "NLA_F_NESTED is missing");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"NLA_F_NESTED is missing");
 			return -EINVAL;
 		}
 		if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY &&
 		    pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "NLA_F_NESTED not expected");
+			NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+						"NLA_F_NESTED not expected");
 			return -EINVAL;
 		}
 	}
@@ -550,7 +550,8 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 
 	return 0;
 out_err:
-	NL_SET_ERR_MSG_ATTR(extack, nla, "Attribute failed policy validation");
+	NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt,
+				"Attribute failed policy validation");
 	return err;
 }
 
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index df675a8e1918..daca50d6bb12 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2420,6 +2420,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
 		tlvlen += nla_total_size(sizeof(u32));
 	if (nlk_has_extack && extack && extack->cookie_len)
 		tlvlen += nla_total_size(extack->cookie_len);
+	if (err && nlk_has_extack && extack && extack->policy)
+		tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy);
 
 	if (tlvlen)
 		flags |= NLM_F_ACK_TLVS;
@@ -2452,6 +2454,9 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err,
 		if (extack->cookie_len)
 			WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE,
 					extack->cookie_len, extack->cookie));
+		if (extack->policy)
+			netlink_policy_dump_write_attr(skb, extack->policy,
+						       NLMSGERR_ATTR_POLICY);
 	}
 
 	nlmsg_end(skb, rep);
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
index 4383436759e2..8d7c900e27f4 100644
--- a/net/netlink/policy.c
+++ b/net/netlink/policy.c
@@ -196,12 +196,54 @@ bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state)
 	return !netlink_policy_dump_finished(state);
 }
 
+int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt)
+{
+	/* nested + type */
+	int common = 2 * nla_attr_size(sizeof(u32));
+
+	switch (pt->type) {
+	case NLA_UNSPEC:
+	case NLA_REJECT:
+		/* these actually don't need any space */
+		return 0;
+	case NLA_NESTED:
+	case NLA_NESTED_ARRAY:
+		/* common, policy idx, policy maxattr */
+		return common + 2 * nla_attr_size(sizeof(u32));
+	case NLA_U8:
+	case NLA_U16:
+	case NLA_U32:
+	case NLA_U64:
+	case NLA_MSECS:
+	case NLA_S8:
+	case NLA_S16:
+	case NLA_S32:
+	case NLA_S64:
+		/* maximum is common, u64 min/max with padding */
+		return common +
+		       2 * (nla_attr_size(0) + nla_attr_size(sizeof(u64)));
+	case NLA_BITFIELD32:
+		return common + nla_attr_size(sizeof(u32));
+	case NLA_STRING:
+	case NLA_NUL_STRING:
+	case NLA_BINARY:
+		/* maximum is common, u32 min-length/max-length */
+		return common + 2 * nla_attr_size(sizeof(u32));
+	case NLA_FLAG:
+		return common;
+	}
+
+	/* this should then cause a warning later */
+	return 0;
+}
+
 static int
 __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state,
 				 struct sk_buff *skb,
 				 const struct nla_policy *pt,
 				 int nestattr)
 {
+	int estimate = netlink_policy_dump_attr_size_estimate(pt);
 	enum netlink_attribute_type type;
 	struct nlattr *attr;
 
@@ -334,12 +376,31 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state,
 		goto nla_put_failure;
 
 	nla_nest_end(skb, attr);
+	WARN_ON(attr->nla_len > estimate);
+
 	return 0;
 nla_put_failure:
 	nla_nest_cancel(skb, attr);
 	return -ENOBUFS;
 }
 
+/**
+ * netlink_policy_dump_write_attr - write a given attribute policy
+ * @skb: the message skb to write to
+ * @pt: the attribute's policy
+ * @nestattr: the nested attribute ID to use
+ *
+ * Returns: 0 on success, an error code otherwise; -%ENODATA is
+ *	    special, indicating that there's no policy data and
+ *	    the attribute is generally rejected.
+ */
+int netlink_policy_dump_write_attr(struct sk_buff *skb,
+				   const struct nla_policy *pt,
+				   int nestattr)
+{
+	return __netlink_policy_dump_write_attr(NULL, skb, pt, nestattr);
+}
+
 /**
  * netlink_policy_dump_write - write current policy dump attributes
  * @skb: the message skb to write to
-- 
cgit v1.2.3


From f82cd2f0b5eb715b1a296e20b34da7d296b6e9a4 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 18 Aug 2020 09:05:56 -0400
Subject: XArray: Add private interface for workingset node deletion

Move the tricky bits of dealing with the XArray from the workingset
code to the XArray.  Make it clear in the documentation that this is a
private interface, and only export it for the benefit of the test suite.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 include/linux/xarray.h |  2 ++
 lib/test_xarray.c      |  7 +------
 lib/xarray.c           | 23 +++++++++++++++++++++++
 mm/workingset.c        | 13 ++-----------
 4 files changed, 28 insertions(+), 17 deletions(-)

(limited to 'lib')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 6b336098fca7..29db4e16eb89 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1286,6 +1286,8 @@ static inline bool xa_is_advanced(const void *entry)
  */
 typedef void (*xa_update_node_t)(struct xa_node *node);
 
+void xa_delete_node(struct xa_node *, xa_update_node_t);
+
 /*
  * The xa_state is opaque to its users.  It contains various different pieces
  * of state involved in the current operation on the XArray.  It should be
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index 1122c4453c87..64ae07b1bcf4 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1600,14 +1600,9 @@ static noinline void shadow_remove(struct xarray *xa)
 	xa_lock(xa);
 	while ((node = list_first_entry_or_null(&shadow_nodes,
 					struct xa_node, private_list))) {
-		XA_STATE(xas, node->array, 0);
 		XA_BUG_ON(xa, node->array != xa);
 		list_del_init(&node->private_list);
-		xas.xa_node = xa_parent_locked(node->array, node);
-		xas.xa_offset = node->offset;
-		xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
-		xas_set_update(&xas, test_update_node);
-		xas_store(&xas, NULL);
+		xa_delete_node(node, test_update_node);
 	}
 	xa_unlock(xa);
 }
diff --git a/lib/xarray.c b/lib/xarray.c
index e9e641d3c0c3..1fa5c5658e63 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1973,6 +1973,29 @@ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start,
 }
 EXPORT_SYMBOL(xa_extract);
 
+/**
+ * xa_delete_node() - Private interface for workingset code.
+ * @node: Node to be removed from the tree.
+ * @update: Function to call to update ancestor nodes.
+ *
+ * Context: xa_lock must be held on entry and will not be released.
+ */
+void xa_delete_node(struct xa_node *node, xa_update_node_t update)
+{
+	struct xa_state xas = {
+		.xa = node->array,
+		.xa_index = (unsigned long)node->offset <<
+				(node->shift + XA_CHUNK_SHIFT),
+		.xa_shift = node->shift + XA_CHUNK_SHIFT,
+		.xa_offset = node->offset,
+		.xa_node = xa_parent_locked(node->array, node),
+		.xa_update = update,
+	};
+
+	xas_store(&xas, NULL);
+}
+EXPORT_SYMBOL_GPL(xa_delete_node);	/* For the benefit of the test suite */
+
 /**
  * xa_destroy() - Free all internal data structures.
  * @xa: XArray.
diff --git a/mm/workingset.c b/mm/workingset.c
index 92e66113a577..e185bfb8bd4e 100644
--- a/mm/workingset.c
+++ b/mm/workingset.c
@@ -519,12 +519,11 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 					  void *arg) __must_hold(lru_lock)
 {
 	struct xa_node *node = container_of(item, struct xa_node, private_list);
-	XA_STATE(xas, node->array, 0);
 	struct address_space *mapping;
 	int ret;
 
 	/*
-	 * Page cache insertions and deletions synchroneously maintain
+	 * Page cache insertions and deletions synchronously maintain
 	 * the shadow node LRU under the i_pages lock and the
 	 * lru_lock.  Because the page cache tree is emptied before
 	 * the inode can be destroyed, holding the lru_lock pins any
@@ -559,15 +558,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item,
 	if (WARN_ON_ONCE(node->count != node->nr_values))
 		goto out_invalid;
 	mapping->nrexceptional -= node->nr_values;
-	xas.xa_node = xa_parent_locked(&mapping->i_pages, node);
-	xas.xa_offset = node->offset;
-	xas.xa_shift = node->shift + XA_CHUNK_SHIFT;
-	xas_set_update(&xas, workingset_update_node);
-	/*
-	 * We could store a shadow entry here which was the minimum of the
-	 * shadow entries we were tracking ...
-	 */
-	xas_store(&xas, NULL);
+	xa_delete_node(node, workingset_update_node);
 	__inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM);
 
 out_invalid:
-- 
cgit v1.2.3


From 84c34df158cf215b0cd1475ab3b8e6f212f81f23 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 13 Oct 2020 08:46:29 -0400
Subject: XArray: Fix xas_create_range for ranges above 4 billion

The 'sibs' variable would be shifted as a 32-bit integer, so if 'shift'
is more than 32, this is undefined behaviour.  In practice, this doesn't
happen because the page cache is the only user and nobody uses 16TB pages.

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
---
 lib/xarray.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/xarray.c b/lib/xarray.c
index 1fa5c5658e63..2046d676ab41 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -703,7 +703,7 @@ void xas_create_range(struct xa_state *xas)
 	unsigned char shift = xas->xa_shift;
 	unsigned char sibs = xas->xa_sibs;
 
-	xas->xa_index |= ((sibs + 1) << shift) - 1;
+	xas->xa_index |= ((sibs + 1UL) << shift) - 1;
 	if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift)
 		xas->xa_offset |= sibs;
 	xas->xa_shift = 0;
-- 
cgit v1.2.3


From d2585f5164c298aaaed14c2c8d313cbe7bd5b253 Mon Sep 17 00:00:00 2001
From: Vitor Massaru Iha <vitor@massaru.org>
Date: Wed, 29 Jul 2020 14:58:49 -0300
Subject: lib: kunit: add bitfield test conversion to KUnit

This adds the conversion of the runtime tests of test_bitfield,
from `lib/test_bitfield.c` to KUnit tests.

Code Style Documentation: [0]

Signed-off-by: Vitor Massaru Iha <vitor@massaru.org>
Link: [0] https://lore.kernel.org/linux-kselftest/20200620054944.167330-1-davidgow@google.com/T/#u
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/Kconfig.debug    |  23 ++++---
 lib/Makefile         |   2 +-
 lib/bitfield_kunit.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++
 lib/test_bitfield.c  | 168 ---------------------------------------------------
 4 files changed, 173 insertions(+), 176 deletions(-)
 create mode 100644 lib/bitfield_kunit.c
 delete mode 100644 lib/test_bitfield.c

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index e068c3c7189a..4f09c6505a2e 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2037,13 +2037,6 @@ config TEST_BITMAP
 
 	  If unsure, say N.
 
-config TEST_BITFIELD
-	tristate "Test bitfield functions at runtime"
-	help
-	  Enable this option to test the bitfield functions at boot.
-
-	  If unsure, say N.
-
 config TEST_UUID
 	tristate "Test functions located in the uuid module at runtime"
 
@@ -2193,6 +2186,22 @@ config TEST_SYSCTL
 
 	  If unsure, say N.
 
+config BITFIELD_KUNIT
+	tristate "KUnit test bitfield functions at runtime"
+	depends on KUNIT
+	help
+	  Enable this option to test the bitfield functions at boot.
+
+	  KUnit tests run during boot and output the results to the debug log
+	  in TAP format (http://testanything.org/). Only useful for kernel devs
+	  running the KUnit test harness, and not intended for inclusion into a
+	  production build.
+
+	  For more information on KUnit and unit tests in general please refer
+	  to the KUnit documentation in Documentation/dev-tools/kunit/.
+
+	  If unsure, say N.
+
 config SYSCTL_KUNIT_TEST
 	tristate "KUnit test for sysctl" if !KUNIT_ALL_TESTS
 	depends on KUNIT
diff --git a/lib/Makefile b/lib/Makefile
index e290fc5707ea..d862d41fdc3d 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -80,7 +80,6 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o
 obj-$(CONFIG_TEST_PRINTF) += test_printf.o
 obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o
 obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o
-obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o
 obj-$(CONFIG_TEST_UUID) += test_uuid.o
 obj-$(CONFIG_TEST_XARRAY) += test_xarray.o
 obj-$(CONFIG_TEST_PARMAN) += test_parman.o
@@ -340,6 +339,7 @@ obj-$(CONFIG_OBJAGG) += objagg.o
 obj-$(CONFIG_PLDMFW) += pldmfw/
 
 # KUnit tests
+obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o
 obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o
 obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o
 obj-$(CONFIG_BITS_TEST) += test_bits.o
diff --git a/lib/bitfield_kunit.c b/lib/bitfield_kunit.c
new file mode 100644
index 000000000000..d63a2be5aff8
--- /dev/null
+++ b/lib/bitfield_kunit.c
@@ -0,0 +1,156 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * Test cases for bitfield helpers.
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#include <kunit/test.h>
+#include <linux/bitfield.h>
+
+#define CHECK_ENC_GET_U(tp, v, field, res) do {				\
+		{							\
+			u##tp _res;					\
+									\
+			_res = u##tp##_encode_bits(v, field);		\
+			KUNIT_ASSERT_FALSE_MSG(context, _res != res,	\
+				       "u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n",	\
+				       (u64)_res);			\
+			KUNIT_ASSERT_FALSE(context,			\
+				   u##tp##_get_bits(_res, field) != v);	\
+		}							\
+	} while (0)
+
+#define CHECK_ENC_GET_LE(tp, v, field, res) do {			\
+		{							\
+			__le##tp _res;					\
+									\
+			_res = le##tp##_encode_bits(v, field);		\
+			KUNIT_ASSERT_FALSE_MSG(context,			\
+				       _res != cpu_to_le##tp(res),	\
+				       "le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx",\
+				       (u64)le##tp##_to_cpu(_res),	\
+				       (u64)(res));			\
+			KUNIT_ASSERT_FALSE(context,			\
+				   le##tp##_get_bits(_res, field) != v);\
+		}							\
+	} while (0)
+
+#define CHECK_ENC_GET_BE(tp, v, field, res) do {			\
+		{							\
+			__be##tp _res;					\
+									\
+			_res = be##tp##_encode_bits(v, field);		\
+			KUNIT_ASSERT_FALSE_MSG(context,			\
+				       _res != cpu_to_be##tp(res),	\
+				       "be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx",	\
+				       (u64)be##tp##_to_cpu(_res),	\
+				       (u64)(res));			\
+			KUNIT_ASSERT_FALSE(context,			\
+				   be##tp##_get_bits(_res, field) != v);\
+		}							\
+	} while (0)
+
+#define CHECK_ENC_GET(tp, v, field, res) do {				\
+		CHECK_ENC_GET_U(tp, v, field, res);			\
+		CHECK_ENC_GET_LE(tp, v, field, res);			\
+		CHECK_ENC_GET_BE(tp, v, field, res);			\
+	} while (0)
+
+static void __init test_bitfields_constants(struct kunit *context)
+{
+	/*
+	 * NOTE
+	 * This whole function compiles (or at least should, if everything
+	 * is going according to plan) to nothing after optimisation.
+	 */
+
+	CHECK_ENC_GET(16,  1, 0x000f, 0x0001);
+	CHECK_ENC_GET(16,  3, 0x00f0, 0x0030);
+	CHECK_ENC_GET(16,  5, 0x0f00, 0x0500);
+	CHECK_ENC_GET(16,  7, 0xf000, 0x7000);
+	CHECK_ENC_GET(16, 14, 0x000f, 0x000e);
+	CHECK_ENC_GET(16, 15, 0x00f0, 0x00f0);
+
+	CHECK_ENC_GET_U(8,  1, 0x0f, 0x01);
+	CHECK_ENC_GET_U(8,  3, 0xf0, 0x30);
+	CHECK_ENC_GET_U(8, 14, 0x0f, 0x0e);
+	CHECK_ENC_GET_U(8, 15, 0xf0, 0xf0);
+
+	CHECK_ENC_GET(32,  1, 0x00000f00, 0x00000100);
+	CHECK_ENC_GET(32,  3, 0x0000f000, 0x00003000);
+	CHECK_ENC_GET(32,  5, 0x000f0000, 0x00050000);
+	CHECK_ENC_GET(32,  7, 0x00f00000, 0x00700000);
+	CHECK_ENC_GET(32, 14, 0x0f000000, 0x0e000000);
+	CHECK_ENC_GET(32, 15, 0xf0000000, 0xf0000000);
+
+	CHECK_ENC_GET(64,  1, 0x00000f0000000000ull, 0x0000010000000000ull);
+	CHECK_ENC_GET(64,  3, 0x0000f00000000000ull, 0x0000300000000000ull);
+	CHECK_ENC_GET(64,  5, 0x000f000000000000ull, 0x0005000000000000ull);
+	CHECK_ENC_GET(64,  7, 0x00f0000000000000ull, 0x0070000000000000ull);
+	CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull);
+	CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull);
+}
+
+#define CHECK(tp, mask) do {						\
+		u64 v;							\
+									\
+		for (v = 0; v < 1 << hweight32(mask); v++)		\
+			KUNIT_ASSERT_FALSE(context,			\
+				tp##_encode_bits(v, mask) != v << __ffs64(mask));\
+	} while (0)
+
+static void __init test_bitfields_variables(struct kunit *context)
+{
+	CHECK(u8, 0x0f);
+	CHECK(u8, 0xf0);
+	CHECK(u8, 0x38);
+
+	CHECK(u16, 0x0038);
+	CHECK(u16, 0x0380);
+	CHECK(u16, 0x3800);
+	CHECK(u16, 0x8000);
+
+	CHECK(u32, 0x80000000);
+	CHECK(u32, 0x7f000000);
+	CHECK(u32, 0x07e00000);
+	CHECK(u32, 0x00018000);
+
+	CHECK(u64, 0x8000000000000000ull);
+	CHECK(u64, 0x7f00000000000000ull);
+	CHECK(u64, 0x0001800000000000ull);
+	CHECK(u64, 0x0000000080000000ull);
+	CHECK(u64, 0x000000007f000000ull);
+	CHECK(u64, 0x0000000018000000ull);
+	CHECK(u64, 0x0000001f8000000ull);
+}
+
+
+static void __init test_bitfields_compile(struct kunit *context)
+{
+	/* these should fail compilation */
+	CHECK_ENC_GET(16, 16, 0x0f00, 0x1000);
+	u32_encode_bits(7, 0x06000000);
+
+	/* this should at least give a warning */
+	u16_encode_bits(0, 0x60000);
+}
+
+static struct kunit_case __refdata bitfields_test_cases[] = {
+	KUNIT_CASE(test_bitfields_constants),
+	KUNIT_CASE(test_bitfields_variables),
+#ifdef TEST_BITFIELD_COMPILE
+	KUNIT_CASE(test_bitfields_compile),
+#endif
+	{}
+};
+
+static struct kunit_suite bitfields_test_suite = {
+	.name = "bitfields",
+	.test_cases = bitfields_test_cases,
+};
+
+kunit_test_suites(&bitfields_test_suite);
+
+MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
+MODULE_LICENSE("GPL");
diff --git a/lib/test_bitfield.c b/lib/test_bitfield.c
deleted file mode 100644
index 5b8f4108662d..000000000000
--- a/lib/test_bitfield.c
+++ /dev/null
@@ -1,168 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0+
-/*
- * Test cases for bitfield helpers.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/kernel.h>
-#include <linux/module.h>
-#include <linux/bitfield.h>
-
-#define CHECK_ENC_GET_U(tp, v, field, res) do {				\
-		{							\
-			u##tp _res;					\
-									\
-			_res = u##tp##_encode_bits(v, field);		\
-			if (_res != res) {				\
-				pr_warn("u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n",\
-					(u64)_res);			\
-				return -EINVAL;				\
-			}						\
-			if (u##tp##_get_bits(_res, field) != v)		\
-				return -EINVAL;				\
-		}							\
-	} while (0)
-
-#define CHECK_ENC_GET_LE(tp, v, field, res) do {			\
-		{							\
-			__le##tp _res;					\
-									\
-			_res = le##tp##_encode_bits(v, field);		\
-			if (_res != cpu_to_le##tp(res)) {		\
-				pr_warn("le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\
-					(u64)le##tp##_to_cpu(_res),	\
-					(u64)(res));			\
-				return -EINVAL;				\
-			}						\
-			if (le##tp##_get_bits(_res, field) != v)	\
-				return -EINVAL;				\
-		}							\
-	} while (0)
-
-#define CHECK_ENC_GET_BE(tp, v, field, res) do {			\
-		{							\
-			__be##tp _res;					\
-									\
-			_res = be##tp##_encode_bits(v, field);		\
-			if (_res != cpu_to_be##tp(res)) {		\
-				pr_warn("be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\
-					(u64)be##tp##_to_cpu(_res),	\
-					(u64)(res));			\
-				return -EINVAL;				\
-			}						\
-			if (be##tp##_get_bits(_res, field) != v)	\
-				return -EINVAL;				\
-		}							\
-	} while (0)
-
-#define CHECK_ENC_GET(tp, v, field, res) do {				\
-		CHECK_ENC_GET_U(tp, v, field, res);			\
-		CHECK_ENC_GET_LE(tp, v, field, res);			\
-		CHECK_ENC_GET_BE(tp, v, field, res);			\
-	} while (0)
-
-static int test_constants(void)
-{
-	/*
-	 * NOTE
-	 * This whole function compiles (or at least should, if everything
-	 * is going according to plan) to nothing after optimisation.
-	 */
-
-	CHECK_ENC_GET(16,  1, 0x000f, 0x0001);
-	CHECK_ENC_GET(16,  3, 0x00f0, 0x0030);
-	CHECK_ENC_GET(16,  5, 0x0f00, 0x0500);
-	CHECK_ENC_GET(16,  7, 0xf000, 0x7000);
-	CHECK_ENC_GET(16, 14, 0x000f, 0x000e);
-	CHECK_ENC_GET(16, 15, 0x00f0, 0x00f0);
-
-	CHECK_ENC_GET_U(8,  1, 0x0f, 0x01);
-	CHECK_ENC_GET_U(8,  3, 0xf0, 0x30);
-	CHECK_ENC_GET_U(8, 14, 0x0f, 0x0e);
-	CHECK_ENC_GET_U(8, 15, 0xf0, 0xf0);
-
-	CHECK_ENC_GET(32,  1, 0x00000f00, 0x00000100);
-	CHECK_ENC_GET(32,  3, 0x0000f000, 0x00003000);
-	CHECK_ENC_GET(32,  5, 0x000f0000, 0x00050000);
-	CHECK_ENC_GET(32,  7, 0x00f00000, 0x00700000);
-	CHECK_ENC_GET(32, 14, 0x0f000000, 0x0e000000);
-	CHECK_ENC_GET(32, 15, 0xf0000000, 0xf0000000);
-
-	CHECK_ENC_GET(64,  1, 0x00000f0000000000ull, 0x0000010000000000ull);
-	CHECK_ENC_GET(64,  3, 0x0000f00000000000ull, 0x0000300000000000ull);
-	CHECK_ENC_GET(64,  5, 0x000f000000000000ull, 0x0005000000000000ull);
-	CHECK_ENC_GET(64,  7, 0x00f0000000000000ull, 0x0070000000000000ull);
-	CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull);
-	CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull);
-
-	return 0;
-}
-
-#define CHECK(tp, mask) do {						\
-		u64 v;							\
-									\
-		for (v = 0; v < 1 << hweight32(mask); v++)		\
-			if (tp##_encode_bits(v, mask) != v << __ffs64(mask)) \
-				return -EINVAL;				\
-	} while (0)
-
-static int test_variables(void)
-{
-	CHECK(u8, 0x0f);
-	CHECK(u8, 0xf0);
-	CHECK(u8, 0x38);
-
-	CHECK(u16, 0x0038);
-	CHECK(u16, 0x0380);
-	CHECK(u16, 0x3800);
-	CHECK(u16, 0x8000);
-
-	CHECK(u32, 0x80000000);
-	CHECK(u32, 0x7f000000);
-	CHECK(u32, 0x07e00000);
-	CHECK(u32, 0x00018000);
-
-	CHECK(u64, 0x8000000000000000ull);
-	CHECK(u64, 0x7f00000000000000ull);
-	CHECK(u64, 0x0001800000000000ull);
-	CHECK(u64, 0x0000000080000000ull);
-	CHECK(u64, 0x000000007f000000ull);
-	CHECK(u64, 0x0000000018000000ull);
-	CHECK(u64, 0x0000001f8000000ull);
-
-	return 0;
-}
-
-static int __init test_bitfields(void)
-{
-	int ret = test_constants();
-
-	if (ret) {
-		pr_warn("constant tests failed!\n");
-		return ret;
-	}
-
-	ret = test_variables();
-	if (ret) {
-		pr_warn("variable tests failed!\n");
-		return ret;
-	}
-
-#ifdef TEST_BITFIELD_COMPILE
-	/* these should fail compilation */
-	CHECK_ENC_GET(16, 16, 0x0f00, 0x1000);
-	u32_encode_bits(7, 0x06000000);
-
-	/* this should at least give a warning */
-	u16_encode_bits(0, 0x60000);
-#endif
-
-	pr_info("tests passed\n");
-
-	return 0;
-}
-module_init(test_bitfields)
-
-MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>");
-MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From 527f6750d92beb9c787d8aba48477b1e834d64e5 Mon Sep 17 00:00:00 2001
From: Marco Elver <elver@google.com>
Date: Tue, 13 Oct 2020 16:47:51 -0700
Subject: kasan: remove mentions of unsupported Clang versions

Since the kernel now requires at least Clang 10.0.1, remove any mention of
old Clang versions and simplify the documentation.

Signed-off-by: Marco Elver <elver@google.com>
Signed-off-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Kees Cook <keescook@chromium.org>
Reviewed-by: Nathan Chancellor <natechancellor@gmail.com>
Cc: Fangrui Song <maskray@google.com>
Cc: Miguel Ojeda <miguel.ojeda.sandonis@gmail.com>
Cc: Sedat Dilek <sedat.dilek@gmail.com>
Cc: Alexei Starovoitov <ast@kernel.org>
Cc: Daniel Borkmann <daniel@iogearbox.net>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Vincenzo Frascino <vincenzo.frascino@arm.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lkml.kernel.org/r/20200902225911.209899-7-ndesaulniers@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/dev-tools/kasan.rst | 4 ++--
 lib/Kconfig.kasan                 | 9 ++++-----
 2 files changed, 6 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/Documentation/dev-tools/kasan.rst b/Documentation/dev-tools/kasan.rst
index 38fd5681fade..4abc84b1798c 100644
--- a/Documentation/dev-tools/kasan.rst
+++ b/Documentation/dev-tools/kasan.rst
@@ -13,10 +13,10 @@ KASAN uses compile-time instrumentation to insert validity checks before every
 memory access, and therefore requires a compiler version that supports that.
 
 Generic KASAN is supported in both GCC and Clang. With GCC it requires version
-8.3.0 or later. With Clang it requires version 7.0.0 or later, but detection of
+8.3.0 or later. Any supported Clang version is compatible, but detection of
 out-of-bounds accesses for global variables is only supported since Clang 11.
 
-Tag-based KASAN is only supported in Clang and requires version 7.0.0 or later.
+Tag-based KASAN is only supported in Clang.
 
 Currently generic KASAN is supported for the x86_64, arm64, xtensa, s390 and
 riscv architectures, and tag-based KASAN is supported only for arm64.
diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 047b53dbfd58..033a5bc67ac4 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -54,9 +54,9 @@ config KASAN_GENERIC
 	  Enables generic KASAN mode.
 
 	  This mode is supported in both GCC and Clang. With GCC it requires
-	  version 8.3.0 or later. With Clang it requires version 7.0.0 or
-	  later, but detection of out-of-bounds accesses for global variables
-	  is supported only since Clang 11.
+	  version 8.3.0 or later. Any supported Clang version is compatible,
+	  but detection of out-of-bounds accesses for global variables is
+	  supported only since Clang 11.
 
 	  This mode consumes about 1/8th of available memory at kernel start
 	  and introduces an overhead of ~x1.5 for the rest of the allocations.
@@ -78,8 +78,7 @@ config KASAN_SW_TAGS
 	  Enables software tag-based KASAN mode.
 
 	  This mode requires Top Byte Ignore support by the CPU and therefore
-	  is only supported for arm64. This mode requires Clang version 7.0.0
-	  or later.
+	  is only supported for arm64. This mode requires Clang.
 
 	  This mode consumes about 1/16th of available memory at kernel start
 	  and introduces an overhead of ~20% for the rest of the allocations.
-- 
cgit v1.2.3


From a4574f63edc6f76fb46dcd65d3eb4d5a8e23ba38 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 13 Oct 2020 16:50:29 -0700
Subject: mm/memremap_pages: convert to 'struct range'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The 'struct resource' in 'struct dev_pagemap' is only used for holding
resource span information.  The other fields, 'name', 'flags', 'desc',
'parent', 'sibling', and 'child' are all unused wasted space.

This is in preparation for introducing a multi-range extension of
devm_memremap_pages().

The bulk of this change is unwinding all the places internal to libnvdimm
that used 'struct resource' unnecessarily, and replacing instances of
'struct dev_pagemap'.res with 'struct dev_pagemap'.range.

P2PDMA had a minor usage of the resource flags field, but only to report
failures with "%pR".  That is replaced with an open coded print of the
range.

[dan.carpenter@oracle.com: mm/hmm/test: use after free in dmirror_allocate_chunk()]
  Link: https://lkml.kernel.org/r/20200926121402.GA7467@kadam

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Boris Ostrovsky <boris.ostrovsky@oracle.com>	[xen]
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hulk Robot <hulkci@huawei.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jason Yan <yanaijie@huawei.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: Jia He <justin.he@arm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lkml.kernel.org/r/159643103173.4062302.768998885691711532.stgit@dwillia2-desk3.amr.corp.intel.com
Link: https://lkml.kernel.org/r/160106115761.30709.13539840236873663620.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kvm/book3s_hv_uvmem.c     | 13 +++---
 drivers/dax/bus.c                      | 10 ++---
 drivers/dax/bus.h                      |  2 +-
 drivers/dax/dax-private.h              |  5 ---
 drivers/dax/device.c                   |  3 +-
 drivers/dax/hmem/hmem.c                |  5 ++-
 drivers/dax/pmem/core.c                | 12 +++---
 drivers/gpu/drm/nouveau/nouveau_dmem.c | 14 +++----
 drivers/nvdimm/badrange.c              | 26 ++++++------
 drivers/nvdimm/claim.c                 | 13 +++---
 drivers/nvdimm/nd.h                    |  3 +-
 drivers/nvdimm/pfn_devs.c              | 12 +++---
 drivers/nvdimm/pmem.c                  | 26 ++++++------
 drivers/nvdimm/region.c                | 21 ++++++----
 drivers/pci/p2pdma.c                   | 11 +++--
 drivers/xen/unpopulated-alloc.c        | 44 ++++++++++++-------
 include/linux/memremap.h               |  5 ++-
 include/linux/range.h                  |  6 +++
 lib/test_hmm.c                         | 50 +++++++++++-----------
 mm/memremap.c                          | 77 +++++++++++++++++-----------------
 tools/testing/nvdimm/test/iomap.c      |  2 +-
 21 files changed, 195 insertions(+), 165 deletions(-)

(limited to 'lib')

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 7705d5557239..29ec555055c2 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -687,9 +687,9 @@ static struct page *kvmppc_uvmem_get_page(unsigned long gpa, struct kvm *kvm)
 	struct kvmppc_uvmem_page_pvt *pvt;
 	unsigned long pfn_last, pfn_first;
 
-	pfn_first = kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT;
+	pfn_first = kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT;
 	pfn_last = pfn_first +
-		   (resource_size(&kvmppc_uvmem_pgmap.res) >> PAGE_SHIFT);
+		   (range_len(&kvmppc_uvmem_pgmap.range) >> PAGE_SHIFT);
 
 	spin_lock(&kvmppc_uvmem_bitmap_lock);
 	bit = find_first_zero_bit(kvmppc_uvmem_bitmap,
@@ -1007,7 +1007,7 @@ static vm_fault_t kvmppc_uvmem_migrate_to_ram(struct vm_fault *vmf)
 static void kvmppc_uvmem_page_free(struct page *page)
 {
 	unsigned long pfn = page_to_pfn(page) -
-			(kvmppc_uvmem_pgmap.res.start >> PAGE_SHIFT);
+			(kvmppc_uvmem_pgmap.range.start >> PAGE_SHIFT);
 	struct kvmppc_uvmem_page_pvt *pvt;
 
 	spin_lock(&kvmppc_uvmem_bitmap_lock);
@@ -1170,7 +1170,8 @@ int kvmppc_uvmem_init(void)
 	}
 
 	kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
-	kvmppc_uvmem_pgmap.res = *res;
+	kvmppc_uvmem_pgmap.range.start = res->start;
+	kvmppc_uvmem_pgmap.range.end = res->end;
 	kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
 	/* just one global instance: */
 	kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
@@ -1205,7 +1206,7 @@ void kvmppc_uvmem_free(void)
 		return;
 
 	memunmap_pages(&kvmppc_uvmem_pgmap);
-	release_mem_region(kvmppc_uvmem_pgmap.res.start,
-			   resource_size(&kvmppc_uvmem_pgmap.res));
+	release_mem_region(kvmppc_uvmem_pgmap.range.start,
+			   range_len(&kvmppc_uvmem_pgmap.range));
 	kfree(kvmppc_uvmem_bitmap);
 }
diff --git a/drivers/dax/bus.c b/drivers/dax/bus.c
index 53d07f2f1285..00fa73a8dfb4 100644
--- a/drivers/dax/bus.c
+++ b/drivers/dax/bus.c
@@ -515,7 +515,7 @@ static void dax_region_unregister(void *region)
 }
 
 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
-		struct resource *res, int target_node, unsigned int align,
+		struct range *range, int target_node, unsigned int align,
 		unsigned long flags)
 {
 	struct dax_region *dax_region;
@@ -530,8 +530,8 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 		return NULL;
 	}
 
-	if (!IS_ALIGNED(res->start, align)
-			|| !IS_ALIGNED(resource_size(res), align))
+	if (!IS_ALIGNED(range->start, align)
+			|| !IS_ALIGNED(range_len(range), align))
 		return NULL;
 
 	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
@@ -546,8 +546,8 @@ struct dax_region *alloc_dax_region(struct device *parent, int region_id,
 	dax_region->target_node = target_node;
 	ida_init(&dax_region->ida);
 	dax_region->res = (struct resource) {
-		.start = res->start,
-		.end = res->end,
+		.start = range->start,
+		.end = range->end,
 		.flags = IORESOURCE_MEM | flags,
 	};
 
diff --git a/drivers/dax/bus.h b/drivers/dax/bus.h
index da27ea70a19a..72b92f95509f 100644
--- a/drivers/dax/bus.h
+++ b/drivers/dax/bus.h
@@ -13,7 +13,7 @@ void dax_region_put(struct dax_region *dax_region);
 
 #define IORESOURCE_DAX_STATIC (1UL << 0)
 struct dax_region *alloc_dax_region(struct device *parent, int region_id,
-		struct resource *res, int target_node, unsigned int align,
+		struct range *range, int target_node, unsigned int align,
 		unsigned long flags);
 
 enum dev_dax_subsys {
diff --git a/drivers/dax/dax-private.h b/drivers/dax/dax-private.h
index b81a1494d82b..0cbb2ec81ca7 100644
--- a/drivers/dax/dax-private.h
+++ b/drivers/dax/dax-private.h
@@ -61,11 +61,6 @@ struct dev_dax {
 	struct range range;
 };
 
-static inline u64 range_len(struct range *range)
-{
-	return range->end - range->start + 1;
-}
-
 static inline struct dev_dax *to_dev_dax(struct device *dev)
 {
 	return container_of(dev, struct dev_dax, dev);
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index 9833fa83b537..a14448bca83d 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -416,8 +416,7 @@ int dev_dax_probe(struct dev_dax *dev_dax)
 		pgmap = devm_kzalloc(dev, sizeof(*pgmap), GFP_KERNEL);
 		if (!pgmap)
 			return -ENOMEM;
-		pgmap->res.start = range->start;
-		pgmap->res.end = range->end;
+		pgmap->range = *range;
 	}
 	pgmap->type = MEMORY_DEVICE_GENERIC;
 	addr = devm_memremap_pages(dev, pgmap);
diff --git a/drivers/dax/hmem/hmem.c b/drivers/dax/hmem/hmem.c
index aa260009dfc7..1a3347bb6143 100644
--- a/drivers/dax/hmem/hmem.c
+++ b/drivers/dax/hmem/hmem.c
@@ -13,13 +13,16 @@ static int dax_hmem_probe(struct platform_device *pdev)
 	struct dev_dax_data data;
 	struct dev_dax *dev_dax;
 	struct resource *res;
+	struct range range;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res)
 		return -ENOMEM;
 
 	mri = dev->platform_data;
-	dax_region = alloc_dax_region(dev, pdev->id, res, mri->target_node,
+	range.start = res->start;
+	range.end = res->end;
+	dax_region = alloc_dax_region(dev, pdev->id, &range, mri->target_node,
 			PMD_SIZE, 0);
 	if (!dax_region)
 		return -ENOMEM;
diff --git a/drivers/dax/pmem/core.c b/drivers/dax/pmem/core.c
index 4fe700884338..62b26bfceab1 100644
--- a/drivers/dax/pmem/core.c
+++ b/drivers/dax/pmem/core.c
@@ -9,7 +9,7 @@
 
 struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
 {
-	struct resource res;
+	struct range range;
 	int rc, id, region_id;
 	resource_size_t offset;
 	struct nd_pfn_sb *pfn_sb;
@@ -50,10 +50,10 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
 	if (rc != 2)
 		return ERR_PTR(-EINVAL);
 
-	/* adjust the dax_region resource to the start of data */
-	memcpy(&res, &pgmap.res, sizeof(res));
-	res.start += offset;
-	dax_region = alloc_dax_region(dev, region_id, &res,
+	/* adjust the dax_region range to the start of data */
+	range = pgmap.range;
+	range.start += offset,
+	dax_region = alloc_dax_region(dev, region_id, &range,
 			nd_region->target_node, le32_to_cpu(pfn_sb->align),
 			IORESOURCE_DAX_STATIC);
 	if (!dax_region)
@@ -64,7 +64,7 @@ struct dev_dax *__dax_pmem_probe(struct device *dev, enum dev_dax_subsys subsys)
 		.id = id,
 		.pgmap = &pgmap,
 		.subsys = subsys,
-		.size = resource_size(&res),
+		.size = range_len(&range),
 	};
 	dev_dax = devm_create_dev_dax(&data);
 
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 4e8112fde3e6..25811ed7e274 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -101,7 +101,7 @@ unsigned long nouveau_dmem_page_addr(struct page *page)
 {
 	struct nouveau_dmem_chunk *chunk = nouveau_page_to_chunk(page);
 	unsigned long off = (page_to_pfn(page) << PAGE_SHIFT) -
-				chunk->pagemap.res.start;
+				chunk->pagemap.range.start;
 
 	return chunk->bo->offset + off;
 }
@@ -249,7 +249,8 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
 
 	chunk->drm = drm;
 	chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
-	chunk->pagemap.res = *res;
+	chunk->pagemap.range.start = res->start;
+	chunk->pagemap.range.end = res->end;
 	chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
 	chunk->pagemap.owner = drm->dev;
 
@@ -273,7 +274,7 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
 	list_add(&chunk->list, &drm->dmem->chunks);
 	mutex_unlock(&drm->dmem->mutex);
 
-	pfn_first = chunk->pagemap.res.start >> PAGE_SHIFT;
+	pfn_first = chunk->pagemap.range.start >> PAGE_SHIFT;
 	page = pfn_to_page(pfn_first);
 	spin_lock(&drm->dmem->lock);
 	for (i = 0; i < DMEM_CHUNK_NPAGES - 1; ++i, ++page) {
@@ -294,8 +295,7 @@ out_bo_unpin:
 out_bo_free:
 	nouveau_bo_ref(NULL, &chunk->bo);
 out_release:
-	release_mem_region(chunk->pagemap.res.start,
-			   resource_size(&chunk->pagemap.res));
+	release_mem_region(chunk->pagemap.range.start, range_len(&chunk->pagemap.range));
 out_free:
 	kfree(chunk);
 out:
@@ -382,8 +382,8 @@ nouveau_dmem_fini(struct nouveau_drm *drm)
 		nouveau_bo_ref(NULL, &chunk->bo);
 		list_del(&chunk->list);
 		memunmap_pages(&chunk->pagemap);
-		release_mem_region(chunk->pagemap.res.start,
-				   resource_size(&chunk->pagemap.res));
+		release_mem_region(chunk->pagemap.range.start,
+				   range_len(&chunk->pagemap.range));
 		kfree(chunk);
 	}
 
diff --git a/drivers/nvdimm/badrange.c b/drivers/nvdimm/badrange.c
index b9eeefa27e3a..aaf6e215a8c6 100644
--- a/drivers/nvdimm/badrange.c
+++ b/drivers/nvdimm/badrange.c
@@ -211,7 +211,7 @@ static void __add_badblock_range(struct badblocks *bb, u64 ns_offset, u64 len)
 }
 
 static void badblocks_populate(struct badrange *badrange,
-		struct badblocks *bb, const struct resource *res)
+		struct badblocks *bb, const struct range *range)
 {
 	struct badrange_entry *bre;
 
@@ -222,34 +222,34 @@ static void badblocks_populate(struct badrange *badrange,
 		u64 bre_end = bre->start + bre->length - 1;
 
 		/* Discard intervals with no intersection */
-		if (bre_end < res->start)
+		if (bre_end < range->start)
 			continue;
-		if (bre->start >  res->end)
+		if (bre->start > range->end)
 			continue;
 		/* Deal with any overlap after start of the namespace */
-		if (bre->start >= res->start) {
+		if (bre->start >= range->start) {
 			u64 start = bre->start;
 			u64 len;
 
-			if (bre_end <= res->end)
+			if (bre_end <= range->end)
 				len = bre->length;
 			else
-				len = res->start + resource_size(res)
+				len = range->start + range_len(range)
 					- bre->start;
-			__add_badblock_range(bb, start - res->start, len);
+			__add_badblock_range(bb, start - range->start, len);
 			continue;
 		}
 		/*
 		 * Deal with overlap for badrange starting before
 		 * the namespace.
 		 */
-		if (bre->start < res->start) {
+		if (bre->start < range->start) {
 			u64 len;
 
-			if (bre_end < res->end)
-				len = bre->start + bre->length - res->start;
+			if (bre_end < range->end)
+				len = bre->start + bre->length - range->start;
 			else
-				len = resource_size(res);
+				len = range_len(range);
 			__add_badblock_range(bb, 0, len);
 		}
 	}
@@ -267,7 +267,7 @@ static void badblocks_populate(struct badrange *badrange,
  * and add badblocks entries for all matching sub-ranges
  */
 void nvdimm_badblocks_populate(struct nd_region *nd_region,
-		struct badblocks *bb, const struct resource *res)
+		struct badblocks *bb, const struct range *range)
 {
 	struct nvdimm_bus *nvdimm_bus;
 
@@ -279,7 +279,7 @@ void nvdimm_badblocks_populate(struct nd_region *nd_region,
 	nvdimm_bus = walk_to_nvdimm_bus(&nd_region->dev);
 
 	nvdimm_bus_lock(&nvdimm_bus->dev);
-	badblocks_populate(&nvdimm_bus->badrange, bb, res);
+	badblocks_populate(&nvdimm_bus->badrange, bb, range);
 	nvdimm_bus_unlock(&nvdimm_bus->dev);
 }
 EXPORT_SYMBOL_GPL(nvdimm_badblocks_populate);
diff --git a/drivers/nvdimm/claim.c b/drivers/nvdimm/claim.c
index 22d865ba6353..5a7c80053c62 100644
--- a/drivers/nvdimm/claim.c
+++ b/drivers/nvdimm/claim.c
@@ -303,13 +303,16 @@ static int nsio_rw_bytes(struct nd_namespace_common *ndns,
 int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio,
 		resource_size_t size)
 {
-	struct resource *res = &nsio->res;
 	struct nd_namespace_common *ndns = &nsio->common;
+	struct range range = {
+		.start = nsio->res.start,
+		.end = nsio->res.end,
+	};
 
 	nsio->size = size;
-	if (!devm_request_mem_region(dev, res->start, size,
+	if (!devm_request_mem_region(dev, range.start, size,
 				dev_name(&ndns->dev))) {
-		dev_warn(dev, "could not reserve region %pR\n", res);
+		dev_warn(dev, "could not reserve region %pR\n", &nsio->res);
 		return -EBUSY;
 	}
 
@@ -317,9 +320,9 @@ int devm_nsio_enable(struct device *dev, struct nd_namespace_io *nsio,
 	if (devm_init_badblocks(dev, &nsio->bb))
 		return -ENOMEM;
 	nvdimm_badblocks_populate(to_nd_region(ndns->dev.parent), &nsio->bb,
-			&nsio->res);
+			&range);
 
-	nsio->addr = devm_memremap(dev, res->start, size, ARCH_MEMREMAP_PMEM);
+	nsio->addr = devm_memremap(dev, range.start, size, ARCH_MEMREMAP_PMEM);
 
 	return PTR_ERR_OR_ZERO(nsio->addr);
 }
diff --git a/drivers/nvdimm/nd.h b/drivers/nvdimm/nd.h
index 72740108ba42..696b55556d4d 100644
--- a/drivers/nvdimm/nd.h
+++ b/drivers/nvdimm/nd.h
@@ -377,8 +377,9 @@ int nvdimm_namespace_detach_btt(struct nd_btt *nd_btt);
 const char *nvdimm_namespace_disk_name(struct nd_namespace_common *ndns,
 		char *name);
 unsigned int pmem_sector_size(struct nd_namespace_common *ndns);
+struct range;
 void nvdimm_badblocks_populate(struct nd_region *nd_region,
-		struct badblocks *bb, const struct resource *res);
+		struct badblocks *bb, const struct range *range);
 int devm_namespace_enable(struct device *dev, struct nd_namespace_common *ndns,
 		resource_size_t size);
 void devm_namespace_disable(struct device *dev,
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 3e11ef8d3f5b..3c4787b92a6a 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -672,7 +672,7 @@ static unsigned long init_altmap_reserve(resource_size_t base)
 
 static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 {
-	struct resource *res = &pgmap->res;
+	struct range *range = &pgmap->range;
 	struct vmem_altmap *altmap = &pgmap->altmap;
 	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
 	u64 offset = le64_to_cpu(pfn_sb->dataoff);
@@ -689,16 +689,16 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 		.end_pfn = PHYS_PFN(end),
 	};
 
-	memcpy(res, &nsio->res, sizeof(*res));
-	res->start += start_pad;
-	res->end -= end_trunc;
-
+	*range = (struct range) {
+		.start = nsio->res.start + start_pad,
+		.end = nsio->res.end - end_trunc,
+	};
 	if (nd_pfn->mode == PFN_MODE_RAM) {
 		if (offset < reserve)
 			return -EINVAL;
 		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
 	} else if (nd_pfn->mode == PFN_MODE_PMEM) {
-		nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset));
+		nd_pfn->npfns = PHYS_PFN((range_len(range) - offset));
 		if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
 			dev_info(&nd_pfn->dev,
 					"number of pfns truncated from %lld to %ld\n",
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index c86a0ceaece6..1f394f44838f 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -375,7 +375,7 @@ static int pmem_attach_disk(struct device *dev,
 	struct nd_region *nd_region = to_nd_region(dev->parent);
 	int nid = dev_to_node(dev), fua;
 	struct resource *res = &nsio->res;
-	struct resource bb_res;
+	struct range bb_range;
 	struct nd_pfn *nd_pfn = NULL;
 	struct dax_device *dax_dev;
 	struct nd_pfn_sb *pfn_sb;
@@ -434,24 +434,26 @@ static int pmem_attach_disk(struct device *dev,
 		pfn_sb = nd_pfn->pfn_sb;
 		pmem->data_offset = le64_to_cpu(pfn_sb->dataoff);
 		pmem->pfn_pad = resource_size(res) -
-			resource_size(&pmem->pgmap.res);
+			range_len(&pmem->pgmap.range);
 		pmem->pfn_flags |= PFN_MAP;
-		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
-		bb_res.start += pmem->data_offset;
+		bb_range = pmem->pgmap.range;
+		bb_range.start += pmem->data_offset;
 	} else if (pmem_should_map_pages(dev)) {
-		memcpy(&pmem->pgmap.res, &nsio->res, sizeof(pmem->pgmap.res));
+		pmem->pgmap.range.start = res->start;
+		pmem->pgmap.range.end = res->end;
 		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
 		pmem->pgmap.ops = &fsdax_pagemap_ops;
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
 		pmem->pfn_flags |= PFN_MAP;
-		memcpy(&bb_res, &pmem->pgmap.res, sizeof(bb_res));
+		bb_range = pmem->pgmap.range;
 	} else {
 		if (devm_add_action_or_reset(dev, pmem_release_queue,
 					&pmem->pgmap))
 			return -ENOMEM;
 		addr = devm_memremap(dev, pmem->phys_addr,
 				pmem->size, ARCH_MEMREMAP_PMEM);
-		memcpy(&bb_res, &nsio->res, sizeof(bb_res));
+		bb_range.start =  res->start;
+		bb_range.end = res->end;
 	}
 
 	if (IS_ERR(addr))
@@ -480,7 +482,7 @@ static int pmem_attach_disk(struct device *dev,
 			/ 512);
 	if (devm_init_badblocks(dev, &pmem->bb))
 		return -ENOMEM;
-	nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_res);
+	nvdimm_badblocks_populate(nd_region, &pmem->bb, &bb_range);
 	disk->bb = &pmem->bb;
 
 	if (is_nvdimm_sync(nd_region))
@@ -591,8 +593,8 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 	resource_size_t offset = 0, end_trunc = 0;
 	struct nd_namespace_common *ndns;
 	struct nd_namespace_io *nsio;
-	struct resource res;
 	struct badblocks *bb;
+	struct range range;
 	struct kernfs_node *bb_state;
 
 	if (event != NVDIMM_REVALIDATE_POISON)
@@ -628,9 +630,9 @@ static void nd_pmem_notify(struct device *dev, enum nvdimm_event event)
 		nsio = to_nd_namespace_io(&ndns->dev);
 	}
 
-	res.start = nsio->res.start + offset;
-	res.end = nsio->res.end - end_trunc;
-	nvdimm_badblocks_populate(nd_region, bb, &res);
+	range.start = nsio->res.start + offset;
+	range.end = nsio->res.end - end_trunc;
+	nvdimm_badblocks_populate(nd_region, bb, &range);
 	if (bb_state)
 		sysfs_notify_dirent(bb_state);
 }
diff --git a/drivers/nvdimm/region.c b/drivers/nvdimm/region.c
index 0f6978e72e7c..bfce87ed72ab 100644
--- a/drivers/nvdimm/region.c
+++ b/drivers/nvdimm/region.c
@@ -35,7 +35,10 @@ static int nd_region_probe(struct device *dev)
 		return rc;
 
 	if (is_memory(&nd_region->dev)) {
-		struct resource ndr_res;
+		struct range range = {
+			.start = nd_region->ndr_start,
+			.end = nd_region->ndr_start + nd_region->ndr_size - 1,
+		};
 
 		if (devm_init_badblocks(dev, &nd_region->bb))
 			return -ENODEV;
@@ -44,9 +47,7 @@ static int nd_region_probe(struct device *dev)
 		if (!nd_region->bb_state)
 			dev_warn(&nd_region->dev,
 					"'badblocks' notification disabled\n");
-		ndr_res.start = nd_region->ndr_start;
-		ndr_res.end = nd_region->ndr_start + nd_region->ndr_size - 1;
-		nvdimm_badblocks_populate(nd_region, &nd_region->bb, &ndr_res);
+		nvdimm_badblocks_populate(nd_region, &nd_region->bb, &range);
 	}
 
 	rc = nd_region_register_namespaces(nd_region, &err);
@@ -121,14 +122,16 @@ static void nd_region_notify(struct device *dev, enum nvdimm_event event)
 {
 	if (event == NVDIMM_REVALIDATE_POISON) {
 		struct nd_region *nd_region = to_nd_region(dev);
-		struct resource res;
 
 		if (is_memory(&nd_region->dev)) {
-			res.start = nd_region->ndr_start;
-			res.end = nd_region->ndr_start +
-				nd_region->ndr_size - 1;
+			struct range range = {
+				.start = nd_region->ndr_start,
+				.end = nd_region->ndr_start +
+					nd_region->ndr_size - 1,
+			};
+
 			nvdimm_badblocks_populate(nd_region,
-					&nd_region->bb, &res);
+					&nd_region->bb, &range);
 			if (nd_region->bb_state)
 				sysfs_notify_dirent(nd_region->bb_state);
 		}
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index f357f9a32b3a..256850513813 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -185,9 +185,8 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 		return -ENOMEM;
 
 	pgmap = &p2p_pgmap->pgmap;
-	pgmap->res.start = pci_resource_start(pdev, bar) + offset;
-	pgmap->res.end = pgmap->res.start + size - 1;
-	pgmap->res.flags = pci_resource_flags(pdev, bar);
+	pgmap->range.start = pci_resource_start(pdev, bar) + offset;
+	pgmap->range.end = pgmap->range.start + size - 1;
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
 
 	p2p_pgmap->provider = pdev;
@@ -202,13 +201,13 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 
 	error = gen_pool_add_owner(pdev->p2pdma->pool, (unsigned long)addr,
 			pci_bus_address(pdev, bar) + offset,
-			resource_size(&pgmap->res), dev_to_node(&pdev->dev),
+			range_len(&pgmap->range), dev_to_node(&pdev->dev),
 			pgmap->ref);
 	if (error)
 		goto pages_free;
 
-	pci_info(pdev, "added peer-to-peer DMA memory %pR\n",
-		 &pgmap->res);
+	pci_info(pdev, "added peer-to-peer DMA memory %#llx-%#llx\n",
+		 pgmap->range.start, pgmap->range.end);
 
 	return 0;
 
diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c
index 3b98dc921426..091b8669eca3 100644
--- a/drivers/xen/unpopulated-alloc.c
+++ b/drivers/xen/unpopulated-alloc.c
@@ -18,27 +18,37 @@ static unsigned int list_count;
 static int fill_list(unsigned int nr_pages)
 {
 	struct dev_pagemap *pgmap;
+	struct resource *res;
 	void *vaddr;
 	unsigned int i, alloc_pages = round_up(nr_pages, PAGES_PER_SECTION);
-	int ret;
+	int ret = -ENOMEM;
+
+	res = kzalloc(sizeof(*res), GFP_KERNEL);
+	if (!res)
+		return -ENOMEM;
 
 	pgmap = kzalloc(sizeof(*pgmap), GFP_KERNEL);
 	if (!pgmap)
-		return -ENOMEM;
+		goto err_pgmap;
 
 	pgmap->type = MEMORY_DEVICE_GENERIC;
-	pgmap->res.name = "Xen scratch";
-	pgmap->res.flags = IORESOURCE_MEM | IORESOURCE_BUSY;
+	res->name = "Xen scratch";
+	res->flags = IORESOURCE_MEM | IORESOURCE_BUSY;
 
-	ret = allocate_resource(&iomem_resource, &pgmap->res,
+	ret = allocate_resource(&iomem_resource, res,
 				alloc_pages * PAGE_SIZE, 0, -1,
 				PAGES_PER_SECTION * PAGE_SIZE, NULL, NULL);
 	if (ret < 0) {
 		pr_err("Cannot allocate new IOMEM resource\n");
-		kfree(pgmap);
-		return ret;
+		goto err_resource;
 	}
 
+	pgmap->range = (struct range) {
+		.start = res->start,
+		.end = res->end,
+	};
+	pgmap->owner = res;
+
 #ifdef CONFIG_XEN_HAVE_PVMMU
         /*
          * memremap will build page tables for the new memory so
@@ -50,14 +60,13 @@ static int fill_list(unsigned int nr_pages)
          * conflict with any devices.
          */
 	if (!xen_feature(XENFEAT_auto_translated_physmap)) {
-		xen_pfn_t pfn = PFN_DOWN(pgmap->res.start);
+		xen_pfn_t pfn = PFN_DOWN(res->start);
 
 		for (i = 0; i < alloc_pages; i++) {
 			if (!set_phys_to_machine(pfn + i, INVALID_P2M_ENTRY)) {
 				pr_warn("set_phys_to_machine() failed, no memory added\n");
-				release_resource(&pgmap->res);
-				kfree(pgmap);
-				return -ENOMEM;
+				ret = -ENOMEM;
+				goto err_memremap;
 			}
                 }
 	}
@@ -66,9 +75,8 @@ static int fill_list(unsigned int nr_pages)
 	vaddr = memremap_pages(pgmap, NUMA_NO_NODE);
 	if (IS_ERR(vaddr)) {
 		pr_err("Cannot remap memory range\n");
-		release_resource(&pgmap->res);
-		kfree(pgmap);
-		return PTR_ERR(vaddr);
+		ret = PTR_ERR(vaddr);
+		goto err_memremap;
 	}
 
 	for (i = 0; i < alloc_pages; i++) {
@@ -80,6 +88,14 @@ static int fill_list(unsigned int nr_pages)
 	}
 
 	return 0;
+
+err_memremap:
+	release_resource(res);
+err_resource:
+	kfree(pgmap);
+err_pgmap:
+	kfree(res);
+	return ret;
 }
 
 /**
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index e5862746751b..d0dd261d87c0 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -1,6 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_MEMREMAP_H_
 #define _LINUX_MEMREMAP_H_
+#include <linux/range.h>
 #include <linux/ioport.h>
 #include <linux/percpu-refcount.h>
 
@@ -93,7 +94,7 @@ struct dev_pagemap_ops {
 /**
  * struct dev_pagemap - metadata for ZONE_DEVICE mappings
  * @altmap: pre-allocated/reserved memory for vmemmap allocations
- * @res: physical address range covered by @ref
+ * @range: physical address range covered by @ref
  * @ref: reference count that pins the devm_memremap_pages() mapping
  * @internal_ref: internal reference if @ref is not provided by the caller
  * @done: completion for @internal_ref
@@ -106,7 +107,7 @@ struct dev_pagemap_ops {
  */
 struct dev_pagemap {
 	struct vmem_altmap altmap;
-	struct resource res;
+	struct range range;
 	struct percpu_ref *ref;
 	struct percpu_ref internal_ref;
 	struct completion done;
diff --git a/include/linux/range.h b/include/linux/range.h
index d1fbeb664012..274681cc3154 100644
--- a/include/linux/range.h
+++ b/include/linux/range.h
@@ -1,12 +1,18 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 #ifndef _LINUX_RANGE_H
 #define _LINUX_RANGE_H
+#include <linux/types.h>
 
 struct range {
 	u64   start;
 	u64   end;
 };
 
+static inline u64 range_len(const struct range *range)
+{
+	return range->end - range->start + 1;
+}
+
 int add_range(struct range *range, int az, int nr_range,
 		u64 start, u64 end);
 
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e7dc3de355b7..e97ca8ec0bce 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -460,6 +460,21 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
 	unsigned long pfn_last;
 	void *ptr;
 
+	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
+	if (!devmem)
+		return -ENOMEM;
+
+	res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
+				      "hmm_dmirror");
+	if (IS_ERR(res))
+		goto err_devmem;
+
+	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
+	devmem->pagemap.range.start = res->start;
+	devmem->pagemap.range.end = res->end;
+	devmem->pagemap.ops = &dmirror_devmem_ops;
+	devmem->pagemap.owner = mdevice;
+
 	mutex_lock(&mdevice->devmem_lock);
 
 	if (mdevice->devmem_count == mdevice->devmem_capacity) {
@@ -472,33 +487,18 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
 				sizeof(new_chunks[0]) * new_capacity,
 				GFP_KERNEL);
 		if (!new_chunks)
-			goto err;
+			goto err_release;
 		mdevice->devmem_capacity = new_capacity;
 		mdevice->devmem_chunks = new_chunks;
 	}
 
-	res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
-					"hmm_dmirror");
-	if (IS_ERR(res))
-		goto err;
-
-	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
-	if (!devmem)
-		goto err_release;
-
-	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
-	devmem->pagemap.res = *res;
-	devmem->pagemap.ops = &dmirror_devmem_ops;
-	devmem->pagemap.owner = mdevice;
-
 	ptr = memremap_pages(&devmem->pagemap, numa_node_id());
 	if (IS_ERR(ptr))
-		goto err_free;
+		goto err_release;
 
 	devmem->mdevice = mdevice;
-	pfn_first = devmem->pagemap.res.start >> PAGE_SHIFT;
-	pfn_last = pfn_first +
-		(resource_size(&devmem->pagemap.res) >> PAGE_SHIFT);
+	pfn_first = devmem->pagemap.range.start >> PAGE_SHIFT;
+	pfn_last = pfn_first + (range_len(&devmem->pagemap.range) >> PAGE_SHIFT);
 	mdevice->devmem_chunks[mdevice->devmem_count++] = devmem;
 
 	mutex_unlock(&mdevice->devmem_lock);
@@ -525,12 +525,12 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
 
 	return true;
 
-err_free:
-	kfree(devmem);
 err_release:
-	release_mem_region(res->start, resource_size(res));
-err:
 	mutex_unlock(&mdevice->devmem_lock);
+	release_mem_region(devmem->pagemap.range.start, range_len(&devmem->pagemap.range));
+err_devmem:
+	kfree(devmem);
+
 	return false;
 }
 
@@ -1100,8 +1100,8 @@ static void dmirror_device_remove(struct dmirror_device *mdevice)
 				mdevice->devmem_chunks[i];
 
 			memunmap_pages(&devmem->pagemap);
-			release_mem_region(devmem->pagemap.res.start,
-					   resource_size(&devmem->pagemap.res));
+			release_mem_region(devmem->pagemap.range.start,
+					   range_len(&devmem->pagemap.range));
 			kfree(devmem);
 		}
 		kfree(mdevice->devmem_chunks);
diff --git a/mm/memremap.c b/mm/memremap.c
index 006dace60b1a..d958d348b3ca 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -70,24 +70,24 @@ static void devmap_managed_enable_put(void)
 }
 #endif /* CONFIG_DEV_PAGEMAP_OPS */
 
-static void pgmap_array_delete(struct resource *res)
+static void pgmap_array_delete(struct range *range)
 {
-	xa_store_range(&pgmap_array, PHYS_PFN(res->start), PHYS_PFN(res->end),
+	xa_store_range(&pgmap_array, PHYS_PFN(range->start), PHYS_PFN(range->end),
 			NULL, GFP_KERNEL);
 	synchronize_rcu();
 }
 
 static unsigned long pfn_first(struct dev_pagemap *pgmap)
 {
-	return PHYS_PFN(pgmap->res.start) +
+	return PHYS_PFN(pgmap->range.start) +
 		vmem_altmap_offset(pgmap_altmap(pgmap));
 }
 
 static unsigned long pfn_end(struct dev_pagemap *pgmap)
 {
-	const struct resource *res = &pgmap->res;
+	const struct range *range = &pgmap->range;
 
-	return (res->start + resource_size(res)) >> PAGE_SHIFT;
+	return (range->start + range_len(range)) >> PAGE_SHIFT;
 }
 
 static unsigned long pfn_next(unsigned long pfn)
@@ -126,7 +126,7 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
 
 void memunmap_pages(struct dev_pagemap *pgmap)
 {
-	struct resource *res = &pgmap->res;
+	struct range *range = &pgmap->range;
 	struct page *first_page;
 	unsigned long pfn;
 	int nid;
@@ -143,20 +143,20 @@ void memunmap_pages(struct dev_pagemap *pgmap)
 	nid = page_to_nid(first_page);
 
 	mem_hotplug_begin();
-	remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(res->start),
-				   PHYS_PFN(resource_size(res)));
+	remove_pfn_range_from_zone(page_zone(first_page), PHYS_PFN(range->start),
+				   PHYS_PFN(range_len(range)));
 	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
-		__remove_pages(PHYS_PFN(res->start),
-			       PHYS_PFN(resource_size(res)), NULL);
+		__remove_pages(PHYS_PFN(range->start),
+			       PHYS_PFN(range_len(range)), NULL);
 	} else {
-		arch_remove_memory(nid, res->start, resource_size(res),
+		arch_remove_memory(nid, range->start, range_len(range),
 				pgmap_altmap(pgmap));
-		kasan_remove_zero_shadow(__va(res->start), resource_size(res));
+		kasan_remove_zero_shadow(__va(range->start), range_len(range));
 	}
 	mem_hotplug_done();
 
-	untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
-	pgmap_array_delete(res);
+	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
+	pgmap_array_delete(range);
 	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
 	devmap_managed_enable_put();
 }
@@ -182,7 +182,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
  */
 void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 {
-	struct resource *res = &pgmap->res;
+	struct range *range = &pgmap->range;
 	struct dev_pagemap *conflict_pgmap;
 	struct mhp_params params = {
 		/*
@@ -251,7 +251,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 			return ERR_PTR(error);
 	}
 
-	conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->start), NULL);
+	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL);
 	if (conflict_pgmap) {
 		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
@@ -259,7 +259,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 		goto err_array;
 	}
 
-	conflict_pgmap = get_dev_pagemap(PHYS_PFN(res->end), NULL);
+	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL);
 	if (conflict_pgmap) {
 		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
@@ -267,26 +267,27 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 		goto err_array;
 	}
 
-	is_ram = region_intersects(res->start, resource_size(res),
+	is_ram = region_intersects(range->start, range_len(range),
 		IORESOURCE_SYSTEM_RAM, IORES_DESC_NONE);
 
 	if (is_ram != REGION_DISJOINT) {
-		WARN_ONCE(1, "%s attempted on %s region %pr\n", __func__,
-				is_ram == REGION_MIXED ? "mixed" : "ram", res);
+		WARN_ONCE(1, "attempted on %s region %#llx-%#llx\n",
+				is_ram == REGION_MIXED ? "mixed" : "ram",
+				range->start, range->end);
 		error = -ENXIO;
 		goto err_array;
 	}
 
-	error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(res->start),
-				PHYS_PFN(res->end), pgmap, GFP_KERNEL));
+	error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(range->start),
+				PHYS_PFN(range->end), pgmap, GFP_KERNEL));
 	if (error)
 		goto err_array;
 
 	if (nid < 0)
 		nid = numa_mem_id();
 
-	error = track_pfn_remap(NULL, &params.pgprot, PHYS_PFN(res->start),
-				0, resource_size(res));
+	error = track_pfn_remap(NULL, &params.pgprot, PHYS_PFN(range->start), 0,
+			range_len(range));
 	if (error)
 		goto err_pfn_remap;
 
@@ -304,16 +305,16 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 	 * arch_add_memory().
 	 */
 	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
-		error = add_pages(nid, PHYS_PFN(res->start),
-				PHYS_PFN(resource_size(res)), &params);
+		error = add_pages(nid, PHYS_PFN(range->start),
+				PHYS_PFN(range_len(range)), &params);
 	} else {
-		error = kasan_add_zero_shadow(__va(res->start), resource_size(res));
+		error = kasan_add_zero_shadow(__va(range->start), range_len(range));
 		if (error) {
 			mem_hotplug_done();
 			goto err_kasan;
 		}
 
-		error = arch_add_memory(nid, res->start, resource_size(res),
+		error = arch_add_memory(nid, range->start, range_len(range),
 					&params);
 	}
 
@@ -321,8 +322,8 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 		struct zone *zone;
 
 		zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
-		move_pfn_range_to_zone(zone, PHYS_PFN(res->start),
-				PHYS_PFN(resource_size(res)), params.altmap);
+		move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
+				PHYS_PFN(range_len(range)), params.altmap);
 	}
 
 	mem_hotplug_done();
@@ -334,17 +335,17 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 	 * to allow us to do the work while not holding the hotplug lock.
 	 */
 	memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
-				PHYS_PFN(res->start),
-				PHYS_PFN(resource_size(res)), pgmap);
+				PHYS_PFN(range->start),
+				PHYS_PFN(range_len(range)), pgmap);
 	percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
-	return __va(res->start);
+	return __va(range->start);
 
  err_add_memory:
-	kasan_remove_zero_shadow(__va(res->start), resource_size(res));
+	kasan_remove_zero_shadow(__va(range->start), range_len(range));
  err_kasan:
-	untrack_pfn(NULL, PHYS_PFN(res->start), resource_size(res));
+	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
  err_pfn_remap:
-	pgmap_array_delete(res);
+	pgmap_array_delete(range);
  err_array:
 	dev_pagemap_kill(pgmap);
 	dev_pagemap_cleanup(pgmap);
@@ -369,7 +370,7 @@ EXPORT_SYMBOL_GPL(memremap_pages);
  *    'live' on entry and will be killed and reaped at
  *    devm_memremap_pages_release() time, or if this routine fails.
  *
- * 4/ res is expected to be a host memory range that could feasibly be
+ * 4/ range is expected to be a host memory range that could feasibly be
  *    treated as a "System RAM" range, i.e. not a device mmio range, but
  *    this is not enforced.
  */
@@ -426,7 +427,7 @@ struct dev_pagemap *get_dev_pagemap(unsigned long pfn,
 	 * In the cached case we're already holding a live reference.
 	 */
 	if (pgmap) {
-		if (phys >= pgmap->res.start && phys <= pgmap->res.end)
+		if (phys >= pgmap->range.start && phys <= pgmap->range.end)
 			return pgmap;
 		put_dev_pagemap(pgmap);
 	}
diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c
index 03e40b3b0106..c62d372d426f 100644
--- a/tools/testing/nvdimm/test/iomap.c
+++ b/tools/testing/nvdimm/test/iomap.c
@@ -126,7 +126,7 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 void *__wrap_devm_memremap_pages(struct device *dev, struct dev_pagemap *pgmap)
 {
 	int error;
-	resource_size_t offset = pgmap->res.start;
+	resource_size_t offset = pgmap->range.start;
 	struct nfit_test_resource *nfit_res = get_nfit_res(offset);
 
 	if (!nfit_res)
-- 
cgit v1.2.3


From b7b3c01b191596d27a6980d1a42504f5b607f802 Mon Sep 17 00:00:00 2001
From: Dan Williams <dan.j.williams@intel.com>
Date: Tue, 13 Oct 2020 16:50:34 -0700
Subject: mm/memremap_pages: support multiple ranges per invocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In support of device-dax growing the ability to front physically
dis-contiguous ranges of memory, update devm_memremap_pages() to track
multiple ranges with a single reference counter and devm instance.

Convert all [devm_]memremap_pages() users to specify the number of ranges
they are mapping in their 'struct dev_pagemap' instance.

Signed-off-by: Dan Williams <dan.j.williams@intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Paul Mackerras <paulus@ozlabs.org>
Cc: Michael Ellerman <mpe@ellerman.id.au>
Cc: Benjamin Herrenschmidt <benh@kernel.crashing.org>
Cc: Vishal Verma <vishal.l.verma@intel.com>
Cc: Vivek Goyal <vgoyal@redhat.com>
Cc: Dave Jiang <dave.jiang@intel.com>
Cc: Ben Skeggs <bskeggs@redhat.com>
Cc: David Airlie <airlied@linux.ie>
Cc: Daniel Vetter <daniel@ffwll.ch>
Cc: Ira Weiny <ira.weiny@intel.com>
Cc: Bjorn Helgaas <bhelgaas@google.com>
Cc: Boris Ostrovsky <boris.ostrovsky@oracle.com>
Cc: Juergen Gross <jgross@suse.com>
Cc: Stefano Stabellini <sstabellini@kernel.org>
Cc: "Jérôme Glisse" <jglisse@redhat.co
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Ard Biesheuvel <ard.biesheuvel@linaro.org>
Cc: Ard Biesheuvel <ardb@kernel.org>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Brice Goglin <Brice.Goglin@inria.fr>
Cc: Catalin Marinas <catalin.marinas@arm.com>
Cc: Dave Hansen <dave.hansen@linux.intel.com>
Cc: David Hildenbrand <david@redhat.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Hulk Robot <hulkci@huawei.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jason Gunthorpe <jgg@mellanox.com>
Cc: Jason Yan <yanaijie@huawei.com>
Cc: Jeff Moyer <jmoyer@redhat.com>
Cc: "Jérôme Glisse" <jglisse@redhat.com>
Cc: Jia He <justin.he@arm.com>
Cc: Joao Martins <joao.m.martins@oracle.com>
Cc: Jonathan Cameron <Jonathan.Cameron@huawei.com>
Cc: kernel test robot <lkp@intel.com>
Cc: Mike Rapoport <rppt@linux.ibm.com>
Cc: Pavel Tatashin <pasha.tatashin@soleen.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Cc: Randy Dunlap <rdunlap@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Tom Lendacky <thomas.lendacky@amd.com>
Cc: Wei Yang <richard.weiyang@linux.alibaba.com>
Cc: Will Deacon <will@kernel.org>
Link: https://lkml.kernel.org/r/159643103789.4062302.18426128170217903785.stgit@dwillia2-desk3.amr.corp.intel.com
Link: https://lkml.kernel.org/r/160106116293.30709.13350662794915396198.stgit@dwillia2-desk3.amr.corp.intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 arch/powerpc/kvm/book3s_hv_uvmem.c     |   1 +
 drivers/dax/device.c                   |   1 +
 drivers/gpu/drm/nouveau/nouveau_dmem.c |   1 +
 drivers/nvdimm/pfn_devs.c              |   1 +
 drivers/nvdimm/pmem.c                  |   1 +
 drivers/pci/p2pdma.c                   |   1 +
 drivers/xen/unpopulated-alloc.c        |   1 +
 include/linux/memremap.h               |  10 +-
 lib/test_hmm.c                         |   1 +
 mm/memremap.c                          | 258 +++++++++++++++++++--------------
 10 files changed, 166 insertions(+), 110 deletions(-)

(limited to 'lib')

diff --git a/arch/powerpc/kvm/book3s_hv_uvmem.c b/arch/powerpc/kvm/book3s_hv_uvmem.c
index 29ec555055c2..84e5a2dc8be5 100644
--- a/arch/powerpc/kvm/book3s_hv_uvmem.c
+++ b/arch/powerpc/kvm/book3s_hv_uvmem.c
@@ -1172,6 +1172,7 @@ int kvmppc_uvmem_init(void)
 	kvmppc_uvmem_pgmap.type = MEMORY_DEVICE_PRIVATE;
 	kvmppc_uvmem_pgmap.range.start = res->start;
 	kvmppc_uvmem_pgmap.range.end = res->end;
+	kvmppc_uvmem_pgmap.nr_range = 1;
 	kvmppc_uvmem_pgmap.ops = &kvmppc_uvmem_ops;
 	/* just one global instance: */
 	kvmppc_uvmem_pgmap.owner = &kvmppc_uvmem_pgmap;
diff --git a/drivers/dax/device.c b/drivers/dax/device.c
index a14448bca83d..5f808617672a 100644
--- a/drivers/dax/device.c
+++ b/drivers/dax/device.c
@@ -417,6 +417,7 @@ int dev_dax_probe(struct dev_dax *dev_dax)
 		if (!pgmap)
 			return -ENOMEM;
 		pgmap->range = *range;
+		pgmap->nr_range = 1;
 	}
 	pgmap->type = MEMORY_DEVICE_GENERIC;
 	addr = devm_memremap_pages(dev, pgmap);
diff --git a/drivers/gpu/drm/nouveau/nouveau_dmem.c b/drivers/gpu/drm/nouveau/nouveau_dmem.c
index 25811ed7e274..a13c6215bba8 100644
--- a/drivers/gpu/drm/nouveau/nouveau_dmem.c
+++ b/drivers/gpu/drm/nouveau/nouveau_dmem.c
@@ -251,6 +251,7 @@ nouveau_dmem_chunk_alloc(struct nouveau_drm *drm, struct page **ppage)
 	chunk->pagemap.type = MEMORY_DEVICE_PRIVATE;
 	chunk->pagemap.range.start = res->start;
 	chunk->pagemap.range.end = res->end;
+	chunk->pagemap.nr_range = 1;
 	chunk->pagemap.ops = &nouveau_dmem_pagemap_ops;
 	chunk->pagemap.owner = drm->dev;
 
diff --git a/drivers/nvdimm/pfn_devs.c b/drivers/nvdimm/pfn_devs.c
index 3c4787b92a6a..b499df630d4d 100644
--- a/drivers/nvdimm/pfn_devs.c
+++ b/drivers/nvdimm/pfn_devs.c
@@ -693,6 +693,7 @@ static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
 		.start = nsio->res.start + start_pad,
 		.end = nsio->res.end - end_trunc,
 	};
+	pgmap->nr_range = 1;
 	if (nd_pfn->mode == PFN_MODE_RAM) {
 		if (offset < reserve)
 			return -EINVAL;
diff --git a/drivers/nvdimm/pmem.c b/drivers/nvdimm/pmem.c
index 1f394f44838f..875076b0ea6c 100644
--- a/drivers/nvdimm/pmem.c
+++ b/drivers/nvdimm/pmem.c
@@ -441,6 +441,7 @@ static int pmem_attach_disk(struct device *dev,
 	} else if (pmem_should_map_pages(dev)) {
 		pmem->pgmap.range.start = res->start;
 		pmem->pgmap.range.end = res->end;
+		pmem->pgmap.nr_range = 1;
 		pmem->pgmap.type = MEMORY_DEVICE_FS_DAX;
 		pmem->pgmap.ops = &fsdax_pagemap_ops;
 		addr = devm_memremap_pages(dev, &pmem->pgmap);
diff --git a/drivers/pci/p2pdma.c b/drivers/pci/p2pdma.c
index 256850513813..9d53c16b7329 100644
--- a/drivers/pci/p2pdma.c
+++ b/drivers/pci/p2pdma.c
@@ -187,6 +187,7 @@ int pci_p2pdma_add_resource(struct pci_dev *pdev, int bar, size_t size,
 	pgmap = &p2p_pgmap->pgmap;
 	pgmap->range.start = pci_resource_start(pdev, bar) + offset;
 	pgmap->range.end = pgmap->range.start + size - 1;
+	pgmap->nr_range = 1;
 	pgmap->type = MEMORY_DEVICE_PCI_P2PDMA;
 
 	p2p_pgmap->provider = pdev;
diff --git a/drivers/xen/unpopulated-alloc.c b/drivers/xen/unpopulated-alloc.c
index 091b8669eca3..8c512ea550bb 100644
--- a/drivers/xen/unpopulated-alloc.c
+++ b/drivers/xen/unpopulated-alloc.c
@@ -47,6 +47,7 @@ static int fill_list(unsigned int nr_pages)
 		.start = res->start,
 		.end = res->end,
 	};
+	pgmap->nr_range = 1;
 	pgmap->owner = res;
 
 #ifdef CONFIG_XEN_HAVE_PVMMU
diff --git a/include/linux/memremap.h b/include/linux/memremap.h
index d0dd261d87c0..79c49e7f5c30 100644
--- a/include/linux/memremap.h
+++ b/include/linux/memremap.h
@@ -94,7 +94,6 @@ struct dev_pagemap_ops {
 /**
  * struct dev_pagemap - metadata for ZONE_DEVICE mappings
  * @altmap: pre-allocated/reserved memory for vmemmap allocations
- * @range: physical address range covered by @ref
  * @ref: reference count that pins the devm_memremap_pages() mapping
  * @internal_ref: internal reference if @ref is not provided by the caller
  * @done: completion for @internal_ref
@@ -104,10 +103,12 @@ struct dev_pagemap_ops {
  * @owner: an opaque pointer identifying the entity that manages this
  *	instance.  Used by various helpers to make sure that no
  *	foreign ZONE_DEVICE memory is accessed.
+ * @nr_range: number of ranges to be mapped
+ * @range: range to be mapped when nr_range == 1
+ * @ranges: array of ranges to be mapped when nr_range > 1
  */
 struct dev_pagemap {
 	struct vmem_altmap altmap;
-	struct range range;
 	struct percpu_ref *ref;
 	struct percpu_ref internal_ref;
 	struct completion done;
@@ -115,6 +116,11 @@ struct dev_pagemap {
 	unsigned int flags;
 	const struct dev_pagemap_ops *ops;
 	void *owner;
+	int nr_range;
+	union {
+		struct range range;
+		struct range ranges[0];
+	};
 };
 
 static inline struct vmem_altmap *pgmap_altmap(struct dev_pagemap *pgmap)
diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e97ca8ec0bce..c710b4c5714d 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -472,6 +472,7 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
 	devmem->pagemap.type = MEMORY_DEVICE_PRIVATE;
 	devmem->pagemap.range.start = res->start;
 	devmem->pagemap.range.end = res->end;
+	devmem->pagemap.nr_range = 1;
 	devmem->pagemap.ops = &dmirror_devmem_ops;
 	devmem->pagemap.owner = mdevice;
 
diff --git a/mm/memremap.c b/mm/memremap.c
index d958d348b3ca..532ec3d36ab4 100644
--- a/mm/memremap.c
+++ b/mm/memremap.c
@@ -77,15 +77,19 @@ static void pgmap_array_delete(struct range *range)
 	synchronize_rcu();
 }
 
-static unsigned long pfn_first(struct dev_pagemap *pgmap)
+static unsigned long pfn_first(struct dev_pagemap *pgmap, int range_id)
 {
-	return PHYS_PFN(pgmap->range.start) +
-		vmem_altmap_offset(pgmap_altmap(pgmap));
+	struct range *range = &pgmap->ranges[range_id];
+	unsigned long pfn = PHYS_PFN(range->start);
+
+	if (range_id)
+		return pfn;
+	return pfn + vmem_altmap_offset(pgmap_altmap(pgmap));
 }
 
-static unsigned long pfn_end(struct dev_pagemap *pgmap)
+static unsigned long pfn_end(struct dev_pagemap *pgmap, int range_id)
 {
-	const struct range *range = &pgmap->range;
+	const struct range *range = &pgmap->ranges[range_id];
 
 	return (range->start + range_len(range)) >> PAGE_SHIFT;
 }
@@ -97,8 +101,8 @@ static unsigned long pfn_next(unsigned long pfn)
 	return pfn + 1;
 }
 
-#define for_each_device_pfn(pfn, map) \
-	for (pfn = pfn_first(map); pfn < pfn_end(map); pfn = pfn_next(pfn))
+#define for_each_device_pfn(pfn, map, i) \
+	for (pfn = pfn_first(map, i); pfn < pfn_end(map, i); pfn = pfn_next(pfn))
 
 static void dev_pagemap_kill(struct dev_pagemap *pgmap)
 {
@@ -124,20 +128,14 @@ static void dev_pagemap_cleanup(struct dev_pagemap *pgmap)
 		pgmap->ref = NULL;
 }
 
-void memunmap_pages(struct dev_pagemap *pgmap)
+static void pageunmap_range(struct dev_pagemap *pgmap, int range_id)
 {
-	struct range *range = &pgmap->range;
+	struct range *range = &pgmap->ranges[range_id];
 	struct page *first_page;
-	unsigned long pfn;
 	int nid;
 
-	dev_pagemap_kill(pgmap);
-	for_each_device_pfn(pfn, pgmap)
-		put_page(pfn_to_page(pfn));
-	dev_pagemap_cleanup(pgmap);
-
 	/* make sure to access a memmap that was actually initialized */
-	first_page = pfn_to_page(pfn_first(pgmap));
+	first_page = pfn_to_page(pfn_first(pgmap, range_id));
 
 	/* pages are dead and unused, undo the arch mapping */
 	nid = page_to_nid(first_page);
@@ -157,6 +155,22 @@ void memunmap_pages(struct dev_pagemap *pgmap)
 
 	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
 	pgmap_array_delete(range);
+}
+
+void memunmap_pages(struct dev_pagemap *pgmap)
+{
+	unsigned long pfn;
+	int i;
+
+	dev_pagemap_kill(pgmap);
+	for (i = 0; i < pgmap->nr_range; i++)
+		for_each_device_pfn(pfn, pgmap, i)
+			put_page(pfn_to_page(pfn));
+	dev_pagemap_cleanup(pgmap);
+
+	for (i = 0; i < pgmap->nr_range; i++)
+		pageunmap_range(pgmap, i);
+
 	WARN_ONCE(pgmap->altmap.alloc, "failed to free all reserved pages\n");
 	devmap_managed_enable_put();
 }
@@ -175,96 +189,29 @@ static void dev_pagemap_percpu_release(struct percpu_ref *ref)
 	complete(&pgmap->done);
 }
 
-/*
- * Not device managed version of dev_memremap_pages, undone by
- * memunmap_pages().  Please use dev_memremap_pages if you have a struct
- * device available.
- */
-void *memremap_pages(struct dev_pagemap *pgmap, int nid)
+static int pagemap_range(struct dev_pagemap *pgmap, struct mhp_params *params,
+		int range_id, int nid)
 {
-	struct range *range = &pgmap->range;
+	struct range *range = &pgmap->ranges[range_id];
 	struct dev_pagemap *conflict_pgmap;
-	struct mhp_params params = {
-		/*
-		 * We do not want any optional features only our own memmap
-		 */
-		.altmap = pgmap_altmap(pgmap),
-		.pgprot = PAGE_KERNEL,
-	};
 	int error, is_ram;
-	bool need_devmap_managed = true;
 
-	switch (pgmap->type) {
-	case MEMORY_DEVICE_PRIVATE:
-		if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
-			WARN(1, "Device private memory not supported\n");
-			return ERR_PTR(-EINVAL);
-		}
-		if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
-			WARN(1, "Missing migrate_to_ram method\n");
-			return ERR_PTR(-EINVAL);
-		}
-		if (!pgmap->owner) {
-			WARN(1, "Missing owner\n");
-			return ERR_PTR(-EINVAL);
-		}
-		break;
-	case MEMORY_DEVICE_FS_DAX:
-		if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
-		    IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
-			WARN(1, "File system DAX not supported\n");
-			return ERR_PTR(-EINVAL);
-		}
-		break;
-	case MEMORY_DEVICE_GENERIC:
-		need_devmap_managed = false;
-		break;
-	case MEMORY_DEVICE_PCI_P2PDMA:
-		params.pgprot = pgprot_noncached(params.pgprot);
-		need_devmap_managed = false;
-		break;
-	default:
-		WARN(1, "Invalid pgmap type %d\n", pgmap->type);
-		break;
-	}
-
-	if (!pgmap->ref) {
-		if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
-			return ERR_PTR(-EINVAL);
-
-		init_completion(&pgmap->done);
-		error = percpu_ref_init(&pgmap->internal_ref,
-				dev_pagemap_percpu_release, 0, GFP_KERNEL);
-		if (error)
-			return ERR_PTR(error);
-		pgmap->ref = &pgmap->internal_ref;
-	} else {
-		if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
-			WARN(1, "Missing reference count teardown definition\n");
-			return ERR_PTR(-EINVAL);
-		}
-	}
-
-	if (need_devmap_managed) {
-		error = devmap_managed_enable_get(pgmap);
-		if (error)
-			return ERR_PTR(error);
-	}
+	if (WARN_ONCE(pgmap_altmap(pgmap) && range_id > 0,
+				"altmap not supported for multiple ranges\n"))
+		return -EINVAL;
 
 	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->start), NULL);
 	if (conflict_pgmap) {
 		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
-		error = -ENOMEM;
-		goto err_array;
+		return -ENOMEM;
 	}
 
 	conflict_pgmap = get_dev_pagemap(PHYS_PFN(range->end), NULL);
 	if (conflict_pgmap) {
 		WARN(1, "Conflicting mapping in same section\n");
 		put_dev_pagemap(conflict_pgmap);
-		error = -ENOMEM;
-		goto err_array;
+		return -ENOMEM;
 	}
 
 	is_ram = region_intersects(range->start, range_len(range),
@@ -274,19 +221,18 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 		WARN_ONCE(1, "attempted on %s region %#llx-%#llx\n",
 				is_ram == REGION_MIXED ? "mixed" : "ram",
 				range->start, range->end);
-		error = -ENXIO;
-		goto err_array;
+		return -ENXIO;
 	}
 
 	error = xa_err(xa_store_range(&pgmap_array, PHYS_PFN(range->start),
 				PHYS_PFN(range->end), pgmap, GFP_KERNEL));
 	if (error)
-		goto err_array;
+		return error;
 
 	if (nid < 0)
 		nid = numa_mem_id();
 
-	error = track_pfn_remap(NULL, &params.pgprot, PHYS_PFN(range->start), 0,
+	error = track_pfn_remap(NULL, &params->pgprot, PHYS_PFN(range->start), 0,
 			range_len(range));
 	if (error)
 		goto err_pfn_remap;
@@ -306,7 +252,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 	 */
 	if (pgmap->type == MEMORY_DEVICE_PRIVATE) {
 		error = add_pages(nid, PHYS_PFN(range->start),
-				PHYS_PFN(range_len(range)), &params);
+				PHYS_PFN(range_len(range)), params);
 	} else {
 		error = kasan_add_zero_shadow(__va(range->start), range_len(range));
 		if (error) {
@@ -315,7 +261,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 		}
 
 		error = arch_add_memory(nid, range->start, range_len(range),
-					&params);
+					params);
 	}
 
 	if (!error) {
@@ -323,7 +269,7 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 
 		zone = &NODE_DATA(nid)->node_zones[ZONE_DEVICE];
 		move_pfn_range_to_zone(zone, PHYS_PFN(range->start),
-				PHYS_PFN(range_len(range)), params.altmap);
+				PHYS_PFN(range_len(range)), params->altmap);
 	}
 
 	mem_hotplug_done();
@@ -337,20 +283,116 @@ void *memremap_pages(struct dev_pagemap *pgmap, int nid)
 	memmap_init_zone_device(&NODE_DATA(nid)->node_zones[ZONE_DEVICE],
 				PHYS_PFN(range->start),
 				PHYS_PFN(range_len(range)), pgmap);
-	percpu_ref_get_many(pgmap->ref, pfn_end(pgmap) - pfn_first(pgmap));
-	return __va(range->start);
+	percpu_ref_get_many(pgmap->ref, pfn_end(pgmap, range_id)
+			- pfn_first(pgmap, range_id));
+	return 0;
 
- err_add_memory:
+err_add_memory:
 	kasan_remove_zero_shadow(__va(range->start), range_len(range));
- err_kasan:
+err_kasan:
 	untrack_pfn(NULL, PHYS_PFN(range->start), range_len(range));
- err_pfn_remap:
+err_pfn_remap:
 	pgmap_array_delete(range);
- err_array:
-	dev_pagemap_kill(pgmap);
-	dev_pagemap_cleanup(pgmap);
-	devmap_managed_enable_put();
-	return ERR_PTR(error);
+	return error;
+}
+
+
+/*
+ * Not device managed version of dev_memremap_pages, undone by
+ * memunmap_pages().  Please use dev_memremap_pages if you have a struct
+ * device available.
+ */
+void *memremap_pages(struct dev_pagemap *pgmap, int nid)
+{
+	struct mhp_params params = {
+		.altmap = pgmap_altmap(pgmap),
+		.pgprot = PAGE_KERNEL,
+	};
+	const int nr_range = pgmap->nr_range;
+	bool need_devmap_managed = true;
+	int error, i;
+
+	if (WARN_ONCE(!nr_range, "nr_range must be specified\n"))
+		return ERR_PTR(-EINVAL);
+
+	switch (pgmap->type) {
+	case MEMORY_DEVICE_PRIVATE:
+		if (!IS_ENABLED(CONFIG_DEVICE_PRIVATE)) {
+			WARN(1, "Device private memory not supported\n");
+			return ERR_PTR(-EINVAL);
+		}
+		if (!pgmap->ops || !pgmap->ops->migrate_to_ram) {
+			WARN(1, "Missing migrate_to_ram method\n");
+			return ERR_PTR(-EINVAL);
+		}
+		if (!pgmap->owner) {
+			WARN(1, "Missing owner\n");
+			return ERR_PTR(-EINVAL);
+		}
+		break;
+	case MEMORY_DEVICE_FS_DAX:
+		if (!IS_ENABLED(CONFIG_ZONE_DEVICE) ||
+		    IS_ENABLED(CONFIG_FS_DAX_LIMITED)) {
+			WARN(1, "File system DAX not supported\n");
+			return ERR_PTR(-EINVAL);
+		}
+		break;
+	case MEMORY_DEVICE_GENERIC:
+		need_devmap_managed = false;
+		break;
+	case MEMORY_DEVICE_PCI_P2PDMA:
+		params.pgprot = pgprot_noncached(params.pgprot);
+		need_devmap_managed = false;
+		break;
+	default:
+		WARN(1, "Invalid pgmap type %d\n", pgmap->type);
+		break;
+	}
+
+	if (!pgmap->ref) {
+		if (pgmap->ops && (pgmap->ops->kill || pgmap->ops->cleanup))
+			return ERR_PTR(-EINVAL);
+
+		init_completion(&pgmap->done);
+		error = percpu_ref_init(&pgmap->internal_ref,
+				dev_pagemap_percpu_release, 0, GFP_KERNEL);
+		if (error)
+			return ERR_PTR(error);
+		pgmap->ref = &pgmap->internal_ref;
+	} else {
+		if (!pgmap->ops || !pgmap->ops->kill || !pgmap->ops->cleanup) {
+			WARN(1, "Missing reference count teardown definition\n");
+			return ERR_PTR(-EINVAL);
+		}
+	}
+
+	if (need_devmap_managed) {
+		error = devmap_managed_enable_get(pgmap);
+		if (error)
+			return ERR_PTR(error);
+	}
+
+	/*
+	 * Clear the pgmap nr_range as it will be incremented for each
+	 * successfully processed range. This communicates how many
+	 * regions to unwind in the abort case.
+	 */
+	pgmap->nr_range = 0;
+	error = 0;
+	for (i = 0; i < nr_range; i++) {
+		error = pagemap_range(pgmap, &params, i, nid);
+		if (error)
+			break;
+		pgmap->nr_range++;
+	}
+
+	if (i < nr_range) {
+		memunmap_pages(pgmap);
+		pgmap->nr_range = nr_range;
+		return ERR_PTR(error);
+	}
+
+	return __va(pgmap->ranges[0].start);
 }
 EXPORT_SYMBOL_GPL(memremap_pages);
 
-- 
cgit v1.2.3


From 9b53122f616a74ddbbd6c97a3c8294c631a13d15 Mon Sep 17 00:00:00 2001
From: Ralph Campbell <rcampbell@nvidia.com>
Date: Tue, 13 Oct 2020 16:54:32 -0700
Subject: lib/test_hmm.c: remove unused dmirror_zero_page

The variable dmirror_zero_page is unused in the HMM self test driver which
was probably intended to demonstrate how a driver could use
migrate_vma_setup() to share a single read-only device private zero page
similar to how the CPU does.  However, this isn't needed for the self
tests so remove it.

Signed-off-by: Ralph Campbell <rcampbell@nvidia.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Jerome Glisse <jglisse@redhat.com>
Link: https://lkml.kernel.org/r/20200914213801.16520-1-rcampbell@nvidia.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_hmm.c | 14 --------------
 1 file changed, 14 deletions(-)

(limited to 'lib')

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index c710b4c5714d..e151a7f10519 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -36,7 +36,6 @@
 static const struct dev_pagemap_ops dmirror_devmem_ops;
 static const struct mmu_interval_notifier_ops dmirror_min_ops;
 static dev_t dmirror_dev;
-static struct page *dmirror_zero_page;
 
 struct dmirror_device;
 
@@ -1127,17 +1126,6 @@ static int __init hmm_dmirror_init(void)
 			goto err_chrdev;
 	}
 
-	/*
-	 * Allocate a zero page to simulate a reserved page of device private
-	 * memory which is always zero. The zero_pfn page isn't used just to
-	 * make the code here simpler (i.e., we need a struct page for it).
-	 */
-	dmirror_zero_page = alloc_page(GFP_HIGHUSER | __GFP_ZERO);
-	if (!dmirror_zero_page) {
-		ret = -ENOMEM;
-		goto err_chrdev;
-	}
-
 	pr_info("HMM test module loaded. This is only for testing HMM.\n");
 	return 0;
 
@@ -1153,8 +1141,6 @@ static void __exit hmm_dmirror_exit(void)
 {
 	int id;
 
-	if (dmirror_zero_page)
-		__free_page(dmirror_zero_page);
 	for (id = 0; id < DMIRROR_NDEVICES; id++)
 		dmirror_device_remove(dmirror_devices + id);
 	unregister_chrdev_region(dmirror_dev, DMIRROR_NDEVICES);
-- 
cgit v1.2.3


From 83c4e7a0363bdb8104f510370907161623e31086 Mon Sep 17 00:00:00 2001
From: Patricia Alfonso <trishalfonso@google.com>
Date: Tue, 13 Oct 2020 16:55:02 -0700
Subject: KUnit: KASAN Integration

Integrate KASAN into KUnit testing framework.

        - Fail tests when KASAN reports an error that is not expected
        - Use KUNIT_EXPECT_KASAN_FAIL to expect a KASAN error in KASAN
	  tests
        - Expected KASAN reports pass tests and are still printed when run
          without kunit_tool (kunit_tool still bypasses the report due to the
          test passing)
	- KUnit struct in current task used to keep track of the current
	  test from KASAN code

Make use of "[PATCH v3 kunit-next 1/2] kunit: generalize kunit_resource
API beyond allocated resources" and "[PATCH v3 kunit-next 2/2] kunit: add
support for named resources" from Alan Maguire [1]

        - A named resource is added to a test when a KASAN report is
          expected
        - This resource contains a struct for kasan_data containing
          booleans representing if a KASAN report is expected and if a
          KASAN report is found

[1] (https://lore.kernel.org/linux-kselftest/1583251361-12748-1-git-send-email-alan.maguire@oracle.com/T/#t)

Signed-off-by: Patricia Alfonso <trishalfonso@google.com>
Signed-off-by: David Gow <davidgow@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Brendan Higgins <brendanhiggins@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20200915035828.570483-3-davidgow@google.com
Link: https://lkml.kernel.org/r/20200910070331.3358048-3-davidgow@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/kunit/test.h  |  5 +++++
 include/linux/kasan.h |  6 ++++++
 lib/kunit/test.c      | 13 ++++++++-----
 lib/test_kasan.c      | 47 +++++++++++++++++++++++++++++++++++++++++++++--
 mm/kasan/report.c     | 32 ++++++++++++++++++++++++++++++++
 5 files changed, 96 insertions(+), 7 deletions(-)

(limited to 'lib')

diff --git a/include/kunit/test.h b/include/kunit/test.h
index 59f3144f009a..3391f38389f8 100644
--- a/include/kunit/test.h
+++ b/include/kunit/test.h
@@ -224,6 +224,11 @@ struct kunit {
 	struct list_head resources; /* Protected by lock. */
 };
 
+static inline void kunit_set_failure(struct kunit *test)
+{
+	WRITE_ONCE(test->success, false);
+}
+
 void kunit_init_test(struct kunit *test, const char *name, char *log);
 
 int kunit_run_tests(struct kunit_suite *suite);
diff --git a/include/linux/kasan.h b/include/linux/kasan.h
index 087fba34b209..30d343b4a40a 100644
--- a/include/linux/kasan.h
+++ b/include/linux/kasan.h
@@ -14,6 +14,12 @@ struct task_struct;
 #include <linux/pgtable.h>
 #include <asm/kasan.h>
 
+/* kasan_data struct is used in KUnit tests for KASAN expected failures */
+struct kunit_kasan_expectation {
+	bool report_expected;
+	bool report_found;
+};
+
 extern unsigned char kasan_early_shadow_page[PAGE_SIZE];
 extern pte_t kasan_early_shadow_pte[PTRS_PER_PTE];
 extern pmd_t kasan_early_shadow_pmd[PTRS_PER_PMD];
diff --git a/lib/kunit/test.c b/lib/kunit/test.c
index c36037200310..dcc35fd30d95 100644
--- a/lib/kunit/test.c
+++ b/lib/kunit/test.c
@@ -10,16 +10,12 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/sched/debug.h>
+#include <linux/sched.h>
 
 #include "debugfs.h"
 #include "string-stream.h"
 #include "try-catch-impl.h"
 
-static void kunit_set_failure(struct kunit *test)
-{
-	WRITE_ONCE(test->success, false);
-}
-
 static void kunit_print_tap_version(void)
 {
 	static bool kunit_has_printed_tap_version;
@@ -288,6 +284,10 @@ static void kunit_try_run_case(void *data)
 	struct kunit_suite *suite = ctx->suite;
 	struct kunit_case *test_case = ctx->test_case;
 
+#if (IS_ENABLED(CONFIG_KASAN) && IS_ENABLED(CONFIG_KUNIT))
+	current->kunit_test = test;
+#endif /* IS_ENABLED(CONFIG_KASAN) && IS_ENABLED(CONFIG_KUNIT) */
+
 	/*
 	 * kunit_run_case_internal may encounter a fatal error; if it does,
 	 * abort will be called, this thread will exit, and finally the parent
@@ -602,6 +602,9 @@ void kunit_cleanup(struct kunit *test)
 		spin_unlock(&test->lock);
 		kunit_remove_resource(test, res);
 	}
+#if (IS_ENABLED(CONFIG_KASAN) && IS_ENABLED(CONFIG_KUNIT))
+	current->kunit_test = NULL;
+#endif /* IS_ENABLED(CONFIG_KASAN) && IS_ENABLED(CONFIG_KUNIT)*/
 }
 EXPORT_SYMBOL_GPL(kunit_cleanup);
 
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 53e953bb1d1d..58bffadd8367 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -23,6 +23,8 @@
 
 #include <asm/page.h>
 
+#include <kunit/test.h>
+
 #include "../mm/kasan/kasan.h"
 
 #define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_SHADOW_SCALE_SIZE)
@@ -32,14 +34,55 @@
  * are not eliminated as dead code.
  */
 
-int kasan_int_result;
 void *kasan_ptr_result;
+int kasan_int_result;
+
+static struct kunit_resource resource;
+static struct kunit_kasan_expectation fail_data;
+static bool multishot;
+
+static int kasan_test_init(struct kunit *test)
+{
+	/*
+	 * Temporarily enable multi-shot mode and set panic_on_warn=0.
+	 * Otherwise, we'd only get a report for the first case.
+	 */
+	multishot = kasan_save_enable_multi_shot();
+
+	return 0;
+}
+
+static void kasan_test_exit(struct kunit *test)
+{
+	kasan_restore_multi_shot(multishot);
+}
+
+/**
+ * KUNIT_EXPECT_KASAN_FAIL() - Causes a test failure when the expression does
+ * not cause a KASAN error. This uses a KUnit resource named "kasan_data." Do
+ * Do not use this name for a KUnit resource outside here.
+ *
+ */
+#define KUNIT_EXPECT_KASAN_FAIL(test, condition) do { \
+	fail_data.report_expected = true; \
+	fail_data.report_found = false; \
+	kunit_add_named_resource(test, \
+				NULL, \
+				NULL, \
+				&resource, \
+				"kasan_data", &fail_data); \
+	condition; \
+	KUNIT_EXPECT_EQ(test, \
+			fail_data.report_expected, \
+			fail_data.report_found); \
+} while (0)
+
+
 
 /*
  * Note: test functions are marked noinline so that their names appear in
  * reports.
  */
-
 static noinline void __init kmalloc_oob_right(void)
 {
 	char *ptr;
diff --git a/mm/kasan/report.c b/mm/kasan/report.c
index 4f49fa6cd1aa..e2c14b10bc81 100644
--- a/mm/kasan/report.c
+++ b/mm/kasan/report.c
@@ -33,6 +33,8 @@
 
 #include <asm/sections.h>
 
+#include <kunit/test.h>
+
 #include "kasan.h"
 #include "../slab.h"
 
@@ -464,12 +466,37 @@ static bool report_enabled(void)
 	return !test_and_set_bit(KASAN_BIT_REPORTED, &kasan_flags);
 }
 
+#if IS_ENABLED(CONFIG_KUNIT)
+static void kasan_update_kunit_status(struct kunit *cur_test)
+{
+	struct kunit_resource *resource;
+	struct kunit_kasan_expectation *kasan_data;
+
+	resource = kunit_find_named_resource(cur_test, "kasan_data");
+
+	if (!resource) {
+		kunit_set_failure(cur_test);
+		return;
+	}
+
+	kasan_data = (struct kunit_kasan_expectation *)resource->data;
+	kasan_data->report_found = true;
+	kunit_put_resource(resource);
+}
+#endif /* IS_ENABLED(CONFIG_KUNIT) */
+
 void kasan_report_invalid_free(void *object, unsigned long ip)
 {
 	unsigned long flags;
 	u8 tag = get_tag(object);
 
 	object = reset_tag(object);
+
+#if IS_ENABLED(CONFIG_KUNIT)
+	if (current->kunit_test)
+		kasan_update_kunit_status(current->kunit_test);
+#endif /* IS_ENABLED(CONFIG_KUNIT) */
+
 	start_report(&flags);
 	pr_err("BUG: KASAN: double-free or invalid-free in %pS\n", (void *)ip);
 	print_tags(tag, object);
@@ -488,6 +515,11 @@ static void __kasan_report(unsigned long addr, size_t size, bool is_write,
 	void *untagged_addr;
 	unsigned long flags;
 
+#if IS_ENABLED(CONFIG_KUNIT)
+	if (current->kunit_test)
+		kasan_update_kunit_status(current->kunit_test);
+#endif /* IS_ENABLED(CONFIG_KUNIT) */
+
 	disable_trace_on_warning();
 
 	tagged_addr = (void *)addr;
-- 
cgit v1.2.3


From 73228c7ecc5e40c0851c4703c5ec6ed38123e989 Mon Sep 17 00:00:00 2001
From: Patricia Alfonso <trishalfonso@google.com>
Date: Tue, 13 Oct 2020 16:55:06 -0700
Subject: KASAN: port KASAN Tests to KUnit

Transfer all previous tests for KASAN to KUnit so they can be run more
easily.  Using kunit_tool, developers can run these tests with their other
KUnit tests and see "pass" or "fail" with the appropriate KASAN report
instead of needing to parse each KASAN report to test KASAN
functionalities.  All KASAN reports are still printed to dmesg.

Stack tests do not work properly when KASAN_STACK is enabled so those
tests use a check for "if IS_ENABLED(CONFIG_KASAN_STACK)" so they only run
if stack instrumentation is enabled.  If KASAN_STACK is not enabled, KUnit
will print a statement to let the user know this test was not run with
KASAN_STACK enabled.

copy_user_test and kasan_rcu_uaf cannot be run in KUnit so there is a
separate test file for those tests, which can be run as before as a
module.

[trishalfonso@google.com: v14]
  Link: https://lkml.kernel.org/r/20200915035828.570483-4-davidgow@google.com

Signed-off-by: Patricia Alfonso <trishalfonso@google.com>
Signed-off-by: David Gow <davidgow@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Tested-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Reviewed-by: Andrey Konovalov <andreyknvl@google.com>
Reviewed-by: Dmitry Vyukov <dvyukov@google.com>
Cc: Andrey Ryabinin <aryabinin@virtuozzo.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Juri Lelli <juri.lelli@redhat.com>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Vincent Guittot <vincent.guittot@linaro.org>
Link: https://lkml.kernel.org/r/20200910070331.3358048-4-davidgow@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.kasan       |  22 +-
 lib/Makefile            |   4 +-
 lib/test_kasan.c        | 687 ++++++++++++++++++------------------------------
 lib/test_kasan_module.c | 111 ++++++++
 4 files changed, 386 insertions(+), 438 deletions(-)
 create mode 100644 lib/test_kasan_module.c

(limited to 'lib')

diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan
index 033a5bc67ac4..542a9c18398e 100644
--- a/lib/Kconfig.kasan
+++ b/lib/Kconfig.kasan
@@ -166,12 +166,24 @@ config KASAN_VMALLOC
 	  for KASAN to detect more sorts of errors (and to support vmapped
 	  stacks), but at the cost of higher memory usage.
 
-config TEST_KASAN
-	tristate "Module for testing KASAN for bug detection"
-	depends on m
+config KASAN_KUNIT_TEST
+	tristate "KUnit-compatible tests of KASAN bug detection capabilities" if !KUNIT_ALL_TESTS
+	depends on KASAN && KUNIT
+	default KUNIT_ALL_TESTS
 	help
-	  This is a test module doing various nasty things like
-	  out of bounds accesses, use after free. It is useful for testing
+	  This is a KUnit test suite doing various nasty things like
+	  out of bounds and use after free accesses. It is useful for testing
 	  kernel debugging features like KASAN.
 
+	  For more information on KUnit and unit tests in general, please refer
+	  to the KUnit documentation in Documentation/dev-tools/kunit
+
+config TEST_KASAN_MODULE
+	tristate "KUnit-incompatible tests of KASAN bug detection capabilities"
+	depends on m && KASAN
+	help
+	  This is a part of the KASAN test suite that is incompatible with
+	  KUnit. Currently includes tests that do bad copy_from/to_user
+	  accesses.
+
 endif # KASAN
diff --git a/lib/Makefile b/lib/Makefile
index a4a4c6864f51..d4af75136c54 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -65,9 +65,11 @@ CFLAGS_test_bitops.o += -Werror
 obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o
 obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o
 obj-$(CONFIG_TEST_IDA) += test_ida.o
-obj-$(CONFIG_TEST_KASAN) += test_kasan.o
+obj-$(CONFIG_KASAN_KUNIT_TEST) += test_kasan.o
 CFLAGS_test_kasan.o += -fno-builtin
 CFLAGS_test_kasan.o += $(call cc-disable-warning, vla)
+obj-$(CONFIG_TEST_KASAN_MODULE) += test_kasan_module.o
+CFLAGS_test_kasan_module.o += -fno-builtin
 obj-$(CONFIG_TEST_UBSAN) += test_ubsan.o
 CFLAGS_test_ubsan.o += $(call cc-disable-warning, vla)
 UBSAN_SANITIZE_test_ubsan.o := y
diff --git a/lib/test_kasan.c b/lib/test_kasan.c
index 58bffadd8367..63c26171a791 100644
--- a/lib/test_kasan.c
+++ b/lib/test_kasan.c
@@ -5,8 +5,6 @@
  * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
  */
 
-#define pr_fmt(fmt) "kasan test: %s " fmt, __func__
-
 #include <linux/bitops.h>
 #include <linux/delay.h>
 #include <linux/kasan.h>
@@ -77,416 +75,327 @@ static void kasan_test_exit(struct kunit *test)
 			fail_data.report_found); \
 } while (0)
 
-
-
-/*
- * Note: test functions are marked noinline so that their names appear in
- * reports.
- */
-static noinline void __init kmalloc_oob_right(void)
+static void kmalloc_oob_right(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 123;
 
-	pr_info("out-of-bounds to right\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	ptr[size + OOB_TAG_OFF] = 'x';
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 'x');
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_oob_left(void)
+static void kmalloc_oob_left(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 15;
 
-	pr_info("out-of-bounds to left\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	*ptr = *(ptr - 1);
+	KUNIT_EXPECT_KASAN_FAIL(test, *ptr = *(ptr - 1));
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_node_oob_right(void)
+static void kmalloc_node_oob_right(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 4096;
 
-	pr_info("kmalloc_node(): out-of-bounds to right\n");
 	ptr = kmalloc_node(size, GFP_KERNEL, 0);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	ptr[size] = 0;
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);
 	kfree(ptr);
 }
 
-#ifdef CONFIG_SLUB
-static noinline void __init kmalloc_pagealloc_oob_right(void)
+static void kmalloc_pagealloc_oob_right(struct kunit *test)
 {
 	char *ptr;
 	size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
 
+	if (!IS_ENABLED(CONFIG_SLUB)) {
+		kunit_info(test, "CONFIG_SLUB is not enabled.");
+		return;
+	}
+
 	/* Allocate a chunk that does not fit into a SLUB cache to trigger
 	 * the page allocator fallback.
 	 */
-	pr_info("kmalloc pagealloc allocation: out-of-bounds to right\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	ptr[size + OOB_TAG_OFF] = 0;
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size + OOB_TAG_OFF] = 0);
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_pagealloc_uaf(void)
+static void kmalloc_pagealloc_uaf(struct kunit *test)
 {
 	char *ptr;
 	size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
 
-	pr_info("kmalloc pagealloc allocation: use-after-free\n");
-	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
+	if (!IS_ENABLED(CONFIG_SLUB)) {
+		kunit_info(test, "CONFIG_SLUB is not enabled.");
 		return;
 	}
 
+	ptr = kmalloc(size, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
 	kfree(ptr);
-	ptr[0] = 0;
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[0] = 0);
 }
 
-static noinline void __init kmalloc_pagealloc_invalid_free(void)
+static void kmalloc_pagealloc_invalid_free(struct kunit *test)
 {
 	char *ptr;
 	size_t size = KMALLOC_MAX_CACHE_SIZE + 10;
 
-	pr_info("kmalloc pagealloc allocation: invalid-free\n");
-	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
+	if (!IS_ENABLED(CONFIG_SLUB)) {
+		kunit_info(test, "CONFIG_SLUB is not enabled.");
 		return;
 	}
 
-	kfree(ptr + 1);
+	ptr = kmalloc(size, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
+	KUNIT_EXPECT_KASAN_FAIL(test, kfree(ptr + 1));
 }
-#endif
 
-static noinline void __init kmalloc_large_oob_right(void)
+static void kmalloc_large_oob_right(struct kunit *test)
 {
 	char *ptr;
 	size_t size = KMALLOC_MAX_CACHE_SIZE - 256;
 	/* Allocate a chunk that is large enough, but still fits into a slab
 	 * and does not trigger the page allocator fallback in SLUB.
 	 */
-	pr_info("kmalloc large allocation: out-of-bounds to right\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
-	ptr[size] = 0;
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[size] = 0);
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_oob_krealloc_more(void)
+static void kmalloc_oob_krealloc_more(struct kunit *test)
 {
 	char *ptr1, *ptr2;
 	size_t size1 = 17;
 	size_t size2 = 19;
 
-	pr_info("out-of-bounds after krealloc more\n");
 	ptr1 = kmalloc(size1, GFP_KERNEL);
-	ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
-	if (!ptr1 || !ptr2) {
-		pr_err("Allocation failed\n");
-		kfree(ptr1);
-		kfree(ptr2);
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
 
-	ptr2[size2 + OOB_TAG_OFF] = 'x';
+	ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
 
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x');
 	kfree(ptr2);
 }
 
-static noinline void __init kmalloc_oob_krealloc_less(void)
+static void kmalloc_oob_krealloc_less(struct kunit *test)
 {
 	char *ptr1, *ptr2;
 	size_t size1 = 17;
 	size_t size2 = 15;
 
-	pr_info("out-of-bounds after krealloc less\n");
 	ptr1 = kmalloc(size1, GFP_KERNEL);
-	ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
-	if (!ptr1 || !ptr2) {
-		pr_err("Allocation failed\n");
-		kfree(ptr1);
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
 
-	ptr2[size2 + OOB_TAG_OFF] = 'x';
+	ptr2 = krealloc(ptr1, size2, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
 
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr2[size2 + OOB_TAG_OFF] = 'x');
 	kfree(ptr2);
 }
 
-static noinline void __init kmalloc_oob_16(void)
+static void kmalloc_oob_16(struct kunit *test)
 {
 	struct {
 		u64 words[2];
 	} *ptr1, *ptr2;
 
-	pr_info("kmalloc out-of-bounds for 16-bytes access\n");
 	ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
+
 	ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL);
-	if (!ptr1 || !ptr2) {
-		pr_err("Allocation failed\n");
-		kfree(ptr1);
-		kfree(ptr2);
-		return;
-	}
-	*ptr1 = *ptr2;
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
+
+	KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2);
 	kfree(ptr1);
 	kfree(ptr2);
 }
 
-static noinline void __init kmalloc_oob_memset_2(void)
+static void kmalloc_oob_memset_2(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 8;
 
-	pr_info("out-of-bounds in memset2\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	memset(ptr + 7 + OOB_TAG_OFF, 0, 2);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 7 + OOB_TAG_OFF, 0, 2));
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_oob_memset_4(void)
+static void kmalloc_oob_memset_4(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 8;
 
-	pr_info("out-of-bounds in memset4\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	memset(ptr + 5 + OOB_TAG_OFF, 0, 4);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 5 + OOB_TAG_OFF, 0, 4));
 	kfree(ptr);
 }
 
 
-static noinline void __init kmalloc_oob_memset_8(void)
+static void kmalloc_oob_memset_8(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 8;
 
-	pr_info("out-of-bounds in memset8\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	memset(ptr + 1 + OOB_TAG_OFF, 0, 8);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 8));
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_oob_memset_16(void)
+static void kmalloc_oob_memset_16(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 16;
 
-	pr_info("out-of-bounds in memset16\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	memset(ptr + 1 + OOB_TAG_OFF, 0, 16);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr + 1 + OOB_TAG_OFF, 0, 16));
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_oob_in_memset(void)
+static void kmalloc_oob_in_memset(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 666;
 
-	pr_info("out-of-bounds in memset\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	memset(ptr, 0, size + 5 + OOB_TAG_OFF);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size + 5 + OOB_TAG_OFF));
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_memmove_invalid_size(void)
+static void kmalloc_memmove_invalid_size(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 64;
 	volatile size_t invalid_size = -2;
 
-	pr_info("invalid size in memmove\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	memset((char *)ptr, 0, 64);
-	memmove((char *)ptr, (char *)ptr + 4, invalid_size);
+
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		memmove((char *)ptr, (char *)ptr + 4, invalid_size));
 	kfree(ptr);
 }
 
-static noinline void __init kmalloc_uaf(void)
+static void kmalloc_uaf(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 10;
 
-	pr_info("use-after-free\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	kfree(ptr);
-	*(ptr + 8) = 'x';
+	KUNIT_EXPECT_KASAN_FAIL(test, *(ptr + 8) = 'x');
 }
 
-static noinline void __init kmalloc_uaf_memset(void)
+static void kmalloc_uaf_memset(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 33;
 
-	pr_info("use-after-free in memset\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	kfree(ptr);
-	memset(ptr, 0, size);
+	KUNIT_EXPECT_KASAN_FAIL(test, memset(ptr, 0, size));
 }
 
-static noinline void __init kmalloc_uaf2(void)
+static void kmalloc_uaf2(struct kunit *test)
 {
 	char *ptr1, *ptr2;
 	size_t size = 43;
 
-	pr_info("use-after-free after another kmalloc\n");
 	ptr1 = kmalloc(size, GFP_KERNEL);
-	if (!ptr1) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1);
 
 	kfree(ptr1);
+
 	ptr2 = kmalloc(size, GFP_KERNEL);
-	if (!ptr2) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2);
+
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr1[40] = 'x');
+	KUNIT_EXPECT_PTR_NE(test, ptr1, ptr2);
 
-	ptr1[40] = 'x';
-	if (ptr1 == ptr2)
-		pr_err("Could not detect use-after-free: ptr1 == ptr2\n");
 	kfree(ptr2);
 }
 
-static noinline void __init kfree_via_page(void)
+static void kfree_via_page(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 8;
 	struct page *page;
 	unsigned long offset;
 
-	pr_info("invalid-free false positive (via page)\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	page = virt_to_page(ptr);
 	offset = offset_in_page(ptr);
 	kfree(page_address(page) + offset);
 }
 
-static noinline void __init kfree_via_phys(void)
+static void kfree_via_phys(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 8;
 	phys_addr_t phys;
 
-	pr_info("invalid-free false positive (via phys)\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	phys = virt_to_phys(ptr);
 	kfree(phys_to_virt(phys));
 }
 
-static noinline void __init kmem_cache_oob(void)
+static void kmem_cache_oob(struct kunit *test)
 {
 	char *p;
 	size_t size = 200;
 	struct kmem_cache *cache = kmem_cache_create("test_cache",
 						size, 0,
 						0, NULL);
-	if (!cache) {
-		pr_err("Cache allocation failed\n");
-		return;
-	}
-	pr_info("out-of-bounds in kmem_cache_alloc\n");
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
 	p = kmem_cache_alloc(cache, GFP_KERNEL);
 	if (!p) {
-		pr_err("Allocation failed\n");
+		kunit_err(test, "Allocation failed: %s\n", __func__);
 		kmem_cache_destroy(cache);
 		return;
 	}
 
-	*p = p[size + OOB_TAG_OFF];
-
+	KUNIT_EXPECT_KASAN_FAIL(test, *p = p[size + OOB_TAG_OFF]);
 	kmem_cache_free(cache, p);
 	kmem_cache_destroy(cache);
 }
 
-static noinline void __init memcg_accounted_kmem_cache(void)
+static void memcg_accounted_kmem_cache(struct kunit *test)
 {
 	int i;
 	char *p;
@@ -494,12 +403,8 @@ static noinline void __init memcg_accounted_kmem_cache(void)
 	struct kmem_cache *cache;
 
 	cache = kmem_cache_create("test_cache", size, 0, SLAB_ACCOUNT, NULL);
-	if (!cache) {
-		pr_err("Cache allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
 
-	pr_info("allocate memcg accounted object\n");
 	/*
 	 * Several allocations with a delay to allow for lazy per memcg kmem
 	 * cache creation.
@@ -519,134 +424,93 @@ free_cache:
 
 static char global_array[10];
 
-static noinline void __init kasan_global_oob(void)
+static void kasan_global_oob(struct kunit *test)
 {
 	volatile int i = 3;
 	char *p = &global_array[ARRAY_SIZE(global_array) + i];
 
-	pr_info("out-of-bounds global variable\n");
-	*(volatile char *)p;
-}
-
-static noinline void __init kasan_stack_oob(void)
-{
-	char stack_array[10];
-	volatile int i = OOB_TAG_OFF;
-	char *p = &stack_array[ARRAY_SIZE(stack_array) + i];
-
-	pr_info("out-of-bounds on stack\n");
-	*(volatile char *)p;
+	KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
 
-static noinline void __init ksize_unpoisons_memory(void)
+static void ksize_unpoisons_memory(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 123, real_size;
 
-	pr_info("ksize() unpoisons the whole allocated chunk\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 	real_size = ksize(ptr);
 	/* This access doesn't trigger an error. */
 	ptr[size] = 'x';
 	/* This one does. */
-	ptr[real_size] = 'y';
+	KUNIT_EXPECT_KASAN_FAIL(test, ptr[real_size] = 'y');
 	kfree(ptr);
 }
 
-static noinline void __init copy_user_test(void)
+static void kasan_stack_oob(struct kunit *test)
 {
-	char *kmem;
-	char __user *usermem;
-	size_t size = 10;
-	int unused;
-
-	kmem = kmalloc(size, GFP_KERNEL);
-	if (!kmem)
-		return;
+	char stack_array[10];
+	volatile int i = OOB_TAG_OFF;
+	char *p = &stack_array[ARRAY_SIZE(stack_array) + i];
 
-	usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE,
-			    PROT_READ | PROT_WRITE | PROT_EXEC,
-			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
-	if (IS_ERR(usermem)) {
-		pr_err("Failed to allocate user memory\n");
-		kfree(kmem);
+	if (!IS_ENABLED(CONFIG_KASAN_STACK)) {
+		kunit_info(test, "CONFIG_KASAN_STACK is not enabled");
 		return;
 	}
 
-	pr_info("out-of-bounds in copy_from_user()\n");
-	unused = copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF);
-
-	pr_info("out-of-bounds in copy_to_user()\n");
-	unused = copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF);
-
-	pr_info("out-of-bounds in __copy_from_user()\n");
-	unused = __copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF);
-
-	pr_info("out-of-bounds in __copy_to_user()\n");
-	unused = __copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF);
-
-	pr_info("out-of-bounds in __copy_from_user_inatomic()\n");
-	unused = __copy_from_user_inatomic(kmem, usermem, size + 1 + OOB_TAG_OFF);
-
-	pr_info("out-of-bounds in __copy_to_user_inatomic()\n");
-	unused = __copy_to_user_inatomic(usermem, kmem, size + 1 + OOB_TAG_OFF);
-
-	pr_info("out-of-bounds in strncpy_from_user()\n");
-	unused = strncpy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF);
-
-	vm_munmap((unsigned long)usermem, PAGE_SIZE);
-	kfree(kmem);
+	KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
 
-static noinline void __init kasan_alloca_oob_left(void)
+static void kasan_alloca_oob_left(struct kunit *test)
 {
 	volatile int i = 10;
 	char alloca_array[i];
 	char *p = alloca_array - 1;
 
-	pr_info("out-of-bounds to left on alloca\n");
-	*(volatile char *)p;
+	if (!IS_ENABLED(CONFIG_KASAN_STACK)) {
+		kunit_info(test, "CONFIG_KASAN_STACK is not enabled");
+		return;
+	}
+
+	KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
 
-static noinline void __init kasan_alloca_oob_right(void)
+static void kasan_alloca_oob_right(struct kunit *test)
 {
 	volatile int i = 10;
 	char alloca_array[i];
 	char *p = alloca_array + i;
 
-	pr_info("out-of-bounds to right on alloca\n");
-	*(volatile char *)p;
+	if (!IS_ENABLED(CONFIG_KASAN_STACK)) {
+		kunit_info(test, "CONFIG_KASAN_STACK is not enabled");
+		return;
+	}
+
+	KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p);
 }
 
-static noinline void __init kmem_cache_double_free(void)
+static void kmem_cache_double_free(struct kunit *test)
 {
 	char *p;
 	size_t size = 200;
 	struct kmem_cache *cache;
 
 	cache = kmem_cache_create("test_cache", size, 0, 0, NULL);
-	if (!cache) {
-		pr_err("Cache allocation failed\n");
-		return;
-	}
-	pr_info("double-free on heap object\n");
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
+
 	p = kmem_cache_alloc(cache, GFP_KERNEL);
 	if (!p) {
-		pr_err("Allocation failed\n");
+		kunit_err(test, "Allocation failed: %s\n", __func__);
 		kmem_cache_destroy(cache);
 		return;
 	}
 
 	kmem_cache_free(cache, p);
-	kmem_cache_free(cache, p);
+	KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_free(cache, p));
 	kmem_cache_destroy(cache);
 }
 
-static noinline void __init kmem_cache_invalid_free(void)
+static void kmem_cache_invalid_free(struct kunit *test)
 {
 	char *p;
 	size_t size = 200;
@@ -654,20 +518,17 @@ static noinline void __init kmem_cache_invalid_free(void)
 
 	cache = kmem_cache_create("test_cache", size, 0, SLAB_TYPESAFE_BY_RCU,
 				  NULL);
-	if (!cache) {
-		pr_err("Cache allocation failed\n");
-		return;
-	}
-	pr_info("invalid-free of heap object\n");
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, cache);
+
 	p = kmem_cache_alloc(cache, GFP_KERNEL);
 	if (!p) {
-		pr_err("Allocation failed\n");
+		kunit_err(test, "Allocation failed: %s\n", __func__);
 		kmem_cache_destroy(cache);
 		return;
 	}
 
 	/* Trigger invalid free, the object doesn't get freed */
-	kmem_cache_free(cache, p + 1);
+	KUNIT_EXPECT_KASAN_FAIL(test, kmem_cache_free(cache, p + 1));
 
 	/*
 	 * Properly free the object to prevent the "Objects remaining in
@@ -678,45 +539,63 @@ static noinline void __init kmem_cache_invalid_free(void)
 	kmem_cache_destroy(cache);
 }
 
-static noinline void __init kasan_memchr(void)
+static void kasan_memchr(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 24;
 
-	pr_info("out-of-bounds in memchr\n");
-	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
-	if (!ptr)
+	/* See https://bugzilla.kernel.org/show_bug.cgi?id=206337 */
+	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+		kunit_info(test,
+			"str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT");
 		return;
+	}
+
+	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
+
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		kasan_ptr_result = memchr(ptr, '1', size + 1));
 
-	kasan_ptr_result = memchr(ptr, '1', size + 1);
 	kfree(ptr);
 }
 
-static noinline void __init kasan_memcmp(void)
+static void kasan_memcmp(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 24;
 	int arr[9];
 
-	pr_info("out-of-bounds in memcmp\n");
-	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
-	if (!ptr)
+	/* See https://bugzilla.kernel.org/show_bug.cgi?id=206337 */
+	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+		kunit_info(test,
+			"str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT");
 		return;
+	}
 
+	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 	memset(arr, 0, sizeof(arr));
-	kasan_int_result = memcmp(ptr, arr, size + 1);
+
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		kasan_int_result = memcmp(ptr, arr, size+1));
 	kfree(ptr);
 }
 
-static noinline void __init kasan_strings(void)
+static void kasan_strings(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 24;
 
-	pr_info("use-after-free in strchr\n");
-	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
-	if (!ptr)
+	/* See https://bugzilla.kernel.org/show_bug.cgi?id=206337 */
+	if (IS_ENABLED(CONFIG_AMD_MEM_ENCRYPT)) {
+		kunit_info(test,
+			"str* functions are not instrumented with CONFIG_AMD_MEM_ENCRYPT");
 		return;
+	}
+
+	ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO);
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	kfree(ptr);
 
@@ -727,220 +606,164 @@ static noinline void __init kasan_strings(void)
 	 * will likely point to zeroed byte.
 	 */
 	ptr += 16;
-	kasan_ptr_result = strchr(ptr, '1');
+	KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = strchr(ptr, '1'));
 
-	pr_info("use-after-free in strrchr\n");
-	kasan_ptr_result = strrchr(ptr, '1');
+	KUNIT_EXPECT_KASAN_FAIL(test, kasan_ptr_result = strrchr(ptr, '1'));
 
-	pr_info("use-after-free in strcmp\n");
-	kasan_int_result = strcmp(ptr, "2");
+	KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strcmp(ptr, "2"));
 
-	pr_info("use-after-free in strncmp\n");
-	kasan_int_result = strncmp(ptr, "2", 1);
+	KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strncmp(ptr, "2", 1));
 
-	pr_info("use-after-free in strlen\n");
-	kasan_int_result = strlen(ptr);
+	KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strlen(ptr));
 
-	pr_info("use-after-free in strnlen\n");
-	kasan_int_result = strnlen(ptr, 1);
+	KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strnlen(ptr, 1));
 }
 
-static noinline void __init kasan_bitops(void)
+static void kasan_bitops(struct kunit *test)
 {
 	/*
 	 * Allocate 1 more byte, which causes kzalloc to round up to 16-bytes;
 	 * this way we do not actually corrupt other memory.
 	 */
 	long *bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL);
-	if (!bits)
-		return;
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits);
 
 	/*
 	 * Below calls try to access bit within allocated memory; however, the
 	 * below accesses are still out-of-bounds, since bitops are defined to
 	 * operate on the whole long the bit is in.
 	 */
-	pr_info("out-of-bounds in set_bit\n");
-	set_bit(BITS_PER_LONG, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test, set_bit(BITS_PER_LONG, bits));
 
-	pr_info("out-of-bounds in __set_bit\n");
-	__set_bit(BITS_PER_LONG, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(BITS_PER_LONG, bits));
 
-	pr_info("out-of-bounds in clear_bit\n");
-	clear_bit(BITS_PER_LONG, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(BITS_PER_LONG, bits));
 
-	pr_info("out-of-bounds in __clear_bit\n");
-	__clear_bit(BITS_PER_LONG, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(BITS_PER_LONG, bits));
 
-	pr_info("out-of-bounds in clear_bit_unlock\n");
-	clear_bit_unlock(BITS_PER_LONG, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(BITS_PER_LONG, bits));
 
-	pr_info("out-of-bounds in __clear_bit_unlock\n");
-	__clear_bit_unlock(BITS_PER_LONG, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(BITS_PER_LONG, bits));
 
-	pr_info("out-of-bounds in change_bit\n");
-	change_bit(BITS_PER_LONG, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test, change_bit(BITS_PER_LONG, bits));
 
-	pr_info("out-of-bounds in __change_bit\n");
-	__change_bit(BITS_PER_LONG, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(BITS_PER_LONG, bits));
 
 	/*
 	 * Below calls try to access bit beyond allocated memory.
 	 */
-	pr_info("out-of-bounds in test_and_set_bit\n");
-	test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
 
-	pr_info("out-of-bounds in __test_and_set_bit\n");
-	__test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		__test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
 
-	pr_info("out-of-bounds in test_and_set_bit_lock\n");
-	test_and_set_bit_lock(BITS_PER_LONG + BITS_PER_BYTE, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		test_and_set_bit_lock(BITS_PER_LONG + BITS_PER_BYTE, bits));
 
-	pr_info("out-of-bounds in test_and_clear_bit\n");
-	test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
 
-	pr_info("out-of-bounds in __test_and_clear_bit\n");
-	__test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		__test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
 
-	pr_info("out-of-bounds in test_and_change_bit\n");
-	test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
 
-	pr_info("out-of-bounds in __test_and_change_bit\n");
-	__test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		__test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
 
-	pr_info("out-of-bounds in test_bit\n");
-	kasan_int_result = test_bit(BITS_PER_LONG + BITS_PER_BYTE, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		kasan_int_result =
+			test_bit(BITS_PER_LONG + BITS_PER_BYTE, bits));
 
 #if defined(clear_bit_unlock_is_negative_byte)
-	pr_info("out-of-bounds in clear_bit_unlock_is_negative_byte\n");
-	kasan_int_result = clear_bit_unlock_is_negative_byte(BITS_PER_LONG +
-		BITS_PER_BYTE, bits);
+	KUNIT_EXPECT_KASAN_FAIL(test,
+		kasan_int_result = clear_bit_unlock_is_negative_byte(
+			BITS_PER_LONG + BITS_PER_BYTE, bits));
 #endif
 	kfree(bits);
 }
 
-static noinline void __init kmalloc_double_kzfree(void)
+static void kmalloc_double_kzfree(struct kunit *test)
 {
 	char *ptr;
 	size_t size = 16;
 
-	pr_info("double-free (kfree_sensitive)\n");
 	ptr = kmalloc(size, GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr);
 
 	kfree_sensitive(ptr);
-	kfree_sensitive(ptr);
+	KUNIT_EXPECT_KASAN_FAIL(test, kfree_sensitive(ptr));
 }
 
-#ifdef CONFIG_KASAN_VMALLOC
-static noinline void __init vmalloc_oob(void)
+static void vmalloc_oob(struct kunit *test)
 {
 	void *area;
 
-	pr_info("vmalloc out-of-bounds\n");
+	if (!IS_ENABLED(CONFIG_KASAN_VMALLOC)) {
+		kunit_info(test, "CONFIG_KASAN_VMALLOC is not enabled.");
+		return;
+	}
 
 	/*
 	 * We have to be careful not to hit the guard page.
 	 * The MMU will catch that and crash us.
 	 */
 	area = vmalloc(3000);
-	if (!area) {
-		pr_err("Allocation failed\n");
-		return;
-	}
+	KUNIT_ASSERT_NOT_ERR_OR_NULL(test, area);
 
-	((volatile char *)area)[3100];
+	KUNIT_EXPECT_KASAN_FAIL(test, ((volatile char *)area)[3100]);
 	vfree(area);
 }
-#else
-static void __init vmalloc_oob(void) {}
-#endif
-
-static struct kasan_rcu_info {
-	int i;
-	struct rcu_head rcu;
-} *global_rcu_ptr;
-
-static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp)
-{
-	struct kasan_rcu_info *fp = container_of(rp,
-						struct kasan_rcu_info, rcu);
-
-	kfree(fp);
-	fp->i = 1;
-}
-
-static noinline void __init kasan_rcu_uaf(void)
-{
-	struct kasan_rcu_info *ptr;
 
-	pr_info("use-after-free in kasan_rcu_reclaim\n");
-	ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL);
-	if (!ptr) {
-		pr_err("Allocation failed\n");
-		return;
-	}
-
-	global_rcu_ptr = rcu_dereference_protected(ptr, NULL);
-	call_rcu(&global_rcu_ptr->rcu, kasan_rcu_reclaim);
-}
-
-static int __init kmalloc_tests_init(void)
-{
-	/*
-	 * Temporarily enable multi-shot mode. Otherwise, we'd only get a
-	 * report for the first case.
-	 */
-	bool multishot = kasan_save_enable_multi_shot();
-
-	kmalloc_oob_right();
-	kmalloc_oob_left();
-	kmalloc_node_oob_right();
-#ifdef CONFIG_SLUB
-	kmalloc_pagealloc_oob_right();
-	kmalloc_pagealloc_uaf();
-	kmalloc_pagealloc_invalid_free();
-#endif
-	kmalloc_large_oob_right();
-	kmalloc_oob_krealloc_more();
-	kmalloc_oob_krealloc_less();
-	kmalloc_oob_16();
-	kmalloc_oob_in_memset();
-	kmalloc_oob_memset_2();
-	kmalloc_oob_memset_4();
-	kmalloc_oob_memset_8();
-	kmalloc_oob_memset_16();
-	kmalloc_memmove_invalid_size();
-	kmalloc_uaf();
-	kmalloc_uaf_memset();
-	kmalloc_uaf2();
-	kfree_via_page();
-	kfree_via_phys();
-	kmem_cache_oob();
-	memcg_accounted_kmem_cache();
-	kasan_stack_oob();
-	kasan_global_oob();
-	kasan_alloca_oob_left();
-	kasan_alloca_oob_right();
-	ksize_unpoisons_memory();
-	copy_user_test();
-	kmem_cache_double_free();
-	kmem_cache_invalid_free();
-	kasan_memchr();
-	kasan_memcmp();
-	kasan_strings();
-	kasan_bitops();
-	kmalloc_double_kzfree();
-	vmalloc_oob();
-	kasan_rcu_uaf();
-
-	kasan_restore_multi_shot(multishot);
-
-	return -EAGAIN;
-}
+static struct kunit_case kasan_kunit_test_cases[] = {
+	KUNIT_CASE(kmalloc_oob_right),
+	KUNIT_CASE(kmalloc_oob_left),
+	KUNIT_CASE(kmalloc_node_oob_right),
+	KUNIT_CASE(kmalloc_pagealloc_oob_right),
+	KUNIT_CASE(kmalloc_pagealloc_uaf),
+	KUNIT_CASE(kmalloc_pagealloc_invalid_free),
+	KUNIT_CASE(kmalloc_large_oob_right),
+	KUNIT_CASE(kmalloc_oob_krealloc_more),
+	KUNIT_CASE(kmalloc_oob_krealloc_less),
+	KUNIT_CASE(kmalloc_oob_16),
+	KUNIT_CASE(kmalloc_oob_in_memset),
+	KUNIT_CASE(kmalloc_oob_memset_2),
+	KUNIT_CASE(kmalloc_oob_memset_4),
+	KUNIT_CASE(kmalloc_oob_memset_8),
+	KUNIT_CASE(kmalloc_oob_memset_16),
+	KUNIT_CASE(kmalloc_memmove_invalid_size),
+	KUNIT_CASE(kmalloc_uaf),
+	KUNIT_CASE(kmalloc_uaf_memset),
+	KUNIT_CASE(kmalloc_uaf2),
+	KUNIT_CASE(kfree_via_page),
+	KUNIT_CASE(kfree_via_phys),
+	KUNIT_CASE(kmem_cache_oob),
+	KUNIT_CASE(memcg_accounted_kmem_cache),
+	KUNIT_CASE(kasan_global_oob),
+	KUNIT_CASE(kasan_stack_oob),
+	KUNIT_CASE(kasan_alloca_oob_left),
+	KUNIT_CASE(kasan_alloca_oob_right),
+	KUNIT_CASE(ksize_unpoisons_memory),
+	KUNIT_CASE(kmem_cache_double_free),
+	KUNIT_CASE(kmem_cache_invalid_free),
+	KUNIT_CASE(kasan_memchr),
+	KUNIT_CASE(kasan_memcmp),
+	KUNIT_CASE(kasan_strings),
+	KUNIT_CASE(kasan_bitops),
+	KUNIT_CASE(kmalloc_double_kzfree),
+	KUNIT_CASE(vmalloc_oob),
+	{}
+};
+
+static struct kunit_suite kasan_kunit_test_suite = {
+	.name = "kasan",
+	.init = kasan_test_init,
+	.test_cases = kasan_kunit_test_cases,
+	.exit = kasan_test_exit,
+};
+
+kunit_test_suite(kasan_kunit_test_suite);
 
-module_init(kmalloc_tests_init);
 MODULE_LICENSE("GPL");
diff --git a/lib/test_kasan_module.c b/lib/test_kasan_module.c
new file mode 100644
index 000000000000..2d68db6ae67b
--- /dev/null
+++ b/lib/test_kasan_module.c
@@ -0,0 +1,111 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ *
+ * Copyright (c) 2014 Samsung Electronics Co., Ltd.
+ * Author: Andrey Ryabinin <a.ryabinin@samsung.com>
+ */
+
+#define pr_fmt(fmt) "kasan test: %s " fmt, __func__
+
+#include <linux/mman.h>
+#include <linux/module.h>
+#include <linux/printk.h>
+#include <linux/slab.h>
+#include <linux/uaccess.h>
+
+#include "../mm/kasan/kasan.h"
+
+#define OOB_TAG_OFF (IS_ENABLED(CONFIG_KASAN_GENERIC) ? 0 : KASAN_SHADOW_SCALE_SIZE)
+
+static noinline void __init copy_user_test(void)
+{
+	char *kmem;
+	char __user *usermem;
+	size_t size = 10;
+	int unused;
+
+	kmem = kmalloc(size, GFP_KERNEL);
+	if (!kmem)
+		return;
+
+	usermem = (char __user *)vm_mmap(NULL, 0, PAGE_SIZE,
+			    PROT_READ | PROT_WRITE | PROT_EXEC,
+			    MAP_ANONYMOUS | MAP_PRIVATE, 0);
+	if (IS_ERR(usermem)) {
+		pr_err("Failed to allocate user memory\n");
+		kfree(kmem);
+		return;
+	}
+
+	pr_info("out-of-bounds in copy_from_user()\n");
+	unused = copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF);
+
+	pr_info("out-of-bounds in copy_to_user()\n");
+	unused = copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF);
+
+	pr_info("out-of-bounds in __copy_from_user()\n");
+	unused = __copy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF);
+
+	pr_info("out-of-bounds in __copy_to_user()\n");
+	unused = __copy_to_user(usermem, kmem, size + 1 + OOB_TAG_OFF);
+
+	pr_info("out-of-bounds in __copy_from_user_inatomic()\n");
+	unused = __copy_from_user_inatomic(kmem, usermem, size + 1 + OOB_TAG_OFF);
+
+	pr_info("out-of-bounds in __copy_to_user_inatomic()\n");
+	unused = __copy_to_user_inatomic(usermem, kmem, size + 1 + OOB_TAG_OFF);
+
+	pr_info("out-of-bounds in strncpy_from_user()\n");
+	unused = strncpy_from_user(kmem, usermem, size + 1 + OOB_TAG_OFF);
+
+	vm_munmap((unsigned long)usermem, PAGE_SIZE);
+	kfree(kmem);
+}
+
+static struct kasan_rcu_info {
+	int i;
+	struct rcu_head rcu;
+} *global_rcu_ptr;
+
+static noinline void __init kasan_rcu_reclaim(struct rcu_head *rp)
+{
+	struct kasan_rcu_info *fp = container_of(rp,
+						struct kasan_rcu_info, rcu);
+
+	kfree(fp);
+	fp->i = 1;
+}
+
+static noinline void __init kasan_rcu_uaf(void)
+{
+	struct kasan_rcu_info *ptr;
+
+	pr_info("use-after-free in kasan_rcu_reclaim\n");
+	ptr = kmalloc(sizeof(struct kasan_rcu_info), GFP_KERNEL);
+	if (!ptr) {
+		pr_err("Allocation failed\n");
+		return;
+	}
+
+	global_rcu_ptr = rcu_dereference_protected(ptr, NULL);
+	call_rcu(&global_rcu_ptr->rcu, kasan_rcu_reclaim);
+}
+
+
+static int __init test_kasan_module_init(void)
+{
+	/*
+	 * Temporarily enable multi-shot mode. Otherwise, we'd only get a
+	 * report for the first case.
+	 */
+	bool multishot = kasan_save_enable_multi_shot();
+
+	copy_user_test();
+	kasan_rcu_uaf();
+
+	kasan_restore_multi_shot(multishot);
+	return -EAGAIN;
+}
+
+module_init(test_kasan_module_init);
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3


From e320d3012d25b1fb5f3df4edb7bd44a1c362ec10 Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Tue, 13 Oct 2020 16:56:04 -0700
Subject: mm/page_alloc.c: fix freeing non-compound pages

Here is a very rare race which leaks memory:

Page P0 is allocated to the page cache.  Page P1 is free.

Thread A                Thread B                Thread C
find_get_entry():
xas_load() returns P0
						Removes P0 from page cache
						P0 finds its buddy P1
			alloc_pages(GFP_KERNEL, 1) returns P0
			P0 has refcount 1
page_cache_get_speculative(P0)
P0 has refcount 2
			__free_pages(P0)
			P0 has refcount 1
put_page(P0)
P1 is not freed

Fix this by freeing all the pages in __free_pages() that won't be freed
by the call to put_page().  It's usually not a good idea to split a page,
but this is a very unlikely scenario.

Fixes: e286781d5f2e ("mm: speculative page references")
Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Acked-by: Mike Rapoport <rppt@linux.ibm.com>
Cc: Nick Piggin <npiggin@gmail.com>
Cc: Hugh Dickins <hughd@google.com>
Cc: Peter Zijlstra <peterz@infradead.org>
Link: https://lkml.kernel.org/r/20200926213919.26642-1-willy@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.debug     |  9 +++++++++
 lib/Makefile          |  1 +
 lib/test_free_pages.c | 42 ++++++++++++++++++++++++++++++++++++++++++
 mm/page_alloc.c       |  3 +++
 4 files changed, 55 insertions(+)
 create mode 100644 lib/test_free_pages.c

(limited to 'lib')

diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 0c781f912f9f..491789a793ae 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -2367,6 +2367,15 @@ config TEST_HMM
 
 	  If unsure, say N.
 
+config TEST_FREE_PAGES
+	tristate "Test freeing pages"
+	help
+	  Test that a memory leak does not occur due to a race between
+	  freeing a block of pages and a speculative page reference.
+	  Loading this module is safe if your kernel has the bug fixed.
+	  If the bug is not fixed, it will leak gigabytes of memory and
+	  probably OOM your system.
+
 config TEST_FPU
 	tristate "Test floating point operations in kernel space"
 	depends on X86 && !KCOV_INSTRUMENT_ALL
diff --git a/lib/Makefile b/lib/Makefile
index d4af75136c54..49a2a9e36224 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -101,6 +101,7 @@ obj-$(CONFIG_TEST_BLACKHOLE_DEV) += test_blackhole_dev.o
 obj-$(CONFIG_TEST_MEMINIT) += test_meminit.o
 obj-$(CONFIG_TEST_LOCKUP) += test_lockup.o
 obj-$(CONFIG_TEST_HMM) += test_hmm.o
+obj-$(CONFIG_TEST_FREE_PAGES) += test_free_pages.o
 
 #
 # CFLAGS for compiling floating point code inside the kernel. x86/Makefile turns
diff --git a/lib/test_free_pages.c b/lib/test_free_pages.c
new file mode 100644
index 000000000000..074e76bd76b2
--- /dev/null
+++ b/lib/test_free_pages.c
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0+
+/*
+ * test_free_pages.c: Check that free_pages() doesn't leak memory
+ * Copyright (c) 2020 Oracle
+ * Author: Matthew Wilcox <willy@infradead.org>
+ */
+
+#include <linux/gfp.h>
+#include <linux/mm.h>
+#include <linux/module.h>
+
+static void test_free_pages(gfp_t gfp)
+{
+	unsigned int i;
+
+	for (i = 0; i < 1000 * 1000; i++) {
+		unsigned long addr = __get_free_pages(gfp, 3);
+		struct page *page = virt_to_page(addr);
+
+		/* Simulate page cache getting a speculative reference */
+		get_page(page);
+		free_pages(addr, 3);
+		put_page(page);
+	}
+}
+
+static int m_in(void)
+{
+	test_free_pages(GFP_KERNEL);
+	test_free_pages(GFP_KERNEL | __GFP_COMP);
+
+	return 0;
+}
+
+static void m_ex(void)
+{
+}
+
+module_init(m_in);
+module_exit(m_ex);
+MODULE_AUTHOR("Matthew Wilcox <willy@infradead.org>");
+MODULE_LICENSE("GPL");
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 6263a97e39c6..73e33ab6d249 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -4952,6 +4952,9 @@ void __free_pages(struct page *page, unsigned int order)
 {
 	if (put_page_testzero(page))
 		free_the_page(page, order);
+	else if (!PageHead(page))
+		while (order-- > 0)
+			free_the_page(page + (1 << order), order);
 }
 EXPORT_SYMBOL(__free_pages);
 
-- 
cgit v1.2.3


From b2a182a40278bc5849730e66bca01a762188ed86 Mon Sep 17 00:00:00 2001
From: Douglas Gilbert <dgilbert@interlog.com>
Date: Thu, 15 Oct 2020 14:57:35 -0400
Subject: sgl_alloc_order: fix memory leak

sgl_alloc_order() can fail when 'length' is large on a memory
constrained system. When order > 0 it will potentially be
making several multi-page allocations with the later ones more
likely to fail than the earlier one. So it is important that
sgl_alloc_order() frees up any pages it has obtained before
returning NULL. In the case when order > 0 it calls the wrong
free page function and leaks. In testing the leak was
sufficient to bring down my 8 GiB laptop with OOM.

Reviewed-by: Bart Van Assche <bvanassche@acm.org>
Signed-off-by: Douglas Gilbert <dgilbert@interlog.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 lib/scatterlist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 5d63a8857f36..c448642e0f78 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -514,7 +514,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length,
 		elem_len = min_t(u64, length, PAGE_SIZE << order);
 		page = alloc_pages(gfp, order);
 		if (!page) {
-			sgl_free(sgl);
+			sgl_free_order(sgl, order);
 			return NULL;
 		}
 
-- 
cgit v1.2.3


From 9a40401cfa1356b0d169be8470ed7b2edc33b98f Mon Sep 17 00:00:00 2001
From: Jason Gunthorpe <jgg@nvidia.com>
Date: Fri, 16 Oct 2020 08:46:01 -0300
Subject: lib/scatterlist: Do not limit max_segment to PAGE_ALIGNED values

The main intention of the max_segment argument to
__sg_alloc_table_from_pages() is to match the DMA layer segment size set
by dma_set_max_seg_size().

Restricting the input to be page aligned makes it impossible to just
connect the DMA layer to this API.

The only reason for a page alignment here is because the algorithm will
overshoot the max_segment if it is not a multiple of PAGE_SIZE. Simply fix
the alignment before starting and don't expose this implementation detail
to the callers.

A future patch will completely remove SCATTERLIST_MAX_SEGMENT.

Signed-off-by: Jason Gunthorpe <jgg@nvidia.com>
---
 lib/scatterlist.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index e102fdfaa75b..ed2497c79a21 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -404,7 +404,7 @@ static struct scatterlist *get_next_sg(struct sg_table *table,
  * @n_pages:	 Number of pages in the pages array
  * @offset:      Offset from start of the first page to the start of a buffer
  * @size:        Number of valid bytes in the buffer (after offset)
- * @max_segment: Maximum size of a scatterlist node in bytes (page aligned)
+ * @max_segment: Maximum size of a scatterlist element in bytes
  * @prv:	 Last populated sge in sgt
  * @left_pages:  Left pages caller have to set after this call
  * @gfp_mask:	 GFP allocation mask
@@ -435,7 +435,12 @@ struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt,
 	unsigned int added_nents = 0;
 	struct scatterlist *s = prv;
 
-	if (WARN_ON(!max_segment || offset_in_page(max_segment)))
+	/*
+	 * The algorithm below requires max_segment to be aligned to PAGE_SIZE
+	 * otherwise it can overshoot.
+	 */
+	max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE);
+	if (WARN_ON(max_segment < PAGE_SIZE))
 		return ERR_PTR(-EINVAL);
 
 	if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv)
@@ -542,8 +547,7 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages,
 			      unsigned long size, gfp_t gfp_mask)
 {
 	return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages,
-			offset, size, SCATTERLIST_MAX_SEGMENT,
-			NULL, 0, gfp_mask));
+			offset, size, UINT_MAX, NULL, 0, gfp_mask));
 }
 EXPORT_SYMBOL(sg_alloc_table_from_pages);
 
-- 
cgit v1.2.3


From 57417cebc96b57122a2207fc84a6077d20c84b4b Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 15 Oct 2020 20:05:13 -0700
Subject: XArray: add xa_get_order

Patch series "Fix read-only THP for non-tmpfs filesystems".

As described more verbosely in the [3/3] changelog, we can inadvertently
put an order-0 page in the page cache which occupies 512 consecutive
entries.  Users are running into this if they enable the
READ_ONLY_THP_FOR_FS config option; see
https://bugzilla.kernel.org/show_bug.cgi?id=206569 and Qian Cai has also
reported it here:
https://lore.kernel.org/lkml/20200616013309.GB815@lca.pw/

This is a rather intrusive way of fixing the problem, but has the
advantage that I've actually been testing it with the THP patches, which
means that it sees far more use than it does upstream -- indeed, Song has
been entirely unable to reproduce it.  It also has the advantage that it
removes a few patches from my gargantuan backlog of THP patches.

This patch (of 3):

This function returns the order of the entry at the index.  We need this
because there isn't space in the shadow entry to encode its order.

[akpm@linux-foundation.org: export xa_get_order to modules]

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Qian Cai <cai@lca.pw>
Cc: Song Liu <songliubraving@fb.com>
Link: https://lkml.kernel.org/r/20200903183029.14930-1-willy@infradead.org
Link: https://lkml.kernel.org/r/20200903183029.14930-2-willy@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/xarray.h |  9 +++++++++
 lib/test_xarray.c      | 21 +++++++++++++++++++++
 lib/xarray.c           | 40 ++++++++++++++++++++++++++++++++++++++++
 3 files changed, 70 insertions(+)

(limited to 'lib')

diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index b4d70e7568b2..5b7f4ebcf4ff 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1505,6 +1505,15 @@ void xas_pause(struct xa_state *);
 
 void xas_create_range(struct xa_state *);
 
+#ifdef CONFIG_XARRAY_MULTI
+int xa_get_order(struct xarray *, unsigned long index);
+#else
+static inline int xa_get_order(struct xarray *xa, unsigned long index)
+{
+	return 0;
+}
+#endif
+
 /**
  * xas_reload() - Refetch an entry from the xarray.
  * @xas: XArray operation state.
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index d4f97925dbd8..bdd4d7995f79 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1649,6 +1649,26 @@ static noinline void check_account(struct xarray *xa)
 #endif
 }
 
+static noinline void check_get_order(struct xarray *xa)
+{
+	unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1;
+	unsigned int order;
+	unsigned long i, j;
+
+	for (i = 0; i < 3; i++)
+		XA_BUG_ON(xa, xa_get_order(xa, i) != 0);
+
+	for (order = 0; order < max_order; order++) {
+		for (i = 0; i < 10; i++) {
+			xa_store_order(xa, i << order, order,
+					xa_mk_index(i << order), GFP_KERNEL);
+			for (j = i << order; j < (i + 1) << order; j++)
+				XA_BUG_ON(xa, xa_get_order(xa, j) != order);
+			xa_erase(xa, i << order);
+		}
+	}
+}
+
 static noinline void check_destroy(struct xarray *xa)
 {
 	unsigned long index;
@@ -1697,6 +1717,7 @@ static int xarray_checks(void)
 	check_reserve(&array);
 	check_reserve(&xa0);
 	check_multi_store(&array);
+	check_get_order(&array);
 	check_xa_alloc();
 	check_find(&array);
 	check_find_entry(&array);
diff --git a/lib/xarray.c b/lib/xarray.c
index e9e641d3c0c3..f24cb9a43af8 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -1592,6 +1592,46 @@ unlock:
 	return xas_result(&xas, NULL);
 }
 EXPORT_SYMBOL(xa_store_range);
+
+/**
+ * xa_get_order() - Get the order of an entry.
+ * @xa: XArray.
+ * @index: Index of the entry.
+ *
+ * Return: A number between 0 and 63 indicating the order of the entry.
+ */
+int xa_get_order(struct xarray *xa, unsigned long index)
+{
+	XA_STATE(xas, xa, index);
+	void *entry;
+	int order = 0;
+
+	rcu_read_lock();
+	entry = xas_load(&xas);
+
+	if (!entry)
+		goto unlock;
+
+	if (!xas.xa_node)
+		goto unlock;
+
+	for (;;) {
+		unsigned int slot = xas.xa_offset + (1 << order);
+
+		if (slot >= XA_CHUNK_SIZE)
+			break;
+		if (!xa_is_sibling(xas.xa_node->slots[slot]))
+			break;
+		order++;
+	}
+
+	order += xas.xa_node->shift;
+unlock:
+	rcu_read_unlock();
+
+	return order;
+}
+EXPORT_SYMBOL(xa_get_order);
 #endif /* CONFIG_XARRAY_MULTI */
 
 /**
-- 
cgit v1.2.3


From 8fc75643c5e14574c8be59b69182452ece28315a Mon Sep 17 00:00:00 2001
From: "Matthew Wilcox (Oracle)" <willy@infradead.org>
Date: Thu, 15 Oct 2020 20:05:16 -0700
Subject: XArray: add xas_split

In order to use multi-index entries for huge pages in the page cache, we
need to be able to split a multi-index entry (eg if a file is truncated in
the middle of a huge page entry).  This version does not support splitting
more than one level of the tree at a time.  This is an acceptable
limitation for the page cache as we do not expect to support order-12
pages in the near future.

[akpm@linux-foundation.org: export xas_split_alloc() to modules]
[willy@infradead.org: fix xarray split]
  Link: https://lkml.kernel.org/r/20200910175450.GV6583@casper.infradead.org
[willy@infradead.org: fix xarray]
  Link: https://lkml.kernel.org/r/20201001233943.GW20115@casper.infradead.org

Signed-off-by: Matthew Wilcox (Oracle) <willy@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Kirill A . Shutemov" <kirill@shutemov.name>
Cc: Qian Cai <cai@lca.pw>
Cc: Song Liu <songliubraving@fb.com>
Link: https://lkml.kernel.org/r/20200903183029.14930-3-willy@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/core-api/xarray.rst |  16 ++--
 include/linux/xarray.h            |  13 +++
 lib/test_xarray.c                 |  44 ++++++++++
 lib/xarray.c                      | 168 ++++++++++++++++++++++++++++++++++++--
 4 files changed, 225 insertions(+), 16 deletions(-)

(limited to 'lib')

diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst
index 640934b6f7b4..a137a0e6d068 100644
--- a/Documentation/core-api/xarray.rst
+++ b/Documentation/core-api/xarray.rst
@@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range.
 Each entry will only be returned once, no matter how many indices it
 occupies.
 
-Using xas_next() or xas_prev() with a multi-index xa_state
-is not supported.  Using either of these functions on a multi-index entry
-will reveal sibling entries; these should be skipped over by the caller.
-
-Storing ``NULL`` into any index of a multi-index entry will set the entry
-at every index to ``NULL`` and dissolve the tie.  Splitting a multi-index
-entry into entries occupying smaller ranges is not yet supported.
+Using xas_next() or xas_prev() with a multi-index xa_state is not
+supported.  Using either of these functions on a multi-index entry will
+reveal sibling entries; these should be skipped over by the caller.
+
+Storing ``NULL`` into any index of a multi-index entry will set the
+entry at every index to ``NULL`` and dissolve the tie.  A multi-index
+entry can be split into entries occupying smaller ranges by calling
+xas_split_alloc() without the xa_lock held, followed by taking the lock
+and calling xas_split().
 
 Functions and structures
 ========================
diff --git a/include/linux/xarray.h b/include/linux/xarray.h
index 5b7f4ebcf4ff..5cdf441f6377 100644
--- a/include/linux/xarray.h
+++ b/include/linux/xarray.h
@@ -1507,11 +1507,24 @@ void xas_create_range(struct xa_state *);
 
 #ifdef CONFIG_XARRAY_MULTI
 int xa_get_order(struct xarray *, unsigned long index);
+void xas_split(struct xa_state *, void *entry, unsigned int order);
+void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t);
 #else
 static inline int xa_get_order(struct xarray *xa, unsigned long index)
 {
 	return 0;
 }
+
+static inline void xas_split(struct xa_state *xas, void *entry,
+		unsigned int order)
+{
+	xas_store(xas, entry);
+}
+
+static inline void xas_split_alloc(struct xa_state *xas, void *entry,
+		unsigned int order, gfp_t gfp)
+{
+}
 #endif
 
 /**
diff --git a/lib/test_xarray.c b/lib/test_xarray.c
index bdd4d7995f79..8262c3f05a5d 100644
--- a/lib/test_xarray.c
+++ b/lib/test_xarray.c
@@ -1503,6 +1503,49 @@ static noinline void check_store_range(struct xarray *xa)
 	}
 }
 
+#ifdef CONFIG_XARRAY_MULTI
+static void check_split_1(struct xarray *xa, unsigned long index,
+							unsigned int order)
+{
+	XA_STATE(xas, xa, index);
+	void *entry;
+	unsigned int i = 0;
+
+	xa_store_order(xa, index, order, xa, GFP_KERNEL);
+
+	xas_split_alloc(&xas, xa, order, GFP_KERNEL);
+	xas_lock(&xas);
+	xas_split(&xas, xa, order);
+	xas_unlock(&xas);
+
+	xa_for_each(xa, index, entry) {
+		XA_BUG_ON(xa, entry != xa);
+		i++;
+	}
+	XA_BUG_ON(xa, i != 1 << order);
+
+	xa_set_mark(xa, index, XA_MARK_0);
+	XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0));
+
+	xa_destroy(xa);
+}
+
+static noinline void check_split(struct xarray *xa)
+{
+	unsigned int order;
+
+	XA_BUG_ON(xa, !xa_empty(xa));
+
+	for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) {
+		check_split_1(xa, 0, order);
+		check_split_1(xa, 1UL << order, order);
+		check_split_1(xa, 3UL << order, order);
+	}
+}
+#else
+static void check_split(struct xarray *xa) { }
+#endif
+
 static void check_align_1(struct xarray *xa, char *name)
 {
 	int i;
@@ -1729,6 +1772,7 @@ static int xarray_checks(void)
 	check_store_range(&array);
 	check_store_iter(&array);
 	check_align(&xa0);
+	check_split(&array);
 
 	check_workingset(&array, 0);
 	check_workingset(&array, 64);
diff --git a/lib/xarray.c b/lib/xarray.c
index f24cb9a43af8..b76eea7b314c 100644
--- a/lib/xarray.c
+++ b/lib/xarray.c
@@ -266,13 +266,14 @@ static void xa_node_free(struct xa_node *node)
  */
 static void xas_destroy(struct xa_state *xas)
 {
-	struct xa_node *node = xas->xa_alloc;
+	struct xa_node *next, *node = xas->xa_alloc;
 
-	if (!node)
-		return;
-	XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
-	kmem_cache_free(radix_tree_node_cachep, node);
-	xas->xa_alloc = NULL;
+	while (node) {
+		XA_NODE_BUG_ON(node, !list_empty(&node->private_list));
+		next = rcu_dereference_raw(node->parent);
+		radix_tree_node_rcu_free(&node->rcu_head);
+		xas->xa_alloc = node = next;
+	}
 }
 
 /**
@@ -304,6 +305,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp)
 	xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp);
 	if (!xas->xa_alloc)
 		return false;
+	xas->xa_alloc->parent = NULL;
 	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
 	xas->xa_node = XAS_RESTART;
 	return true;
@@ -339,6 +341,7 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp)
 	}
 	if (!xas->xa_alloc)
 		return false;
+	xas->xa_alloc->parent = NULL;
 	XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list));
 	xas->xa_node = XAS_RESTART;
 	return true;
@@ -403,7 +406,7 @@ static unsigned long xas_size(const struct xa_state *xas)
 /*
  * Use this to calculate the maximum index that will need to be created
  * in order to add the entry described by @xas.  Because we cannot store a
- * multiple-index entry at index 0, the calculation is a little more complex
+ * multi-index entry at index 0, the calculation is a little more complex
  * than you might expect.
  */
 static unsigned long xas_max(struct xa_state *xas)
@@ -946,6 +949,153 @@ void xas_init_marks(const struct xa_state *xas)
 }
 EXPORT_SYMBOL_GPL(xas_init_marks);
 
+#ifdef CONFIG_XARRAY_MULTI
+static unsigned int node_get_marks(struct xa_node *node, unsigned int offset)
+{
+	unsigned int marks = 0;
+	xa_mark_t mark = XA_MARK_0;
+
+	for (;;) {
+		if (node_get_mark(node, offset, mark))
+			marks |= 1 << (__force unsigned int)mark;
+		if (mark == XA_MARK_MAX)
+			break;
+		mark_inc(mark);
+	}
+
+	return marks;
+}
+
+static void node_set_marks(struct xa_node *node, unsigned int offset,
+			struct xa_node *child, unsigned int marks)
+{
+	xa_mark_t mark = XA_MARK_0;
+
+	for (;;) {
+		if (marks & (1 << (__force unsigned int)mark)) {
+			node_set_mark(node, offset, mark);
+			if (child)
+				node_mark_all(child, mark);
+		}
+		if (mark == XA_MARK_MAX)
+			break;
+		mark_inc(mark);
+	}
+}
+
+/**
+ * xas_split_alloc() - Allocate memory for splitting an entry.
+ * @xas: XArray operation state.
+ * @entry: New entry which will be stored in the array.
+ * @order: New entry order.
+ * @gfp: Memory allocation flags.
+ *
+ * This function should be called before calling xas_split().
+ * If necessary, it will allocate new nodes (and fill them with @entry)
+ * to prepare for the upcoming split of an entry of @order size into
+ * entries of the order stored in the @xas.
+ *
+ * Context: May sleep if @gfp flags permit.
+ */
+void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order,
+		gfp_t gfp)
+{
+	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+	unsigned int mask = xas->xa_sibs;
+
+	/* XXX: no support for splitting really large entries yet */
+	if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order))
+		goto nomem;
+	if (xas->xa_shift + XA_CHUNK_SHIFT > order)
+		return;
+
+	do {
+		unsigned int i;
+		void *sibling;
+		struct xa_node *node;
+
+		node = kmem_cache_alloc(radix_tree_node_cachep, gfp);
+		if (!node)
+			goto nomem;
+		node->array = xas->xa;
+		for (i = 0; i < XA_CHUNK_SIZE; i++) {
+			if ((i & mask) == 0) {
+				RCU_INIT_POINTER(node->slots[i], entry);
+				sibling = xa_mk_sibling(0);
+			} else {
+				RCU_INIT_POINTER(node->slots[i], sibling);
+			}
+		}
+		RCU_INIT_POINTER(node->parent, xas->xa_alloc);
+		xas->xa_alloc = node;
+	} while (sibs-- > 0);
+
+	return;
+nomem:
+	xas_destroy(xas);
+	xas_set_err(xas, -ENOMEM);
+}
+EXPORT_SYMBOL_GPL(xas_split_alloc);
+
+/**
+ * xas_split() - Split a multi-index entry into smaller entries.
+ * @xas: XArray operation state.
+ * @entry: New entry to store in the array.
+ * @order: New entry order.
+ *
+ * The value in the entry is copied to all the replacement entries.
+ *
+ * Context: Any context.  The caller should hold the xa_lock.
+ */
+void xas_split(struct xa_state *xas, void *entry, unsigned int order)
+{
+	unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1;
+	unsigned int offset, marks;
+	struct xa_node *node;
+	void *curr = xas_load(xas);
+	int values = 0;
+
+	node = xas->xa_node;
+	if (xas_top(node))
+		return;
+
+	marks = node_get_marks(node, xas->xa_offset);
+
+	offset = xas->xa_offset + sibs;
+	do {
+		if (xas->xa_shift < node->shift) {
+			struct xa_node *child = xas->xa_alloc;
+
+			xas->xa_alloc = rcu_dereference_raw(child->parent);
+			child->shift = node->shift - XA_CHUNK_SHIFT;
+			child->offset = offset;
+			child->count = XA_CHUNK_SIZE;
+			child->nr_values = xa_is_value(entry) ?
+					XA_CHUNK_SIZE : 0;
+			RCU_INIT_POINTER(child->parent, node);
+			node_set_marks(node, offset, child, marks);
+			rcu_assign_pointer(node->slots[offset],
+					xa_mk_node(child));
+			if (xa_is_value(curr))
+				values--;
+		} else {
+			unsigned int canon = offset - xas->xa_sibs;
+
+			node_set_marks(node, canon, NULL, marks);
+			rcu_assign_pointer(node->slots[canon], entry);
+			while (offset > canon)
+				rcu_assign_pointer(node->slots[offset--],
+						xa_mk_sibling(canon));
+			values += (xa_is_value(entry) - xa_is_value(curr)) *
+					(xas->xa_sibs + 1);
+		}
+	} while (offset-- > xas->xa_offset);
+
+	node->nr_values += values;
+}
+EXPORT_SYMBOL_GPL(xas_split);
+#endif
+
 /**
  * xas_pause() - Pause a walk to drop a lock.
  * @xas: XArray operation state.
@@ -1407,7 +1557,7 @@ EXPORT_SYMBOL(__xa_store);
  * @gfp: Memory allocation flags.
  *
  * After this function returns, loads from this index will return @entry.
- * Storing into an existing multislot entry updates the entry of every index.
+ * Storing into an existing multi-index entry updates the entry of every index.
  * The marks associated with @index are unaffected unless @entry is %NULL.
  *
  * Context: Any context.  Takes and releases the xa_lock.
@@ -1549,7 +1699,7 @@ static void xas_set_range(struct xa_state *xas, unsigned long first,
  *
  * After this function returns, loads from any index between @first and @last,
  * inclusive will return @entry.
- * Storing into an existing multislot entry updates the entry of every index.
+ * Storing into an existing multi-index entry updates the entry of every index.
  * The marks associated with @index are unaffected unless @entry is %NULL.
  *
  * Context: Process context.  Takes and releases the xa_lock.  May sleep
-- 
cgit v1.2.3


From b296a6d53339a79082c1d2c1761e948e8b3def69 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 15 Oct 2020 20:10:21 -0700
Subject: kernel.h: split out min()/max() et al. helpers

kernel.h is being used as a dump for all kinds of stuff for a long time.
Here is the attempt to start cleaning it up by splitting out min()/max()
et al.  helpers.

At the same time convert users in header and lib folder to use new header.
Though for time being include new header back to kernel.h to avoid
twisted indirected includes for other existing users.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: "Rafael J. Wysocki" <rjw@rjwysocki.net>
Cc: Steven Rostedt <rostedt@goodmis.org>
Cc: Rasmus Villemoes <linux@rasmusvillemoes.dk>
Cc: Joe Perches <joe@perches.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200910164152.GA1891694@smile.fi.intel.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/blkdev.h    |   1 +
 include/linux/bvec.h      |   6 +-
 include/linux/jiffies.h   |   3 +-
 include/linux/kernel.h    | 150 +--------------------------------------------
 include/linux/minmax.h    | 153 ++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/nodemask.h  |   2 +-
 include/linux/uaccess.h   |   1 +
 kernel/range.c            |   3 +-
 lib/find_bit.c            |   1 +
 lib/hexdump.c             |   1 +
 lib/math/rational.c       |   2 +-
 lib/math/reciprocal_div.c |   1 +
 12 files changed, 170 insertions(+), 154 deletions(-)
 create mode 100644 include/linux/minmax.h

(limited to 'lib')

diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index c09375e0a0eb..639cae2c158b 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -8,6 +8,7 @@
 #include <linux/genhd.h>
 #include <linux/list.h>
 #include <linux/llist.h>
+#include <linux/minmax.h>
 #include <linux/timer.h>
 #include <linux/workqueue.h>
 #include <linux/pagemap.h>
diff --git a/include/linux/bvec.h b/include/linux/bvec.h
index dd74503f7e5e..2efec10bf792 100644
--- a/include/linux/bvec.h
+++ b/include/linux/bvec.h
@@ -7,10 +7,14 @@
 #ifndef __LINUX_BVEC_ITER_H
 #define __LINUX_BVEC_ITER_H
 
-#include <linux/kernel.h>
 #include <linux/bug.h>
 #include <linux/errno.h>
+#include <linux/limits.h>
+#include <linux/minmax.h>
 #include <linux/mm.h>
+#include <linux/types.h>
+
+struct page;
 
 /**
  * struct bio_vec - a contiguous range of physical memory addresses
diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h
index fed6ba96c527..5e13f801c902 100644
--- a/include/linux/jiffies.h
+++ b/include/linux/jiffies.h
@@ -3,8 +3,9 @@
 #define _LINUX_JIFFIES_H
 
 #include <linux/cache.h>
+#include <linux/limits.h>
 #include <linux/math64.h>
-#include <linux/kernel.h>
+#include <linux/minmax.h>
 #include <linux/types.h>
 #include <linux/time.h>
 #include <linux/timex.h>
diff --git a/include/linux/kernel.h b/include/linux/kernel.h
index e4aa29b1ad62..c629215fdad9 100644
--- a/include/linux/kernel.h
+++ b/include/linux/kernel.h
@@ -11,6 +11,7 @@
 #include <linux/compiler.h>
 #include <linux/bitops.h>
 #include <linux/log2.h>
+#include <linux/minmax.h>
 #include <linux/typecheck.h>
 #include <linux/printk.h>
 #include <linux/build_bug.h>
@@ -833,155 +834,6 @@ ftrace_vprintk(const char *fmt, va_list ap)
 static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { }
 #endif /* CONFIG_TRACING */
 
-/*
- * min()/max()/clamp() macros must accomplish three things:
- *
- * - avoid multiple evaluations of the arguments (so side-effects like
- *   "x++" happen only once) when non-constant.
- * - perform strict type-checking (to generate warnings instead of
- *   nasty runtime surprises). See the "unnecessary" pointer comparison
- *   in __typecheck().
- * - retain result as a constant expressions when called with only
- *   constant expressions (to avoid tripping VLA warnings in stack
- *   allocation usage).
- */
-#define __typecheck(x, y) \
-		(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
-
-/*
- * This returns a constant expression while determining if an argument is
- * a constant expression, most importantly without evaluating the argument.
- * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
- */
-#define __is_constexpr(x) \
-	(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
-
-#define __no_side_effects(x, y) \
-		(__is_constexpr(x) && __is_constexpr(y))
-
-#define __safe_cmp(x, y) \
-		(__typecheck(x, y) && __no_side_effects(x, y))
-
-#define __cmp(x, y, op)	((x) op (y) ? (x) : (y))
-
-#define __cmp_once(x, y, unique_x, unique_y, op) ({	\
-		typeof(x) unique_x = (x);		\
-		typeof(y) unique_y = (y);		\
-		__cmp(unique_x, unique_y, op); })
-
-#define __careful_cmp(x, y, op) \
-	__builtin_choose_expr(__safe_cmp(x, y), \
-		__cmp(x, y, op), \
-		__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
-
-/**
- * min - return minimum of two values of the same or compatible types
- * @x: first value
- * @y: second value
- */
-#define min(x, y)	__careful_cmp(x, y, <)
-
-/**
- * max - return maximum of two values of the same or compatible types
- * @x: first value
- * @y: second value
- */
-#define max(x, y)	__careful_cmp(x, y, >)
-
-/**
- * min3 - return minimum of three values
- * @x: first value
- * @y: second value
- * @z: third value
- */
-#define min3(x, y, z) min((typeof(x))min(x, y), z)
-
-/**
- * max3 - return maximum of three values
- * @x: first value
- * @y: second value
- * @z: third value
- */
-#define max3(x, y, z) max((typeof(x))max(x, y), z)
-
-/**
- * min_not_zero - return the minimum that is _not_ zero, unless both are zero
- * @x: value1
- * @y: value2
- */
-#define min_not_zero(x, y) ({			\
-	typeof(x) __x = (x);			\
-	typeof(y) __y = (y);			\
-	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
-
-/**
- * clamp - return a value clamped to a given range with strict typechecking
- * @val: current value
- * @lo: lowest allowable value
- * @hi: highest allowable value
- *
- * This macro does strict typechecking of @lo/@hi to make sure they are of the
- * same type as @val.  See the unnecessary pointer comparisons.
- */
-#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
-
-/*
- * ..and if you can't take the strict
- * types, you can specify one yourself.
- *
- * Or not use min/max/clamp at all, of course.
- */
-
-/**
- * min_t - return minimum of two values, using the specified type
- * @type: data type to use
- * @x: first value
- * @y: second value
- */
-#define min_t(type, x, y)	__careful_cmp((type)(x), (type)(y), <)
-
-/**
- * max_t - return maximum of two values, using the specified type
- * @type: data type to use
- * @x: first value
- * @y: second value
- */
-#define max_t(type, x, y)	__careful_cmp((type)(x), (type)(y), >)
-
-/**
- * clamp_t - return a value clamped to a given range using a given type
- * @type: the type of variable to use
- * @val: current value
- * @lo: minimum allowable value
- * @hi: maximum allowable value
- *
- * This macro does no typechecking and uses temporary variables of type
- * @type to make all the comparisons.
- */
-#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
-
-/**
- * clamp_val - return a value clamped to a given range using val's type
- * @val: current value
- * @lo: minimum allowable value
- * @hi: maximum allowable value
- *
- * This macro does no typechecking and uses temporary variables of whatever
- * type the input argument @val is.  This is useful when @val is an unsigned
- * type and @lo and @hi are literals that will otherwise be assigned a signed
- * integer type.
- */
-#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
-
-
-/**
- * swap - swap values of @a and @b
- * @a: first value
- * @b: second value
- */
-#define swap(a, b) \
-	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
-
 /* This counts to 12. Any more, it will return 13th argument. */
 #define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n
 #define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)
diff --git a/include/linux/minmax.h b/include/linux/minmax.h
new file mode 100644
index 000000000000..c0f57b0c64d9
--- /dev/null
+++ b/include/linux/minmax.h
@@ -0,0 +1,153 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _LINUX_MINMAX_H
+#define _LINUX_MINMAX_H
+
+/*
+ * min()/max()/clamp() macros must accomplish three things:
+ *
+ * - avoid multiple evaluations of the arguments (so side-effects like
+ *   "x++" happen only once) when non-constant.
+ * - perform strict type-checking (to generate warnings instead of
+ *   nasty runtime surprises). See the "unnecessary" pointer comparison
+ *   in __typecheck().
+ * - retain result as a constant expressions when called with only
+ *   constant expressions (to avoid tripping VLA warnings in stack
+ *   allocation usage).
+ */
+#define __typecheck(x, y) \
+	(!!(sizeof((typeof(x) *)1 == (typeof(y) *)1)))
+
+/*
+ * This returns a constant expression while determining if an argument is
+ * a constant expression, most importantly without evaluating the argument.
+ * Glory to Martin Uecker <Martin.Uecker@med.uni-goettingen.de>
+ */
+#define __is_constexpr(x) \
+	(sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8)))
+
+#define __no_side_effects(x, y) \
+		(__is_constexpr(x) && __is_constexpr(y))
+
+#define __safe_cmp(x, y) \
+		(__typecheck(x, y) && __no_side_effects(x, y))
+
+#define __cmp(x, y, op)	((x) op (y) ? (x) : (y))
+
+#define __cmp_once(x, y, unique_x, unique_y, op) ({	\
+		typeof(x) unique_x = (x);		\
+		typeof(y) unique_y = (y);		\
+		__cmp(unique_x, unique_y, op); })
+
+#define __careful_cmp(x, y, op) \
+	__builtin_choose_expr(__safe_cmp(x, y), \
+		__cmp(x, y, op), \
+		__cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op))
+
+/**
+ * min - return minimum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
+#define min(x, y)	__careful_cmp(x, y, <)
+
+/**
+ * max - return maximum of two values of the same or compatible types
+ * @x: first value
+ * @y: second value
+ */
+#define max(x, y)	__careful_cmp(x, y, >)
+
+/**
+ * min3 - return minimum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
+#define min3(x, y, z) min((typeof(x))min(x, y), z)
+
+/**
+ * max3 - return maximum of three values
+ * @x: first value
+ * @y: second value
+ * @z: third value
+ */
+#define max3(x, y, z) max((typeof(x))max(x, y), z)
+
+/**
+ * min_not_zero - return the minimum that is _not_ zero, unless both are zero
+ * @x: value1
+ * @y: value2
+ */
+#define min_not_zero(x, y) ({			\
+	typeof(x) __x = (x);			\
+	typeof(y) __y = (y);			\
+	__x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); })
+
+/**
+ * clamp - return a value clamped to a given range with strict typechecking
+ * @val: current value
+ * @lo: lowest allowable value
+ * @hi: highest allowable value
+ *
+ * This macro does strict typechecking of @lo/@hi to make sure they are of the
+ * same type as @val.  See the unnecessary pointer comparisons.
+ */
+#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi)
+
+/*
+ * ..and if you can't take the strict
+ * types, you can specify one yourself.
+ *
+ * Or not use min/max/clamp at all, of course.
+ */
+
+/**
+ * min_t - return minimum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
+#define min_t(type, x, y)	__careful_cmp((type)(x), (type)(y), <)
+
+/**
+ * max_t - return maximum of two values, using the specified type
+ * @type: data type to use
+ * @x: first value
+ * @y: second value
+ */
+#define max_t(type, x, y)	__careful_cmp((type)(x), (type)(y), >)
+
+/**
+ * clamp_t - return a value clamped to a given range using a given type
+ * @type: the type of variable to use
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of type
+ * @type to make all the comparisons.
+ */
+#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi)
+
+/**
+ * clamp_val - return a value clamped to a given range using val's type
+ * @val: current value
+ * @lo: minimum allowable value
+ * @hi: maximum allowable value
+ *
+ * This macro does no typechecking and uses temporary variables of whatever
+ * type the input argument @val is.  This is useful when @val is an unsigned
+ * type and @lo and @hi are literals that will otherwise be assigned a signed
+ * integer type.
+ */
+#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi)
+
+/**
+ * swap - swap values of @a and @b
+ * @a: first value
+ * @b: second value
+ */
+#define swap(a, b) \
+	do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0)
+
+#endif	/* _LINUX_MINMAX_H */
diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h
index 3334ce056335..ac398e143c9a 100644
--- a/include/linux/nodemask.h
+++ b/include/linux/nodemask.h
@@ -90,9 +90,9 @@
  * for such situations. See below and CPUMASK_ALLOC also.
  */
 
-#include <linux/kernel.h>
 #include <linux/threads.h>
 #include <linux/bitmap.h>
+#include <linux/minmax.h>
 #include <linux/numa.h>
 
 typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t;
diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index 1ae36bc8db35..ef084eacaa7c 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -3,6 +3,7 @@
 #define __LINUX_UACCESS_H__
 
 #include <linux/instrumented.h>
+#include <linux/minmax.h>
 #include <linux/sched.h>
 #include <linux/thread_info.h>
 
diff --git a/kernel/range.c b/kernel/range.c
index d84de6766472..56435f96da73 100644
--- a/kernel/range.c
+++ b/kernel/range.c
@@ -2,8 +2,9 @@
 /*
  * Range add and subtract
  */
-#include <linux/kernel.h>
 #include <linux/init.h>
+#include <linux/minmax.h>
+#include <linux/printk.h>
 #include <linux/sort.h>
 #include <linux/string.h>
 #include <linux/range.h>
diff --git a/lib/find_bit.c b/lib/find_bit.c
index 49f875f1baf7..4a8751010d59 100644
--- a/lib/find_bit.c
+++ b/lib/find_bit.c
@@ -16,6 +16,7 @@
 #include <linux/bitmap.h>
 #include <linux/export.h>
 #include <linux/kernel.h>
+#include <linux/minmax.h>
 
 #if !defined(find_next_bit) || !defined(find_next_zero_bit) ||			\
 	!defined(find_next_bit_le) || !defined(find_next_zero_bit_le) ||	\
diff --git a/lib/hexdump.c b/lib/hexdump.c
index 147133f8eb2f..9301578f98e8 100644
--- a/lib/hexdump.c
+++ b/lib/hexdump.c
@@ -7,6 +7,7 @@
 #include <linux/ctype.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
+#include <linux/minmax.h>
 #include <linux/export.h>
 #include <asm/unaligned.h>
 
diff --git a/lib/math/rational.c b/lib/math/rational.c
index df75c8809693..9781d521963d 100644
--- a/lib/math/rational.c
+++ b/lib/math/rational.c
@@ -11,7 +11,7 @@
 #include <linux/rational.h>
 #include <linux/compiler.h>
 #include <linux/export.h>
-#include <linux/kernel.h>
+#include <linux/minmax.h>
 
 /*
  * calculate best rational approximation for a given fraction
diff --git a/lib/math/reciprocal_div.c b/lib/math/reciprocal_div.c
index bf043258fa00..32436dd4171e 100644
--- a/lib/math/reciprocal_div.c
+++ b/lib/math/reciprocal_div.c
@@ -4,6 +4,7 @@
 #include <asm/div64.h>
 #include <linux/reciprocal_div.h>
 #include <linux/export.h>
+#include <linux/minmax.h>
 
 /*
  * For a description of the algorithm please have a look at
-- 
cgit v1.2.3


From 197d6c1dde4e19517e8cf843eec7fc6417241969 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 15 Oct 2020 20:10:45 -0700
Subject: lib: bitmap: delete duplicated words

Drop the repeated word "an".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200823040424.25760-1-rdunlap@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/bitmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/bitmap.c b/lib/bitmap.c
index 61394890788e..75006c4036e9 100644
--- a/lib/bitmap.c
+++ b/lib/bitmap.c
@@ -23,7 +23,7 @@
 /**
  * DOC: bitmap introduction
  *
- * bitmaps provide an array of bits, implemented using an an
+ * bitmaps provide an array of bits, implemented using an
  * array of unsigned longs.  The number of valid bits in a
  * given bitmap does _not_ need to be an exact multiple of
  * BITS_PER_LONG.
-- 
cgit v1.2.3


From f1e594acb1bdf29b583fe205793699a8e7c35fc0 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 15 Oct 2020 20:10:48 -0700
Subject: lib: libcrc32c: delete duplicated words

Drop the repeated word "the".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200823040430.25807-1-rdunlap@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/libcrc32c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c
index 77ab839644c5..5ca0d815a95d 100644
--- a/lib/libcrc32c.c
+++ b/lib/libcrc32c.c
@@ -12,7 +12,7 @@
  * pages =        {},
  * month =        {June},
  *}
- * Used by the iSCSI driver, possibly others, and derived from the
+ * Used by the iSCSI driver, possibly others, and derived from
  * the iscsi-crc.c module of the linux-iscsi driver at
  * http://linux-iscsi.sourceforge.net.
  *
-- 
cgit v1.2.3


From 2f22385fb1211a90f53ee197e0f41ae0d35b391c Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 15 Oct 2020 20:10:51 -0700
Subject: lib: decompress_bunzip2: delete duplicated words

Drop the repeated word "how".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200823040436.25852-1-rdunlap@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/decompress_bunzip2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c
index f9628f3924ce..c72c865032fa 100644
--- a/lib/decompress_bunzip2.c
+++ b/lib/decompress_bunzip2.c
@@ -390,7 +390,7 @@ static int INIT get_next_block(struct bunzip_data *bd)
 		j = (bd->inbufBits >> bd->inbufBitCount)&
 			((1 << hufGroup->maxLen)-1);
 got_huff_bits:
-		/* Figure how how many bits are in next symbol and
+		/* Figure how many bits are in next symbol and
 		 * unget extras */
 		i = hufGroup->minLen;
 		while (j > limit[i])
-- 
cgit v1.2.3


From dde57fe01a0a39ce131ad5611e0a4430cec66bd8 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 15 Oct 2020 20:10:57 -0700
Subject: lib: dynamic_queue_limits: delete duplicated words + fix typo

Drop the repeated word "the".
Fix spelling of "excess".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200823040449.25946-1-rdunlap@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/dynamic_queue_limits.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c
index e659a027036e..fde0aa244148 100644
--- a/lib/dynamic_queue_limits.c
+++ b/lib/dynamic_queue_limits.c
@@ -60,8 +60,8 @@ void dql_completed(struct dql *dql, unsigned int count)
 		 * A decrease is only considered if the queue has been busy in
 		 * the whole interval (the check above).
 		 *
-		 * If there is slack, the amount of execess data queued above
-		 * the the amount needed to prevent starvation, the queue limit
+		 * If there is slack, the amount of excess data queued above
+		 * the amount needed to prevent starvation, the queue limit
 		 * can be decreased.  To avoid hysteresis we consider the
 		 * minimum amount of slack found over several iterations of the
 		 * completion routine.
-- 
cgit v1.2.3


From 4e20ace06f705404df09316b859c267e9ec99354 Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 15 Oct 2020 20:11:01 -0700
Subject: lib: earlycpio: delete duplicated words

Drop the repeated word "the".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200823040455.25995-1-rdunlap@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/earlycpio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/earlycpio.c b/lib/earlycpio.c
index c001e084829e..e83628882001 100644
--- a/lib/earlycpio.c
+++ b/lib/earlycpio.c
@@ -42,7 +42,7 @@ enum cpio_fields {
 /**
  * cpio_data find_cpio_data - Search for files in an uncompressed cpio
  * @path:       The directory to search for, including a slash at the end
- * @data:       Pointer to the the cpio archive or a header inside
+ * @data:       Pointer to the cpio archive or a header inside
  * @len:        Remaining length of the cpio based on data pointer
  * @nextoff:    When a matching file is found, this is the offset from the
  *              beginning of the cpio to the beginning of the next file, not the
-- 
cgit v1.2.3


From e0656501a6192ebced36764a007f91cc014d896d Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 15 Oct 2020 20:11:04 -0700
Subject: lib: radix-tree: delete duplicated words

Drop the repeated word "be".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200823040508.26086-1-rdunlap@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/radix-tree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/radix-tree.c b/lib/radix-tree.c
index 8e4a3a4397f2..005ced92a4a9 100644
--- a/lib/radix-tree.c
+++ b/lib/radix-tree.c
@@ -325,7 +325,7 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr)
 	int ret = -ENOMEM;
 
 	/*
-	 * Nodes preloaded by one cgroup can be be used by another cgroup, so
+	 * Nodes preloaded by one cgroup can be used by another cgroup, so
 	 * they should never be accounted to any particular memory cgroup.
 	 */
 	gfp_mask &= ~__GFP_ACCOUNT;
-- 
cgit v1.2.3


From 408a93a2bb4f276f28059b51fbe0bbd06be350fc Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 15 Oct 2020 20:11:07 -0700
Subject: lib: syscall: delete duplicated words

Drop the repeated word "the".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200823040514.26136-1-rdunlap@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/syscall.c b/lib/syscall.c
index fb328e7ccb08..8533d2fea2d7 100644
--- a/lib/syscall.c
+++ b/lib/syscall.c
@@ -44,7 +44,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info
  *			 .data.instruction_pointer - filled with user PC
  *
  * If @target is blocked in a system call, returns zero with @info.data.nr
- * set to the the call's number and @info.data.args filled in with its
+ * set to the call's number and @info.data.args filled in with its
  * arguments. Registers not used for system call arguments may not be available
  * and it is not kosher to use &struct user_regset calls while the system
  * call is still in progress.  Note we may get this result if @target
-- 
cgit v1.2.3


From 2d0469814ade60efb5cd8ca9af93c43bc3ba831e Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 15 Oct 2020 20:11:10 -0700
Subject: lib: test_sysctl: delete duplicated words

Drop the repeated word "the".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200823040520.1999-1-rdunlap@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c
index 98bc92a91662..3750323973f4 100644
--- a/lib/test_sysctl.c
+++ b/lib/test_sysctl.c
@@ -16,7 +16,7 @@
  */
 
 /*
- * This module provides an interface to the the proc sysctl interfaces.  This
+ * This module provides an interface to the proc sysctl interfaces.  This
  * driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the
  * system unless explicitly requested by name. You can also build this driver
  * into your kernel.
-- 
cgit v1.2.3


From 8d8472cfdefa0f5c839f5c4a507af5e26ee6f5bf Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Thu, 15 Oct 2020 20:11:14 -0700
Subject: lib/mpi/mpi-bit.c: fix spello of "functions"

Fix typo/spello of "functions".

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/8df15173-a6df-9426-7cad-a2d279bf1170@infradead.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/mpi/mpi-bit.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c
index a5119a2bcdd4..142b680835df 100644
--- a/lib/mpi/mpi-bit.c
+++ b/lib/mpi/mpi-bit.c
@@ -1,4 +1,4 @@
-/* mpi-bit.c  -  MPI bit level fucntions
+/* mpi-bit.c  -  MPI bit level functions
  * Copyright (C) 1998, 1999 Free Software Foundation, Inc.
  *
  * This file is part of GnuPG.
-- 
cgit v1.2.3


From 3b6742618ed9216dd6caad968fe8c83b32dff485 Mon Sep 17 00:00:00 2001
From: Stephen Boyd <swboyd@chromium.org>
Date: Thu, 15 Oct 2020 20:11:17 -0700
Subject: lib/idr.c: document calling context for IDA APIs mustn't use locks

The documentation for these functions indicates that callers don't need to
hold a lock while calling them, but that documentation is only in one
place under "IDA Usage".  Let's state the same information on each IDA
function so that it's clear what the calling context requires.
Furthermore, let's document ida_simple_get() with the same information so
that callers know how this API works.

Signed-off-by: Stephen Boyd <swboyd@chromium.org>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: Tri Vo <trong@android.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Matthew Wilcox <willy@infradead.org>
Link: https://lkml.kernel.org/r/20200910055246.2297797-1-swboyd@chromium.org
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/idr.h | 9 ++++++---
 lib/idr.c           | 9 ++++++---
 2 files changed, 12 insertions(+), 6 deletions(-)

(limited to 'lib')

diff --git a/include/linux/idr.h b/include/linux/idr.h
index 3ade03e5c7af..b235ed987021 100644
--- a/include/linux/idr.h
+++ b/include/linux/idr.h
@@ -263,7 +263,8 @@ void ida_destroy(struct ida *ida);
  *
  * Allocate an ID between 0 and %INT_MAX, inclusive.
  *
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
  * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
  * or %-ENOSPC if there are no free IDs.
  */
@@ -280,7 +281,8 @@ static inline int ida_alloc(struct ida *ida, gfp_t gfp)
  *
  * Allocate an ID between @min and %INT_MAX, inclusive.
  *
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
  * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
  * or %-ENOSPC if there are no free IDs.
  */
@@ -297,7 +299,8 @@ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp)
  *
  * Allocate an ID between 0 and @max, inclusive.
  *
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
  * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
  * or %-ENOSPC if there are no free IDs.
  */
diff --git a/lib/idr.c b/lib/idr.c
index c2cf2c52bbde..3fa8be43696f 100644
--- a/lib/idr.c
+++ b/lib/idr.c
@@ -372,7 +372,8 @@ EXPORT_SYMBOL(idr_replace);
  * Allocate an ID between @min and @max, inclusive.  The allocated ID will
  * not exceed %INT_MAX, even if @max is larger.
  *
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
  * Return: The allocated ID, or %-ENOMEM if memory could not be allocated,
  * or %-ENOSPC if there are no free IDs.
  */
@@ -479,7 +480,8 @@ EXPORT_SYMBOL(ida_alloc_range);
  * @ida: IDA handle.
  * @id: Previously allocated ID.
  *
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
  */
 void ida_free(struct ida *ida, unsigned int id)
 {
@@ -531,7 +533,8 @@ EXPORT_SYMBOL(ida_free);
  * or freed.  If the IDA is already empty, there is no need to call this
  * function.
  *
- * Context: Any context.
+ * Context: Any context. It is safe to call this function without
+ * locking in your code.
  */
 void ida_destroy(struct ida *ida)
 {
-- 
cgit v1.2.3


From 6ed9b92e290b530197c2dda2271f5312abc475e6 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Thu, 15 Oct 2020 20:11:25 -0700
Subject: lib/scatterlist.c: avoid a double memset

'sgl' is zeroed a few lines below in 'sg_init_table()'. There is no need to
clear it twice.

Remove the redundant initialization.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200920071544.368841-1-christophe.jaillet@wanadoo.fr
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/scatterlist.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/scatterlist.c b/lib/scatterlist.c
index 5d63a8857f36..d94628fa3349 100644
--- a/lib/scatterlist.c
+++ b/lib/scatterlist.c
@@ -504,7 +504,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length,
 		nalloc++;
 	}
 	sgl = kmalloc_array(nalloc, sizeof(struct scatterlist),
-			    (gfp & ~GFP_DMA) | __GFP_ZERO);
+			    gfp & ~GFP_DMA);
 	if (!sgl)
 		return NULL;
 
-- 
cgit v1.2.3


From 1d339638a954edce06ffcaa15d883d322e9e8291 Mon Sep 17 00:00:00 2001
From: Miaohe Lin <linmiaohe@huawei.com>
Date: Thu, 15 Oct 2020 20:11:28 -0700
Subject: lib/percpu_counter.c: use helper macro abs()

Use helper macro abs() to simplify the "x >= t || x <= -t" cmp.

Signed-off-by: Miaohe Lin <linmiaohe@huawei.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Andrew Morton <akpm@linux-foundation.org>
Link: https://lkml.kernel.org/r/20200927122746.5964-1-linmiaohe@huawei.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/percpu_counter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c
index f61689a96e85..00f666d94486 100644
--- a/lib/percpu_counter.c
+++ b/lib/percpu_counter.c
@@ -85,7 +85,7 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch)
 
 	preempt_disable();
 	count = __this_cpu_read(*fbc->counters) + amount;
-	if (count >= batch || count <= -batch) {
+	if (abs(count) >= batch) {
 		unsigned long flags;
 		raw_spin_lock_irqsave(&fbc->lock, flags);
 		fbc->count += count;
-- 
cgit v1.2.3


From f3c9d0a3fe97a8b05548d27e9a8e7a5e6875004c Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 15 Oct 2020 20:11:34 -0700
Subject: lib/test_hmm.c: fix an error code in dmirror_allocate_chunk()

This is supposed to return false on failure, not a negative error code.

Fixes: 170e38548b81 ("mm/hmm/test: use after free in dmirror_allocate_chunk()")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Ralph Campbell <rcampbell@nvidia.com>
Cc: Jerome Glisse <jglisse@redhat.com>
Cc: Stephen Rothwell <sfr@canb.auug.org.au>
Cc: Jason Gunthorpe <jgg@ziepe.ca>
Cc: Dan Williams <dan.j.williams@intel.com>
Link: https://lkml.kernel.org/r/20201010200812.GA1886610@mwanda
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/test_hmm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/test_hmm.c b/lib/test_hmm.c
index e151a7f10519..80a78877bd93 100644
--- a/lib/test_hmm.c
+++ b/lib/test_hmm.c
@@ -461,7 +461,7 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice,
 
 	devmem = kzalloc(sizeof(*devmem), GFP_KERNEL);
 	if (!devmem)
-		return -ENOMEM;
+		return false;
 
 	res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE,
 				      "hmm_dmirror");
-- 
cgit v1.2.3


From 904542dc56524f921a6bab0639ff6249c01e775f Mon Sep 17 00:00:00 2001
From: Tobias Jordan <kernel@cdqe.de>
Date: Thu, 15 Oct 2020 20:11:38 -0700
Subject: lib/crc32.c: fix trivial typo in preprocessor condition

Whether crc32_be needs a lookup table is chosen based on CRC_LE_BITS.
Obviously, the _be function should be governed by the _BE_ define.

This probably never pops up as it's hard to come up with a configuration
where CRC_BE_BITS isn't the same as CRC_LE_BITS and as nobody is using
bitwise CRC anyway.

Fixes: 46c5801eaf86 ("crc32: bolt on crc32c")
Signed-off-by: Tobias Jordan <kernel@cdqe.de>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Cc: Krzysztof Kozlowski <krzk@kernel.org>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Link: https://lkml.kernel.org/r/20200923182122.GA3338@agrajag.zerfleddert.de
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/crc32.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/crc32.c b/lib/crc32.c
index 35a03d03f973..2a68dfd3b96c 100644
--- a/lib/crc32.c
+++ b/lib/crc32.c
@@ -331,7 +331,7 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p,
 	return crc;
 }
 
-#if CRC_LE_BITS == 1
+#if CRC_BE_BITS == 1
 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len)
 {
 	return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE);
-- 
cgit v1.2.3


From 6a6155f664e31c9be43cd690541a9a682ba3dc22 Mon Sep 17 00:00:00 2001
From: George Popescu <georgepope@android.com>
Date: Thu, 15 Oct 2020 20:13:38 -0700
Subject: ubsan: introduce CONFIG_UBSAN_LOCAL_BOUNDS for Clang

When the kernel is compiled with Clang, -fsanitize=bounds expands to
-fsanitize=array-bounds and -fsanitize=local-bounds.

Enabling -fsanitize=local-bounds with Clang has the unfortunate
side-effect of inserting traps; this goes back to its original intent,
which was as a hardening and not a debugging feature [1].  The same
feature made its way into -fsanitize=bounds, but the traps remained.  For
that reason, -fsanitize=bounds was split into 'array-bounds' and
'local-bounds' [2].

Since 'local-bounds' doesn't behave like a normal sanitizer, enable it
with Clang only if trapping behaviour was requested by
CONFIG_UBSAN_TRAP=y.

Add the UBSAN_BOUNDS_LOCAL config to Kconfig.ubsan to enable the
'local-bounds' option by default when UBSAN_TRAP is enabled.

[1] http://lists.llvm.org/pipermail/llvm-dev/2012-May/049972.html
[2] http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20131021/091536.html

Suggested-by: Marco Elver <elver@google.com>
Signed-off-by: George Popescu <georgepope@android.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: David Brazdil <dbrazdil@google.com>
Reviewed-by: Marco Elver <elver@google.com>
Cc: Masahiro Yamada <masahiroy@kernel.org>
Cc: Michal Marek <michal.lkml@markovi.net>
Cc: Nathan Chancellor <natechancellor@gmail.com>
Cc: Nick Desaulniers <ndesaulniers@google.com>
Cc: Kees Cook <keescook@chromium.org>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra <a.p.zijlstra@chello.nl>
Link: https://lkml.kernel.org/r/20200922074330.2549523-1-georgepope@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 lib/Kconfig.ubsan      | 14 ++++++++++++++
 scripts/Makefile.ubsan | 10 +++++++++-
 2 files changed, 23 insertions(+), 1 deletion(-)

(limited to 'lib')

diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan
index 774315de555a..58f8d03d037b 100644
--- a/lib/Kconfig.ubsan
+++ b/lib/Kconfig.ubsan
@@ -47,6 +47,20 @@ config UBSAN_BOUNDS
 	  to the {str,mem}*cpy() family of functions (that is addressed
 	  by CONFIG_FORTIFY_SOURCE).
 
+config UBSAN_LOCAL_BOUNDS
+	bool "Perform array local bounds checking"
+	depends on UBSAN_TRAP
+	depends on CC_IS_CLANG
+	depends on !UBSAN_KCOV_BROKEN
+	help
+	  This option enables -fsanitize=local-bounds which traps when an
+	  exception/error is detected. Therefore, it should be enabled only
+	  if trapping is expected.
+	  Enabling this option detects errors due to accesses through a
+	  pointer that is derived from an object of a statically-known size,
+	  where an added offset (which may not be known statically) is
+	  out-of-bounds.
+
 config UBSAN_MISC
 	bool "Enable all other Undefined Behavior sanity checks"
 	default UBSAN
diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan
index 27348029b2b8..4e3fff0745e8 100644
--- a/scripts/Makefile.ubsan
+++ b/scripts/Makefile.ubsan
@@ -4,7 +4,15 @@ ifdef CONFIG_UBSAN_ALIGNMENT
 endif
 
 ifdef CONFIG_UBSAN_BOUNDS
-      CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
+      ifdef CONFIG_CC_IS_CLANG
+            CFLAGS_UBSAN += -fsanitize=array-bounds
+      else
+            CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds)
+      endif
+endif
+
+ifdef CONFIG_UBSAN_LOCAL_BOUNDS
+      CFLAGS_UBSAN += -fsanitize=local-bounds
 endif
 
 ifdef CONFIG_UBSAN_MISC
-- 
cgit v1.2.3


From 2c739ced5886cd8c8361faa79a9522ec05174ed0 Mon Sep 17 00:00:00 2001
From: Albert van der Linde <alinde@google.com>
Date: Thu, 15 Oct 2020 20:13:46 -0700
Subject: lib, include/linux: add usercopy failure capability

Patch series "add fault injection to user memory access", v3.

The goal of this series is to improve testing of fault-tolerance in usages
of user memory access functions, by adding support for fault injection.

syzkaller/syzbot are using the existing fault injection modes and will use
this particular feature also.

The first patch adds failure injection capability for usercopy functions.
The second changes usercopy functions to use this new failure capability
(copy_from_user, ...).  The third patch adds get/put/clear_user failures
to x86.

This patch (of 3):

Add a failure injection capability to improve testing of fault-tolerance
in usages of user memory access functions.

Add CONFIG_FAULT_INJECTION_USERCOPY to enable faults in usercopy
functions.  The should_fail_usercopy function is to be called by these
functions (copy_from_user, get_user, ...) in order to fail or not.

Signed-off-by: Albert van der Linde <alinde@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Marco Elver <elver@google.com>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20200831171733.955393-1-alinde@google.com
Link: http://lkml.kernel.org/r/20200831171733.955393-2-alinde@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 Documentation/admin-guide/kernel-parameters.txt   |  1 +
 Documentation/fault-injection/fault-injection.rst |  7 +++-
 include/linux/fault-inject-usercopy.h             | 22 +++++++++++++
 lib/Kconfig.debug                                 |  7 ++++
 lib/Makefile                                      |  1 +
 lib/fault-inject-usercopy.c                       | 39 +++++++++++++++++++++++
 6 files changed, 76 insertions(+), 1 deletion(-)
 create mode 100644 include/linux/fault-inject-usercopy.h
 create mode 100644 lib/fault-inject-usercopy.c

(limited to 'lib')

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f7ac0e663976..6d9afb221ff7 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -1343,6 +1343,7 @@
 			current integrity status.
 
 	failslab=
+	fail_usercopy=
 	fail_page_alloc=
 	fail_make_request=[KNL]
 			General fault injection mechanism.
diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst
index f850ad018b70..31ecfe44e5b4 100644
--- a/Documentation/fault-injection/fault-injection.rst
+++ b/Documentation/fault-injection/fault-injection.rst
@@ -16,6 +16,10 @@ Available fault injection capabilities
 
   injects page allocation failures. (alloc_pages(), get_free_pages(), ...)
 
+- fail_usercopy
+
+  injects failures in user memory access functions. (copy_from_user(), get_user(), ...)
+
 - fail_futex
 
   injects futex deadlock and uaddr fault errors.
@@ -177,6 +181,7 @@ use the boot option::
 
 	failslab=
 	fail_page_alloc=
+	fail_usercopy=
 	fail_make_request=
 	fail_futex=
 	mmc_core.fail_request=<interval>,<probability>,<space>,<times>
@@ -222,7 +227,7 @@ How to add new fault injection capability
 
 - debugfs entries
 
-  failslab, fail_page_alloc, and fail_make_request use this way.
+  failslab, fail_page_alloc, fail_usercopy, and fail_make_request use this way.
   Helper functions:
 
 	fault_create_debugfs_attr(name, parent, attr);
diff --git a/include/linux/fault-inject-usercopy.h b/include/linux/fault-inject-usercopy.h
new file mode 100644
index 000000000000..56c3a693fdd9
--- /dev/null
+++ b/include/linux/fault-inject-usercopy.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __LINUX_FAULT_INJECT_USERCOPY_H__
+#define __LINUX_FAULT_INJECT_USERCOPY_H__
+
+/*
+ * This header provides a wrapper for injecting failures to user space memory
+ * access functions.
+ */
+
+#include <linux/types.h>
+
+#ifdef CONFIG_FAULT_INJECTION_USERCOPY
+
+bool should_fail_usercopy(void);
+
+#else
+
+static inline bool should_fail_usercopy(void) { return false; }
+
+#endif /* CONFIG_FAULT_INJECTION_USERCOPY */
+
+#endif /* __LINUX_FAULT_INJECT_USERCOPY_H__ */
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug
index 491789a793ae..ebe5ab111e65 100644
--- a/lib/Kconfig.debug
+++ b/lib/Kconfig.debug
@@ -1768,6 +1768,13 @@ config FAIL_PAGE_ALLOC
 	help
 	  Provide fault-injection capability for alloc_pages().
 
+config FAULT_INJECTION_USERCOPY
+	bool "Fault injection capability for usercopy functions"
+	depends on FAULT_INJECTION
+	help
+	  Provides fault-injection capability to inject failures
+	  in usercopy functions (copy_from_user(), get_user(), ...).
+
 config FAIL_MAKE_REQUEST
 	bool "Fault-injection capability for disk IO"
 	depends on FAULT_INJECTION && BLOCK
diff --git a/lib/Makefile b/lib/Makefile
index 49a2a9e36224..1c7577b2e86a 100644
--- a/lib/Makefile
+++ b/lib/Makefile
@@ -210,6 +210,7 @@ obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o
 
 obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o
 obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o
+obj-$(CONFIG_FAULT_INJECTION_USERCOPY) += fault-inject-usercopy.o
 obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o
 obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o
 obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o
diff --git a/lib/fault-inject-usercopy.c b/lib/fault-inject-usercopy.c
new file mode 100644
index 000000000000..77558b6c29ca
--- /dev/null
+++ b/lib/fault-inject-usercopy.c
@@ -0,0 +1,39 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <linux/fault-inject.h>
+#include <linux/fault-inject-usercopy.h>
+
+static struct {
+	struct fault_attr attr;
+} fail_usercopy = {
+	.attr = FAULT_ATTR_INITIALIZER,
+};
+
+static int __init setup_fail_usercopy(char *str)
+{
+	return setup_fault_attr(&fail_usercopy.attr, str);
+}
+__setup("fail_usercopy=", setup_fail_usercopy);
+
+#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS
+
+static int __init fail_usercopy_debugfs(void)
+{
+	struct dentry *dir;
+
+	dir = fault_create_debugfs_attr("fail_usercopy", NULL,
+					&fail_usercopy.attr);
+	if (IS_ERR(dir))
+		return PTR_ERR(dir);
+
+	return 0;
+}
+
+late_initcall(fail_usercopy_debugfs);
+
+#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */
+
+bool should_fail_usercopy(void)
+{
+	return should_fail(&fail_usercopy.attr, 1);
+}
+EXPORT_SYMBOL_GPL(should_fail_usercopy);
-- 
cgit v1.2.3


From 4d0e9df5e43dba52d38b251e3b909df8fa1110be Mon Sep 17 00:00:00 2001
From: Albert van der Linde <alinde@google.com>
Date: Thu, 15 Oct 2020 20:13:50 -0700
Subject: lib, uaccess: add failure injection to usercopy functions

To test fault-tolerance of user memory access functions, introduce fault
injection to usercopy functions.

If a failure is expected return either -EFAULT or the total amount of
bytes that were not copied.

Signed-off-by: Albert van der Linde <alinde@google.com>
Signed-off-by: Andrew Morton <akpm@linux-foundation.org>
Reviewed-by: Akinobu Mita <akinobu.mita@gmail.com>
Reviewed-by: Alexander Potapenko <glider@google.com>
Cc: Al Viro <viro@zeniv.linux.org.uk>
Cc: Andrey Konovalov <andreyknvl@google.com>
Cc: Arnd Bergmann <arnd@arndb.de>
Cc: Borislav Petkov <bp@alien8.de>
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: "H. Peter Anvin" <hpa@zytor.com>
Cc: Ingo Molnar <mingo@redhat.com>
Cc: Jonathan Corbet <corbet@lwn.net>
Cc: Marco Elver <elver@google.com>
Cc: Peter Zijlstra (Intel) <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Christoph Hellwig <hch@lst.de>
Link: http://lkml.kernel.org/r/20200831171733.955393-3-alinde@google.com
Signed-off-by: Linus Torvalds <torvalds@linux-foundation.org>
---
 include/linux/uaccess.h | 11 ++++++++++-
 lib/iov_iter.c          |  5 +++++
 lib/strncpy_from_user.c |  3 +++
 lib/usercopy.c          |  5 ++++-
 4 files changed, 22 insertions(+), 2 deletions(-)

(limited to 'lib')

diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h
index ef084eacaa7c..1b8c9d6162bc 100644
--- a/include/linux/uaccess.h
+++ b/include/linux/uaccess.h
@@ -2,6 +2,7 @@
 #ifndef __LINUX_UACCESS_H__
 #define __LINUX_UACCESS_H__
 
+#include <linux/fault-inject-usercopy.h>
 #include <linux/instrumented.h>
 #include <linux/minmax.h>
 #include <linux/sched.h>
@@ -84,6 +85,8 @@ static __always_inline __must_check unsigned long
 __copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	might_fault();
+	if (should_fail_usercopy())
+		return n;
 	instrument_copy_from_user(to, from, n);
 	check_object_size(to, n, false);
 	return raw_copy_from_user(to, from, n);
@@ -105,6 +108,8 @@ __copy_from_user(void *to, const void __user *from, unsigned long n)
 static __always_inline __must_check unsigned long
 __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n)
 {
+	if (should_fail_usercopy())
+		return n;
 	instrument_copy_to_user(to, from, n);
 	check_object_size(from, n, true);
 	return raw_copy_to_user(to, from, n);
@@ -114,6 +119,8 @@ static __always_inline __must_check unsigned long
 __copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
+	if (should_fail_usercopy())
+		return n;
 	instrument_copy_to_user(to, from, n);
 	check_object_size(from, n, true);
 	return raw_copy_to_user(to, from, n);
@@ -125,7 +132,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n)
 {
 	unsigned long res = n;
 	might_fault();
-	if (likely(access_ok(from, n))) {
+	if (!should_fail_usercopy() && likely(access_ok(from, n))) {
 		instrument_copy_from_user(to, from, n);
 		res = raw_copy_from_user(to, from, n);
 	}
@@ -143,6 +150,8 @@ static inline __must_check unsigned long
 _copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
+	if (should_fail_usercopy())
+		return n;
 	if (access_ok(to, n)) {
 		instrument_copy_to_user(to, from, n);
 		n = raw_copy_to_user(to, from, n);
diff --git a/lib/iov_iter.c b/lib/iov_iter.c
index 14cae2584db4..1635111c5bd2 100644
--- a/lib/iov_iter.c
+++ b/lib/iov_iter.c
@@ -2,6 +2,7 @@
 #include <crypto/hash.h>
 #include <linux/export.h>
 #include <linux/bvec.h>
+#include <linux/fault-inject-usercopy.h>
 #include <linux/uio.h>
 #include <linux/pagemap.h>
 #include <linux/slab.h>
@@ -140,6 +141,8 @@
 
 static int copyout(void __user *to, const void *from, size_t n)
 {
+	if (should_fail_usercopy())
+		return n;
 	if (access_ok(to, n)) {
 		instrument_copy_to_user(to, from, n);
 		n = raw_copy_to_user(to, from, n);
@@ -149,6 +152,8 @@ static int copyout(void __user *to, const void *from, size_t n)
 
 static int copyin(void *to, const void __user *from, size_t n)
 {
+	if (should_fail_usercopy())
+		return n;
 	if (access_ok(from, n)) {
 		instrument_copy_from_user(to, from, n);
 		n = raw_copy_from_user(to, from, n);
diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c
index 34696a348864..e6d5fcc2cdf3 100644
--- a/lib/strncpy_from_user.c
+++ b/lib/strncpy_from_user.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/compiler.h>
 #include <linux/export.h>
+#include <linux/fault-inject-usercopy.h>
 #include <linux/kasan-checks.h>
 #include <linux/thread_info.h>
 #include <linux/uaccess.h>
@@ -99,6 +100,8 @@ long strncpy_from_user(char *dst, const char __user *src, long count)
 	unsigned long max_addr, src_addr;
 
 	might_fault();
+	if (should_fail_usercopy())
+		return -EFAULT;
 	if (unlikely(count <= 0))
 		return 0;
 
diff --git a/lib/usercopy.c b/lib/usercopy.c
index b26509f112f9..7413dd300516 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <linux/bitops.h>
+#include <linux/fault-inject-usercopy.h>
 #include <linux/instrumented.h>
 #include <linux/uaccess.h>
 
@@ -10,7 +11,7 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n
 {
 	unsigned long res = n;
 	might_fault();
-	if (likely(access_ok(from, n))) {
+	if (!should_fail_usercopy() && likely(access_ok(from, n))) {
 		instrument_copy_from_user(to, from, n);
 		res = raw_copy_from_user(to, from, n);
 	}
@@ -25,6 +26,8 @@ EXPORT_SYMBOL(_copy_from_user);
 unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n)
 {
 	might_fault();
+	if (should_fail_usercopy())
+		return n;
 	if (likely(access_ok(to, n))) {
 		instrument_copy_to_user(to, from, n);
 		n = raw_copy_to_user(to, from, n);
-- 
cgit v1.2.3


From 294a7f1613ee49a608361bd319519561c0ca7e72 Mon Sep 17 00:00:00 2001
From: Vitor Massaru Iha <vitor@massaru.org>
Date: Thu, 15 Oct 2020 09:08:51 -0300
Subject: lib: kunit: Fix compilation test when using TEST_BIT_FIELD_COMPILE

A build condition was missing around a compilation test, this compilation
test comes from the original test_bitfield code.

And removed unnecessary code for this test.

Fixes: d2585f5164c2 ("lib: kunit: add bitfield test conversion to KUnit")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Vitor Massaru Iha <vitor@massaru.org>
Link: https://lore.kernel.org/linux-next/20201015163056.56fcc835@canb.auug.org.au/
Reviewed-by: Brendan Higgins <brendanhiggins@google.com>
Signed-off-by: Shuah Khan <skhan@linuxfoundation.org>
---
 lib/bitfield_kunit.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/bitfield_kunit.c b/lib/bitfield_kunit.c
index d63a2be5aff8..1473d8b4bf0f 100644
--- a/lib/bitfield_kunit.c
+++ b/lib/bitfield_kunit.c
@@ -125,7 +125,7 @@ static void __init test_bitfields_variables(struct kunit *context)
 	CHECK(u64, 0x0000001f8000000ull);
 }
 
-
+#ifdef TEST_BITFIELD_COMPILE
 static void __init test_bitfields_compile(struct kunit *context)
 {
 	/* these should fail compilation */
@@ -135,13 +135,11 @@ static void __init test_bitfields_compile(struct kunit *context)
 	/* this should at least give a warning */
 	u16_encode_bits(0, 0x60000);
 }
+#endif
 
 static struct kunit_case __refdata bitfields_test_cases[] = {
 	KUNIT_CASE(test_bitfields_constants),
 	KUNIT_CASE(test_bitfields_variables),
-#ifdef TEST_BITFIELD_COMPILE
-	KUNIT_CASE(test_bitfields_compile),
-#endif
 	{}
 };
 
-- 
cgit v1.2.3


From 272d70895113ef00c03ab325787d159ee51718c8 Mon Sep 17 00:00:00 2001
From: Peilin Ye <yepeilin.cs@gmail.com>
Date: Sun, 18 Oct 2020 14:12:04 -0400
Subject: Fonts: Support FONT_EXTRA_WORDS macros for font_6x8

Recently, in commit 6735b4632def ("Fonts: Support FONT_EXTRA_WORDS macros
for built-in fonts"), we wrapped each of our built-in data buffers in a
`font_data` structure, in order to use the following macros on them, see
include/linux/font.h:

	#define REFCOUNT(fd)	(((int *)(fd))[-1])
	#define FNTSIZE(fd)	(((int *)(fd))[-2])
	#define FNTCHARCNT(fd)	(((int *)(fd))[-3])
	#define FNTSUM(fd)	(((int *)(fd))[-4])

	#define FONT_EXTRA_WORDS 4

Do the same thing to our new 6x8 font. For built-in fonts, currently we
only use FNTSIZE(). Since this is only a temporary solution for an
out-of-bounds issue in the framebuffer layer (see commit 5af08640795b
("fbcon: Fix global-out-of-bounds read in fbcon_get_font()")), all the
three other fields are intentionally set to zero in order to discourage
using these negative-indexing macros.

Signed-off-by: Peilin Ye <yepeilin.cs@gmail.com>
Signed-off-by: Daniel Vetter <daniel.vetter@ffwll.ch>
Link: https://patchwork.freedesktop.org/patch/msgid/926453876c92caac34cba8545716a491754d04d5.1603037079.git.yepeilin.cs@gmail.com
---
 lib/fonts/font_6x8.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

(limited to 'lib')

diff --git a/lib/fonts/font_6x8.c b/lib/fonts/font_6x8.c
index e06447788418..700039a9ceae 100644
--- a/lib/fonts/font_6x8.c
+++ b/lib/fonts/font_6x8.c
@@ -3,8 +3,8 @@
 
 #define FONTDATAMAX 2048
 
-static const unsigned char fontdata_6x8[FONTDATAMAX] = {
-
+static struct font_data fontdata_6x8 = {
+	{ 0, 0, FONTDATAMAX, 0 }, {
 	/* 0 0x00 '^@' */
 	0x00, /* 000000 */
 	0x00, /* 000000 */
@@ -2564,13 +2564,13 @@ static const unsigned char fontdata_6x8[FONTDATAMAX] = {
 	0x00, /* 000000 */
 	0x00, /* 000000 */
 	0x00, /* 000000 */
-};
+} };
 
 const struct font_desc font_6x8 = {
 	.idx	= FONT6x8_IDX,
 	.name	= "6x8",
 	.width	= 6,
 	.height	= 8,
-	.data	= fontdata_6x8,
+	.data	= fontdata_6x8.data,
 	.pref	= 0,
 };
-- 
cgit v1.2.3


From c51f8f88d705e06bd696d7510aff22b33eb8e638 Mon Sep 17 00:00:00 2001
From: George Spelvin <lkml@sdf.org>
Date: Sun, 9 Aug 2020 06:57:44 +0000
Subject: random32: make prandom_u32() output unpredictable

Non-cryptographic PRNGs may have great statistical properties, but
are usually trivially predictable to someone who knows the algorithm,
given a small sample of their output.  An LFSR like prandom_u32() is
particularly simple, even if the sample is widely scattered bits.

It turns out the network stack uses prandom_u32() for some things like
random port numbers which it would prefer are *not* trivially predictable.
Predictability led to a practical DNS spoofing attack.  Oops.

This patch replaces the LFSR with a homebrew cryptographic PRNG based
on the SipHash round function, which is in turn seeded with 128 bits
of strong random key.  (The authors of SipHash have *not* been consulted
about this abuse of their algorithm.)  Speed is prioritized over security;
attacks are rare, while performance is always wanted.

Replacing all callers of prandom_u32() is the quick fix.
Whether to reinstate a weaker PRNG for uses which can tolerate it
is an open question.

Commit f227e3ec3b5c ("random32: update the net random state on interrupt
and activity") was an earlier attempt at a solution.  This patch replaces
it.

Reported-by: Amit Klein <aksecurity@gmail.com>
Cc: Willy Tarreau <w@1wt.eu>
Cc: Eric Dumazet <edumazet@google.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: tytso@mit.edu
Cc: Florian Westphal <fw@strlen.de>
Cc: Marc Plumb <lkml.mplumb@gmail.com>
Fixes: f227e3ec3b5c ("random32: update the net random state on interrupt and activity")
Signed-off-by: George Spelvin <lkml@sdf.org>
Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/
[ willy: partial reversal of f227e3ec3b5c; moved SIPROUND definitions
  to prandom.h for later use; merged George's prandom_seed() proposal;
  inlined siprand_u32(); replaced the net_rand_state[] array with 4
  members to fix a build issue; cosmetic cleanups to make checkpatch
  happy; fixed RANDOM32_SELFTEST build ]
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 drivers/char/random.c   |   1 -
 include/linux/prandom.h |  36 +++-
 kernel/time/timer.c     |   7 -
 lib/random32.c          | 464 +++++++++++++++++++++++++++++-------------------
 4 files changed, 318 insertions(+), 190 deletions(-)

(limited to 'lib')

diff --git a/drivers/char/random.c b/drivers/char/random.c
index d20ba1b104ca..2a41b21623ae 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -1277,7 +1277,6 @@ void add_interrupt_randomness(int irq, int irq_flags)
 
 	fast_mix(fast_pool);
 	add_interrupt_bench(cycles);
-	this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]);
 
 	if (unlikely(crng_init == 0)) {
 		if ((fast_pool->count >= 64) &&
diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index aa16e6468f91..cc1e71334e53 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -16,12 +16,44 @@ void prandom_bytes(void *buf, size_t nbytes);
 void prandom_seed(u32 seed);
 void prandom_reseed_late(void);
 
+#if BITS_PER_LONG == 64
+/*
+ * The core SipHash round function.  Each line can be executed in
+ * parallel given enough CPU resources.
+ */
+#define PRND_SIPROUND(v0, v1, v2, v3) ( \
+	v0 += v1, v1 = rol64(v1, 13),  v2 += v3, v3 = rol64(v3, 16), \
+	v1 ^= v0, v0 = rol64(v0, 32),  v3 ^= v2,                     \
+	v0 += v3, v3 = rol64(v3, 21),  v2 += v1, v1 = rol64(v1, 17), \
+	v3 ^= v0,                      v1 ^= v2, v2 = rol64(v2, 32)  \
+)
+
+#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261)
+#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573)
+
+#elif BITS_PER_LONG == 32
+/*
+ * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash.
+ * This is weaker, but 32-bit machines are not used for high-traffic
+ * applications, so there is less output for an attacker to analyze.
+ */
+#define PRND_SIPROUND(v0, v1, v2, v3) ( \
+	v0 += v1, v1 = rol32(v1,  5),  v2 += v3, v3 = rol32(v3,  8), \
+	v1 ^= v0, v0 = rol32(v0, 16),  v3 ^= v2,                     \
+	v0 += v3, v3 = rol32(v3,  7),  v2 += v1, v1 = rol32(v1, 13), \
+	v3 ^= v0,                      v1 ^= v2, v2 = rol32(v2, 16)  \
+)
+#define PRND_K0 0x6c796765
+#define PRND_K1 0x74656462
+
+#else
+#error Unsupported BITS_PER_LONG
+#endif
+
 struct rnd_state {
 	__u32 s1, s2, s3, s4;
 };
 
-DECLARE_PER_CPU(struct rnd_state, net_rand_state);
-
 u32 prandom_u32_state(struct rnd_state *state);
 void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes);
 void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index dda05f4b7a1f..3e341af741b9 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1717,13 +1717,6 @@ void update_process_times(int user_tick)
 	scheduler_tick();
 	if (IS_ENABLED(CONFIG_POSIX_TIMERS))
 		run_posix_cpu_timers();
-
-	/* The current CPU might make use of net randoms without receiving IRQs
-	 * to renew them often enough. Let's update the net_rand_state from a
-	 * non-constant value that's not affine to the number of calls to make
-	 * sure it's updated when there's some activity (we don't care in idle).
-	 */
-	this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick);
 }
 
 /**
diff --git a/lib/random32.c b/lib/random32.c
index dfb9981ab798..be9f242a4207 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -41,16 +41,6 @@
 #include <asm/unaligned.h>
 #include <trace/events/random.h>
 
-#ifdef CONFIG_RANDOM32_SELFTEST
-static void __init prandom_state_selftest(void);
-#else
-static inline void prandom_state_selftest(void)
-{
-}
-#endif
-
-DEFINE_PER_CPU(struct rnd_state, net_rand_state)  __latent_entropy;
-
 /**
  *	prandom_u32_state - seeded pseudo-random number generator.
  *	@state: pointer to state structure holding seeded state.
@@ -70,26 +60,6 @@ u32 prandom_u32_state(struct rnd_state *state)
 }
 EXPORT_SYMBOL(prandom_u32_state);
 
-/**
- *	prandom_u32 - pseudo random number generator
- *
- *	A 32 bit pseudo-random number is generated using a fast
- *	algorithm suitable for simulation. This algorithm is NOT
- *	considered safe for cryptographic use.
- */
-u32 prandom_u32(void)
-{
-	struct rnd_state *state = &get_cpu_var(net_rand_state);
-	u32 res;
-
-	res = prandom_u32_state(state);
-	trace_prandom_u32(res);
-	put_cpu_var(net_rand_state);
-
-	return res;
-}
-EXPORT_SYMBOL(prandom_u32);
-
 /**
  *	prandom_bytes_state - get the requested number of pseudo-random bytes
  *
@@ -121,20 +91,6 @@ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes)
 }
 EXPORT_SYMBOL(prandom_bytes_state);
 
-/**
- *	prandom_bytes - get the requested number of pseudo-random bytes
- *	@buf: where to copy the pseudo-random bytes to
- *	@bytes: the requested number of bytes
- */
-void prandom_bytes(void *buf, size_t bytes)
-{
-	struct rnd_state *state = &get_cpu_var(net_rand_state);
-
-	prandom_bytes_state(state, buf, bytes);
-	put_cpu_var(net_rand_state);
-}
-EXPORT_SYMBOL(prandom_bytes);
-
 static void prandom_warmup(struct rnd_state *state)
 {
 	/* Calling RNG ten times to satisfy recurrence condition */
@@ -150,96 +106,6 @@ static void prandom_warmup(struct rnd_state *state)
 	prandom_u32_state(state);
 }
 
-static u32 __extract_hwseed(void)
-{
-	unsigned int val = 0;
-
-	(void)(arch_get_random_seed_int(&val) ||
-	       arch_get_random_int(&val));
-
-	return val;
-}
-
-static void prandom_seed_early(struct rnd_state *state, u32 seed,
-			       bool mix_with_hwseed)
-{
-#define LCG(x)	 ((x) * 69069U)	/* super-duper LCG */
-#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
-	state->s1 = __seed(HWSEED() ^ LCG(seed),        2U);
-	state->s2 = __seed(HWSEED() ^ LCG(state->s1),   8U);
-	state->s3 = __seed(HWSEED() ^ LCG(state->s2),  16U);
-	state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
-}
-
-/**
- *	prandom_seed - add entropy to pseudo random number generator
- *	@entropy: entropy value
- *
- *	Add some additional entropy to the prandom pool.
- */
-void prandom_seed(u32 entropy)
-{
-	int i;
-	/*
-	 * No locking on the CPUs, but then somewhat random results are, well,
-	 * expected.
-	 */
-	for_each_possible_cpu(i) {
-		struct rnd_state *state = &per_cpu(net_rand_state, i);
-
-		state->s1 = __seed(state->s1 ^ entropy, 2U);
-		prandom_warmup(state);
-	}
-}
-EXPORT_SYMBOL(prandom_seed);
-
-/*
- *	Generate some initially weak seeding values to allow
- *	to start the prandom_u32() engine.
- */
-static int __init prandom_init(void)
-{
-	int i;
-
-	prandom_state_selftest();
-
-	for_each_possible_cpu(i) {
-		struct rnd_state *state = &per_cpu(net_rand_state, i);
-		u32 weak_seed = (i + jiffies) ^ random_get_entropy();
-
-		prandom_seed_early(state, weak_seed, true);
-		prandom_warmup(state);
-	}
-
-	return 0;
-}
-core_initcall(prandom_init);
-
-static void __prandom_timer(struct timer_list *unused);
-
-static DEFINE_TIMER(seed_timer, __prandom_timer);
-
-static void __prandom_timer(struct timer_list *unused)
-{
-	u32 entropy;
-	unsigned long expires;
-
-	get_random_bytes(&entropy, sizeof(entropy));
-	prandom_seed(entropy);
-
-	/* reseed every ~60 seconds, in [40 .. 80) interval with slack */
-	expires = 40 + prandom_u32_max(40);
-	seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC);
-
-	add_timer(&seed_timer);
-}
-
-static void __init __prandom_start_seed_timer(void)
-{
-	seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC);
-	add_timer(&seed_timer);
-}
-
 void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state)
 {
 	int i;
@@ -259,51 +125,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state)
 }
 EXPORT_SYMBOL(prandom_seed_full_state);
 
-/*
- *	Generate better values after random number generator
- *	is fully initialized.
- */
-static void __prandom_reseed(bool late)
-{
-	unsigned long flags;
-	static bool latch = false;
-	static DEFINE_SPINLOCK(lock);
-
-	/* Asking for random bytes might result in bytes getting
-	 * moved into the nonblocking pool and thus marking it
-	 * as initialized. In this case we would double back into
-	 * this function and attempt to do a late reseed.
-	 * Ignore the pointless attempt to reseed again if we're
-	 * already waiting for bytes when the nonblocking pool
-	 * got initialized.
-	 */
-
-	/* only allow initial seeding (late == false) once */
-	if (!spin_trylock_irqsave(&lock, flags))
-		return;
-
-	if (latch && !late)
-		goto out;
-
-	latch = true;
-	prandom_seed_full_state(&net_rand_state);
-out:
-	spin_unlock_irqrestore(&lock, flags);
-}
-
-void prandom_reseed_late(void)
-{
-	__prandom_reseed(true);
-}
-
-static int __init prandom_reseed(void)
-{
-	__prandom_reseed(false);
-	__prandom_start_seed_timer();
-	return 0;
-}
-late_initcall(prandom_reseed);
-
 #ifdef CONFIG_RANDOM32_SELFTEST
 static struct prandom_test1 {
 	u32 seed;
@@ -423,7 +244,28 @@ static struct prandom_test2 {
 	{  407983964U, 921U,  728767059U },
 };
 
-static void __init prandom_state_selftest(void)
+static u32 __extract_hwseed(void)
+{
+	unsigned int val = 0;
+
+	(void)(arch_get_random_seed_int(&val) ||
+	       arch_get_random_int(&val));
+
+	return val;
+}
+
+static void prandom_seed_early(struct rnd_state *state, u32 seed,
+			       bool mix_with_hwseed)
+{
+#define LCG(x)	 ((x) * 69069U)	/* super-duper LCG */
+#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0)
+	state->s1 = __seed(HWSEED() ^ LCG(seed),        2U);
+	state->s2 = __seed(HWSEED() ^ LCG(state->s1),   8U);
+	state->s3 = __seed(HWSEED() ^ LCG(state->s2),  16U);
+	state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U);
+}
+
+static int __init prandom_state_selftest(void)
 {
 	int i, j, errors = 0, runs = 0;
 	bool error = false;
@@ -463,5 +305,267 @@ static void __init prandom_state_selftest(void)
 		pr_warn("prandom: %d/%d self tests failed\n", errors, runs);
 	else
 		pr_info("prandom: %d self tests passed\n", runs);
+	return 0;
 }
+core_initcall(prandom_state_selftest);
 #endif
+
+/*
+ * The prandom_u32() implementation is now completely separate from the
+ * prandom_state() functions, which are retained (for now) for compatibility.
+ *
+ * Because of (ab)use in the networking code for choosing random TCP/UDP port
+ * numbers, which open DoS possibilities if guessable, we want something
+ * stronger than a standard PRNG.  But the performance requirements of
+ * the network code do not allow robust crypto for this application.
+ *
+ * So this is a homebrew Junior Spaceman implementation, based on the
+ * lowest-latency trustworthy crypto primitive available, SipHash.
+ * (The authors of SipHash have not been consulted about this abuse of
+ * their work.)
+ *
+ * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to
+ * one word of output.  This abbreviated version uses 2 rounds per word
+ * of output.
+ */
+
+struct siprand_state {
+	unsigned long v0;
+	unsigned long v1;
+	unsigned long v2;
+	unsigned long v3;
+};
+
+static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
+
+/*
+ * This is the core CPRNG function.  As "pseudorandom", this is not used
+ * for truly valuable things, just intended to be a PITA to guess.
+ * For maximum speed, we do just two SipHash rounds per word.  This is
+ * the same rate as 4 rounds per 64 bits that SipHash normally uses,
+ * so hopefully it's reasonably secure.
+ *
+ * There are two changes from the official SipHash finalization:
+ * - We omit some constants XORed with v2 in the SipHash spec as irrelevant;
+ *   they are there only to make the output rounds distinct from the input
+ *   rounds, and this application has no input rounds.
+ * - Rather than returning v0^v1^v2^v3, return v1+v3.
+ *   If you look at the SipHash round, the last operation on v3 is
+ *   "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time.
+ *   Likewise "v1 ^= v2".  (The rotate of v2 makes a difference, but
+ *   it still cancels out half of the bits in v2 for no benefit.)
+ *   Second, since the last combining operation was xor, continue the
+ *   pattern of alternating xor/add for a tiny bit of extra non-linearity.
+ */
+static inline u32 siprand_u32(struct siprand_state *s)
+{
+	unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3;
+
+	PRND_SIPROUND(v0, v1, v2, v3);
+	PRND_SIPROUND(v0, v1, v2, v3);
+	s->v0 = v0;  s->v1 = v1;  s->v2 = v2;  s->v3 = v3;
+	return v1 + v3;
+}
+
+
+/**
+ *	prandom_u32 - pseudo random number generator
+ *
+ *	A 32 bit pseudo-random number is generated using a fast
+ *	algorithm suitable for simulation. This algorithm is NOT
+ *	considered safe for cryptographic use.
+ */
+u32 prandom_u32(void)
+{
+	struct siprand_state *state = get_cpu_ptr(&net_rand_state);
+	u32 res = siprand_u32(state);
+
+	trace_prandom_u32(res);
+	put_cpu_ptr(&net_rand_state);
+	return res;
+}
+EXPORT_SYMBOL(prandom_u32);
+
+/**
+ *	prandom_bytes - get the requested number of pseudo-random bytes
+ *	@buf: where to copy the pseudo-random bytes to
+ *	@bytes: the requested number of bytes
+ */
+void prandom_bytes(void *buf, size_t bytes)
+{
+	struct siprand_state *state = get_cpu_ptr(&net_rand_state);
+	u8 *ptr = buf;
+
+	while (bytes >= sizeof(u32)) {
+		put_unaligned(siprand_u32(state), (u32 *)ptr);
+		ptr += sizeof(u32);
+		bytes -= sizeof(u32);
+	}
+
+	if (bytes > 0) {
+		u32 rem = siprand_u32(state);
+
+		do {
+			*ptr++ = (u8)rem;
+			rem >>= BITS_PER_BYTE;
+		} while (--bytes > 0);
+	}
+	put_cpu_ptr(&net_rand_state);
+}
+EXPORT_SYMBOL(prandom_bytes);
+
+/**
+ *	prandom_seed - add entropy to pseudo random number generator
+ *	@entropy: entropy value
+ *
+ *	Add some additional seed material to the prandom pool.
+ *	The "entropy" is actually our IP address (the only caller is
+ *	the network code), not for unpredictability, but to ensure that
+ *	different machines are initialized differently.
+ */
+void prandom_seed(u32 entropy)
+{
+	int i;
+
+	add_device_randomness(&entropy, sizeof(entropy));
+
+	for_each_possible_cpu(i) {
+		struct siprand_state *state = per_cpu_ptr(&net_rand_state, i);
+		unsigned long v0 = state->v0, v1 = state->v1;
+		unsigned long v2 = state->v2, v3 = state->v3;
+
+		do {
+			v3 ^= entropy;
+			PRND_SIPROUND(v0, v1, v2, v3);
+			PRND_SIPROUND(v0, v1, v2, v3);
+			v0 ^= entropy;
+		} while (unlikely(!v0 || !v1 || !v2 || !v3));
+
+		WRITE_ONCE(state->v0, v0);
+		WRITE_ONCE(state->v1, v1);
+		WRITE_ONCE(state->v2, v2);
+		WRITE_ONCE(state->v3, v3);
+	}
+}
+EXPORT_SYMBOL(prandom_seed);
+
+/*
+ *	Generate some initially weak seeding values to allow
+ *	the prandom_u32() engine to be started.
+ */
+static int __init prandom_init_early(void)
+{
+	int i;
+	unsigned long v0, v1, v2, v3;
+
+	if (!arch_get_random_long(&v0))
+		v0 = jiffies;
+	if (!arch_get_random_long(&v1))
+		v1 = random_get_entropy();
+	v2 = v0 ^ PRND_K0;
+	v3 = v1 ^ PRND_K1;
+
+	for_each_possible_cpu(i) {
+		struct siprand_state *state;
+
+		v3 ^= i;
+		PRND_SIPROUND(v0, v1, v2, v3);
+		PRND_SIPROUND(v0, v1, v2, v3);
+		v0 ^= i;
+
+		state = per_cpu_ptr(&net_rand_state, i);
+		state->v0 = v0;  state->v1 = v1;
+		state->v2 = v2;  state->v3 = v3;
+	}
+
+	return 0;
+}
+core_initcall(prandom_init_early);
+
+
+/* Stronger reseeding when available, and periodically thereafter. */
+static void prandom_reseed(struct timer_list *unused);
+
+static DEFINE_TIMER(seed_timer, prandom_reseed);
+
+static void prandom_reseed(struct timer_list *unused)
+{
+	unsigned long expires;
+	int i;
+
+	/*
+	 * Reinitialize each CPU's PRNG with 128 bits of key.
+	 * No locking on the CPUs, but then somewhat random results are,
+	 * well, expected.
+	 */
+	for_each_possible_cpu(i) {
+		struct siprand_state *state;
+		unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0;
+		unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1;
+#if BITS_PER_LONG == 32
+		int j;
+
+		/*
+		 * On 32-bit machines, hash in two extra words to
+		 * approximate 128-bit key length.  Not that the hash
+		 * has that much security, but this prevents a trivial
+		 * 64-bit brute force.
+		 */
+		for (j = 0; j < 2; j++) {
+			unsigned long m = get_random_long();
+
+			v3 ^= m;
+			PRND_SIPROUND(v0, v1, v2, v3);
+			PRND_SIPROUND(v0, v1, v2, v3);
+			v0 ^= m;
+		}
+#endif
+		/*
+		 * Probably impossible in practice, but there is a
+		 * theoretical risk that a race between this reseeding
+		 * and the target CPU writing its state back could
+		 * create the all-zero SipHash fixed point.
+		 *
+		 * To ensure that never happens, ensure the state
+		 * we write contains no zero words.
+		 */
+		state = per_cpu_ptr(&net_rand_state, i);
+		WRITE_ONCE(state->v0, v0 ? v0 : -1ul);
+		WRITE_ONCE(state->v1, v1 ? v1 : -1ul);
+		WRITE_ONCE(state->v2, v2 ? v2 : -1ul);
+		WRITE_ONCE(state->v3, v3 ? v3 : -1ul);
+	}
+
+	/* reseed every ~60 seconds, in [40 .. 80) interval with slack */
+	expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ));
+	mod_timer(&seed_timer, expires);
+}
+
+/*
+ * The random ready callback can be called from almost any interrupt.
+ * To avoid worrying about whether it's safe to delay that interrupt
+ * long enough to seed all CPUs, just schedule an immediate timer event.
+ */
+static void prandom_timer_start(struct random_ready_callback *unused)
+{
+	mod_timer(&seed_timer, jiffies);
+}
+
+/*
+ * Start periodic full reseeding as soon as strong
+ * random numbers are available.
+ */
+static int __init prandom_init_late(void)
+{
+	static struct random_ready_callback random_ready = {
+		.func = prandom_timer_start
+	};
+	int ret = add_random_ready_callback(&random_ready);
+
+	if (ret == -EALREADY) {
+		prandom_timer_start(&random_ready);
+		ret = 0;
+	}
+	return ret;
+}
+late_initcall(prandom_init_late);
-- 
cgit v1.2.3


From 3744741adab6d9195551ce30e65e726c7a408421 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Mon, 10 Aug 2020 10:27:42 +0200
Subject: random32: add noise from network and scheduling activity

With the removal of the interrupt perturbations in previous random32
change (random32: make prandom_u32() output unpredictable), the PRNG
has become 100% deterministic again. While SipHash is expected to be
way more robust against brute force than the previous Tausworthe LFSR,
there's still the risk that whoever has even one temporary access to
the PRNG's internal state is able to predict all subsequent draws till
the next reseed (roughly every minute). This may happen through a side
channel attack or any data leak.

This patch restores the spirit of commit f227e3ec3b5c ("random32: update
the net random state on interrupt and activity") in that it will perturb
the internal PRNG's statee using externally collected noise, except that
it will not pick that noise from the random pool's bits nor upon
interrupt, but will rather combine a few elements along the Tx path
that are collectively hard to predict, such as dev, skb and txq
pointers, packet length and jiffies values. These ones are combined
using a single round of SipHash into a single long variable that is
mixed with the net_rand_state upon each invocation.

The operation was inlined because it produces very small and efficient
code, typically 3 xor, 2 add and 2 rol. The performance was measured
to be the same (even very slightly better) than before the switch to
SipHash; on a 6-core 12-thread Core i7-8700k equipped with a 40G NIC
(i40e), the connection rate dropped from 556k/s to 555k/s while the
SYN cookie rate grew from 5.38 Mpps to 5.45 Mpps.

Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/
Cc: George Spelvin <lkml@sdf.org>
Cc: Amit Klein <aksecurity@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: tytso@mit.edu
Cc: Florian Westphal <fw@strlen.de>
Cc: Marc Plumb <lkml.mplumb@gmail.com>
Tested-by: Sedat Dilek <sedat.dilek@gmail.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 include/linux/prandom.h | 19 +++++++++++++++++++
 kernel/time/timer.c     |  2 ++
 lib/random32.c          |  5 +++++
 net/core/dev.c          |  4 ++++
 4 files changed, 30 insertions(+)

(limited to 'lib')

diff --git a/include/linux/prandom.h b/include/linux/prandom.h
index cc1e71334e53..bbf4b4ad61df 100644
--- a/include/linux/prandom.h
+++ b/include/linux/prandom.h
@@ -16,6 +16,12 @@ void prandom_bytes(void *buf, size_t nbytes);
 void prandom_seed(u32 seed);
 void prandom_reseed_late(void);
 
+DECLARE_PER_CPU(unsigned long, net_rand_noise);
+
+#define PRANDOM_ADD_NOISE(a, b, c, d) \
+	prandom_u32_add_noise((unsigned long)(a), (unsigned long)(b), \
+			      (unsigned long)(c), (unsigned long)(d))
+
 #if BITS_PER_LONG == 64
 /*
  * The core SipHash round function.  Each line can be executed in
@@ -50,6 +56,18 @@ void prandom_reseed_late(void);
 #error Unsupported BITS_PER_LONG
 #endif
 
+static inline void prandom_u32_add_noise(unsigned long a, unsigned long b,
+					 unsigned long c, unsigned long d)
+{
+	/*
+	 * This is not used cryptographically; it's just
+	 * a convenient 4-word hash function. (3 xor, 2 add, 2 rol)
+	 */
+	a ^= raw_cpu_read(net_rand_noise);
+	PRND_SIPROUND(a, b, c, d);
+	raw_cpu_write(net_rand_noise, d);
+}
+
 struct rnd_state {
 	__u32 s1, s2, s3, s4;
 };
@@ -99,6 +117,7 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed)
 	state->s2 = __seed(i,   8U);
 	state->s3 = __seed(i,  16U);
 	state->s4 = __seed(i, 128U);
+	PRANDOM_ADD_NOISE(state, i, 0, 0);
 }
 
 /* Pseudo random number generator from numerical recipes. */
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index 3e341af741b9..de37e33a868d 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -1706,6 +1706,8 @@ void update_process_times(int user_tick)
 {
 	struct task_struct *p = current;
 
+	PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0);
+
 	/* Note: this timer irq context must be accounted for as well. */
 	account_process_tick(p, user_tick);
 	run_local_timers();
diff --git a/lib/random32.c b/lib/random32.c
index be9f242a4207..7f047844e494 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -337,6 +337,8 @@ struct siprand_state {
 };
 
 static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
+DEFINE_PER_CPU(unsigned long, net_rand_noise);
+EXPORT_PER_CPU_SYMBOL(net_rand_noise);
 
 /*
  * This is the core CPRNG function.  As "pseudorandom", this is not used
@@ -360,9 +362,12 @@ static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy;
 static inline u32 siprand_u32(struct siprand_state *s)
 {
 	unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3;
+	unsigned long n = raw_cpu_read(net_rand_noise);
 
+	v3 ^= n;
 	PRND_SIPROUND(v0, v1, v2, v3);
 	PRND_SIPROUND(v0, v1, v2, v3);
+	v0 ^= n;
 	s->v0 = v0;  s->v1 = v1;  s->v2 = v2;  s->v3 = v3;
 	return v1 + v3;
 }
diff --git a/net/core/dev.c b/net/core/dev.c
index 9499a414d67e..82dc6b48e45f 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -145,6 +145,7 @@
 #include <linux/indirect_call_wrapper.h>
 #include <net/devlink.h>
 #include <linux/pm_runtime.h>
+#include <linux/prandom.h>
 
 #include "net-sysfs.h"
 
@@ -3558,6 +3559,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev,
 		dev_queue_xmit_nit(skb, dev);
 
 	len = skb->len;
+	PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies);
 	trace_net_dev_start_xmit(skb, dev);
 	rc = netdev_start_xmit(skb, dev, txq, more);
 	trace_net_dev_xmit(skb, rc, dev, len);
@@ -4130,6 +4132,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev)
 			if (!skb)
 				goto out;
 
+			PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
 			HARD_TX_LOCK(dev, txq, cpu);
 
 			if (!netif_xmit_stopped(txq)) {
@@ -4195,6 +4198,7 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id)
 
 	skb_set_queue_mapping(skb, queue_id);
 	txq = skb_get_tx_queue(dev, skb);
+	PRANDOM_ADD_NOISE(skb, dev, txq, jiffies);
 
 	local_bh_disable();
 
-- 
cgit v1.2.3


From c6e169bc146a76d5ccbf4d3825f705414352bd03 Mon Sep 17 00:00:00 2001
From: Willy Tarreau <w@1wt.eu>
Date: Sat, 24 Oct 2020 18:36:27 +0200
Subject: random32: add a selftest for the prandom32 code

Given that this code is new, let's add a selftest for it as well.
It doesn't rely on fixed sets, instead it picks 1024 numbers and
verifies that they're not more correlated than desired.

Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/
Cc: George Spelvin <lkml@sdf.org>
Cc: Amit Klein <aksecurity@gmail.com>
Cc: Eric Dumazet <edumazet@google.com>
Cc: "Jason A. Donenfeld" <Jason@zx2c4.com>
Cc: Andy Lutomirski <luto@kernel.org>
Cc: Kees Cook <keescook@chromium.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: tytso@mit.edu
Cc: Florian Westphal <fw@strlen.de>
Cc: Marc Plumb <lkml.mplumb@gmail.com>
Signed-off-by: Willy Tarreau <w@1wt.eu>
---
 lib/random32.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

(limited to 'lib')

diff --git a/lib/random32.c b/lib/random32.c
index 7f047844e494..4d0e05e471d7 100644
--- a/lib/random32.c
+++ b/lib/random32.c
@@ -38,6 +38,7 @@
 #include <linux/jiffies.h>
 #include <linux/random.h>
 #include <linux/sched.h>
+#include <linux/bitops.h>
 #include <asm/unaligned.h>
 #include <trace/events/random.h>
 
@@ -556,6 +557,61 @@ static void prandom_timer_start(struct random_ready_callback *unused)
 	mod_timer(&seed_timer, jiffies);
 }
 
+#ifdef CONFIG_RANDOM32_SELFTEST
+/* Principle: True 32-bit random numbers will all have 16 differing bits on
+ * average. For each 32-bit number, there are 601M numbers differing by 16
+ * bits, and 89% of the numbers differ by at least 12 bits. Note that more
+ * than 16 differing bits also implies a correlation with inverted bits. Thus
+ * we take 1024 random numbers and compare each of them to the other ones,
+ * counting the deviation of correlated bits to 16. Constants report 32,
+ * counters 32-log2(TEST_SIZE), and pure randoms, around 6 or lower. With the
+ * u32 total, TEST_SIZE may be as large as 4096 samples.
+ */
+#define TEST_SIZE 1024
+static int __init prandom32_state_selftest(void)
+{
+	unsigned int x, y, bits, samples;
+	u32 xor, flip;
+	u32 total;
+	u32 *data;
+
+	data = kmalloc(sizeof(*data) * TEST_SIZE, GFP_KERNEL);
+	if (!data)
+		return 0;
+
+	for (samples = 0; samples < TEST_SIZE; samples++)
+		data[samples] = prandom_u32();
+
+	flip = total = 0;
+	for (x = 0; x < samples; x++) {
+		for (y = 0; y < samples; y++) {
+			if (x == y)
+				continue;
+			xor = data[x] ^ data[y];
+			flip |= xor;
+			bits = hweight32(xor);
+			total += (bits - 16) * (bits - 16);
+		}
+	}
+
+	/* We'll return the average deviation as 2*sqrt(corr/samples), which
+	 * is also sqrt(4*corr/samples) which provides a better resolution.
+	 */
+	bits = int_sqrt(total / (samples * (samples - 1)) * 4);
+	if (bits > 6)
+		pr_warn("prandom32: self test failed (at least %u bits"
+			" correlated, fixed_mask=%#x fixed_value=%#x\n",
+			bits, ~flip, data[0] & ~flip);
+	else
+		pr_info("prandom32: self test passed (less than %u bits"
+			" correlated)\n",
+			bits+1);
+	kfree(data);
+	return 0;
+}
+core_initcall(prandom32_state_selftest);
+#endif /*  CONFIG_RANDOM32_SELFTEST */
+
 /*
  * Start periodic full reseeding as soon as strong
  * random numbers are available.
-- 
cgit v1.2.3