From 20607434113b8f7d74cfc98e27a4199535c1d4fa Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 30 Mar 2020 17:22:11 -0700 Subject: lib/bitmap.c: fix spello Fix typo/spello for whitespaces. Signed-off-by: Randy Dunlap Signed-off-by: Jiri Kosina --- lib/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 89260aa342d6..a801379e9354 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -551,7 +551,7 @@ static inline bool end_of_region(char c) } /* - * The format allows commas and whitespases at the beginning + * The format allows commas and whitespaces at the beginning * of the region. */ static const char *bitmap_find_region(const char *str) -- cgit v1.2.3 From 8aa26c575fb343ebde810b30dad0cba7d8121efb Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 18 Aug 2020 10:17:33 +0200 Subject: netlink: make NLA_BINARY validation more flexible Add range validation for NLA_BINARY, allowing validation of any combination of combination minimum or maximum lengths, using the existing NLA_POLICY_RANGE()/NLA_POLICY_FULL_RANGE() macros, just like for integers where the value is checked. Also make NLA_POLICY_EXACT_LEN(), NLA_POLICY_EXACT_LEN_WARN() and NLA_POLICY_MIN_LEN() special cases of this, removing the old types NLA_EXACT_LEN and NLA_MIN_LEN. This allows us to save some code where both minimum and maximum lengths are requires, currently the policy only allows maximum (NLA_BINARY), minimum (NLA_MIN_LEN) or exact (NLA_EXACT_LEN), so a range of lengths cannot be accepted and must be checked by the code that consumes the attributes later. Also, this allows advertising the correct ranges in the policy export to userspace. Here, NLA_MIN_LEN and NLA_EXACT_LEN already were special cases of NLA_BINARY with min and min/max length respectively. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/net/netlink.h | 58 ++++++++++++++++++++++++++----------------------- lib/nlattr.c | 60 +++++++++++++++++++++++++++++++++------------------ net/netlink/policy.c | 32 ++++++++++++++++----------- 3 files changed, 89 insertions(+), 61 deletions(-) (limited to 'lib') diff --git a/include/net/netlink.h b/include/net/netlink.h index c0411f14fb53..fdd317f8fde4 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -181,8 +181,6 @@ enum { NLA_S64, NLA_BITFIELD32, NLA_REJECT, - NLA_EXACT_LEN, - NLA_MIN_LEN, __NLA_TYPE_MAX, }; @@ -199,11 +197,11 @@ struct netlink_range_validation_signed { enum nla_policy_validation { NLA_VALIDATE_NONE, NLA_VALIDATE_RANGE, + NLA_VALIDATE_RANGE_WARN_TOO_LONG, NLA_VALIDATE_MIN, NLA_VALIDATE_MAX, NLA_VALIDATE_RANGE_PTR, NLA_VALIDATE_FUNCTION, - NLA_VALIDATE_WARN_TOO_LONG, }; /** @@ -222,7 +220,7 @@ enum nla_policy_validation { * NLA_NUL_STRING Maximum length of string (excluding NUL) * NLA_FLAG Unused * NLA_BINARY Maximum length of attribute payload - * NLA_MIN_LEN Minimum length of attribute payload + * (but see also below with the validation type) * NLA_NESTED, * NLA_NESTED_ARRAY Length verification is done by checking len of * nested header (or empty); len field is used if @@ -237,11 +235,6 @@ enum nla_policy_validation { * just like "All other" * NLA_BITFIELD32 Unused * NLA_REJECT Unused - * NLA_EXACT_LEN Attribute should have exactly this length, otherwise - * it is rejected or warned about, the latter happening - * if and only if the `validation_type' is set to - * NLA_VALIDATE_WARN_TOO_LONG. - * NLA_MIN_LEN Minimum length of attribute payload * All other Minimum length of attribute payload * * Meaning of validation union: @@ -296,6 +289,11 @@ enum nla_policy_validation { * pointer to a struct netlink_range_validation_signed * that indicates the min/max values. * Use NLA_POLICY_FULL_RANGE_SIGNED(). + * + * NLA_BINARY If the validation type is like the ones for integers + * above, then the min/max length (not value like for + * integers) of the attribute is enforced. + * * All other Unused - but note that it's a union * * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN: @@ -309,7 +307,7 @@ enum nla_policy_validation { * static const struct nla_policy my_policy[ATTR_MAX+1] = { * [ATTR_FOO] = { .type = NLA_U16 }, * [ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ }, - * [ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) }, + * [ATTR_BAZ] = NLA_POLICY_EXACT_LEN(sizeof(struct mystruct)), * [ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags), * }; */ @@ -335,9 +333,10 @@ struct nla_policy { * nesting validation starts here. * * Additionally, it means that NLA_UNSPEC is actually NLA_REJECT - * for any types >= this, so need to use NLA_MIN_LEN to get the - * previous pure { .len = xyz } behaviour. The advantage of this - * is that types not specified in the policy will be rejected. + * for any types >= this, so need to use NLA_POLICY_MIN_LEN() to + * get the previous pure { .len = xyz } behaviour. The advantage + * of this is that types not specified in the policy will be + * rejected. * * For completely new families it should be set to 1 so that the * validation is enforced for all attributes. For existing ones @@ -349,12 +348,6 @@ struct nla_policy { }; }; -#define NLA_POLICY_EXACT_LEN(_len) { .type = NLA_EXACT_LEN, .len = _len } -#define NLA_POLICY_EXACT_LEN_WARN(_len) \ - { .type = NLA_EXACT_LEN, .len = _len, \ - .validation_type = NLA_VALIDATE_WARN_TOO_LONG, } -#define NLA_POLICY_MIN_LEN(_len) { .type = NLA_MIN_LEN, .len = _len } - #define NLA_POLICY_ETH_ADDR NLA_POLICY_EXACT_LEN(ETH_ALEN) #define NLA_POLICY_ETH_ADDR_COMPAT NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN) @@ -370,19 +363,21 @@ struct nla_policy { { .type = NLA_BITFIELD32, .bitfield32_valid = valid } #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) -#define NLA_ENSURE_UINT_TYPE(tp) \ +#define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(tp == NLA_U8 || tp == NLA_U16 || \ tp == NLA_U32 || tp == NLA_U64 || \ - tp == NLA_MSECS) + tp) + tp == NLA_MSECS || \ + tp == NLA_BINARY) + tp) #define NLA_ENSURE_SINT_TYPE(tp) \ (__NLA_ENSURE(tp == NLA_S8 || tp == NLA_S16 || \ tp == NLA_S32 || tp == NLA_S64) + tp) -#define NLA_ENSURE_INT_TYPE(tp) \ +#define NLA_ENSURE_INT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 || \ tp == NLA_S16 || tp == NLA_U16 || \ tp == NLA_S32 || tp == NLA_U32 || \ tp == NLA_S64 || tp == NLA_U64 || \ - tp == NLA_MSECS) + tp) + tp == NLA_MSECS || \ + tp == NLA_BINARY) + tp) #define NLA_ENSURE_NO_VALIDATION_PTR(tp) \ (__NLA_ENSURE(tp != NLA_BITFIELD32 && \ tp != NLA_REJECT && \ @@ -390,14 +385,14 @@ struct nla_policy { tp != NLA_NESTED_ARRAY) + tp) #define NLA_POLICY_RANGE(tp, _min, _max) { \ - .type = NLA_ENSURE_INT_TYPE(tp), \ + .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE, \ .min = _min, \ .max = _max \ } #define NLA_POLICY_FULL_RANGE(tp, _range) { \ - .type = NLA_ENSURE_UINT_TYPE(tp), \ + .type = NLA_ENSURE_UINT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_RANGE_PTR, \ .range = _range, \ } @@ -409,13 +404,13 @@ struct nla_policy { } #define NLA_POLICY_MIN(tp, _min) { \ - .type = NLA_ENSURE_INT_TYPE(tp), \ + .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MIN, \ .min = _min, \ } #define NLA_POLICY_MAX(tp, _max) { \ - .type = NLA_ENSURE_INT_TYPE(tp), \ + .type = NLA_ENSURE_INT_OR_BINARY_TYPE(tp), \ .validation_type = NLA_VALIDATE_MAX, \ .max = _max, \ } @@ -427,6 +422,15 @@ struct nla_policy { .len = __VA_ARGS__ + 0, \ } +#define NLA_POLICY_EXACT_LEN(_len) NLA_POLICY_RANGE(NLA_BINARY, _len, _len) +#define NLA_POLICY_EXACT_LEN_WARN(_len) { \ + .type = NLA_BINARY, \ + .validation_type = NLA_VALIDATE_RANGE_WARN_TOO_LONG, \ + .min = _len, \ + .max = _len \ +} +#define NLA_POLICY_MIN_LEN(_len) NLA_POLICY_MIN(NLA_BINARY, _len) + /** * struct nl_info - netlink source information * @nlh: Netlink message header of original request diff --git a/lib/nlattr.c b/lib/nlattr.c index bc5b5cf608c4..665bdaff02c8 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -124,6 +124,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt, range->max = U8_MAX; break; case NLA_U16: + case NLA_BINARY: range->max = U16_MAX; break; case NLA_U32: @@ -140,6 +141,7 @@ void nla_get_range_unsigned(const struct nla_policy *pt, switch (pt->validation_type) { case NLA_VALIDATE_RANGE: + case NLA_VALIDATE_RANGE_WARN_TOO_LONG: range->min = pt->min; range->max = pt->max; break; @@ -157,9 +159,10 @@ void nla_get_range_unsigned(const struct nla_policy *pt, } } -static int nla_validate_int_range_unsigned(const struct nla_policy *pt, - const struct nlattr *nla, - struct netlink_ext_ack *extack) +static int nla_validate_range_unsigned(const struct nla_policy *pt, + const struct nlattr *nla, + struct netlink_ext_ack *extack, + unsigned int validate) { struct netlink_range_validation range; u64 value; @@ -178,15 +181,39 @@ static int nla_validate_int_range_unsigned(const struct nla_policy *pt, case NLA_MSECS: value = nla_get_u64(nla); break; + case NLA_BINARY: + value = nla_len(nla); + break; default: return -EINVAL; } nla_get_range_unsigned(pt, &range); + if (pt->validation_type == NLA_VALIDATE_RANGE_WARN_TOO_LONG && + pt->type == NLA_BINARY && value > range.max) { + pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", + current->comm, pt->type); + if (validate & NL_VALIDATE_STRICT_ATTRS) { + NL_SET_ERR_MSG_ATTR(extack, nla, + "invalid attribute length"); + return -EINVAL; + } + + /* this assumes min <= max (don't validate against min) */ + return 0; + } + if (value < range.min || value > range.max) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "integer out of range"); + bool binary = pt->type == NLA_BINARY; + + if (binary) + NL_SET_ERR_MSG_ATTR(extack, nla, + "binary attribute size out of range"); + else + NL_SET_ERR_MSG_ATTR(extack, nla, + "integer out of range"); + return -ERANGE; } @@ -274,7 +301,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt, static int nla_validate_int_range(const struct nla_policy *pt, const struct nlattr *nla, - struct netlink_ext_ack *extack) + struct netlink_ext_ack *extack, + unsigned int validate) { switch (pt->type) { case NLA_U8: @@ -282,7 +310,8 @@ static int nla_validate_int_range(const struct nla_policy *pt, case NLA_U32: case NLA_U64: case NLA_MSECS: - return nla_validate_int_range_unsigned(pt, nla, extack); + case NLA_BINARY: + return nla_validate_range_unsigned(pt, nla, extack, validate); case NLA_S8: case NLA_S16: case NLA_S32: @@ -313,10 +342,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype, BUG_ON(pt->type > NLA_TYPE_MAX); - if ((nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) || - (pt->type == NLA_EXACT_LEN && - pt->validation_type == NLA_VALIDATE_WARN_TOO_LONG && - attrlen != pt->len)) { + if (nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) { pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); if (validate & NL_VALIDATE_STRICT_ATTRS) { @@ -449,19 +475,10 @@ static int validate_nla(const struct nlattr *nla, int maxtype, "Unsupported attribute"); return -EINVAL; } - /* fall through */ - case NLA_MIN_LEN: if (attrlen < pt->len) goto out_err; break; - case NLA_EXACT_LEN: - if (pt->validation_type != NLA_VALIDATE_WARN_TOO_LONG) { - if (attrlen != pt->len) - goto out_err; - break; - } - /* fall through */ default: if (pt->len) minlen = pt->len; @@ -479,9 +496,10 @@ static int validate_nla(const struct nlattr *nla, int maxtype, break; case NLA_VALIDATE_RANGE_PTR: case NLA_VALIDATE_RANGE: + case NLA_VALIDATE_RANGE_WARN_TOO_LONG: case NLA_VALIDATE_MIN: case NLA_VALIDATE_MAX: - err = nla_validate_int_range(pt, nla, extack); + err = nla_validate_int_range(pt, nla, extack, validate); if (err) return err; break; diff --git a/net/netlink/policy.c b/net/netlink/policy.c index f6491853c797..46c11c9c212a 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -251,12 +251,6 @@ send_attribute: pt->bitfield32_valid)) goto nla_put_failure; break; - case NLA_EXACT_LEN: - type = NL_ATTR_TYPE_BINARY; - if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len) || - nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH, pt->len)) - goto nla_put_failure; - break; case NLA_STRING: case NLA_NUL_STRING: case NLA_BINARY: @@ -266,14 +260,26 @@ send_attribute: type = NL_ATTR_TYPE_NUL_STRING; else type = NL_ATTR_TYPE_BINARY; - if (pt->len && nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH, - pt->len)) - goto nla_put_failure; - break; - case NLA_MIN_LEN: - type = NL_ATTR_TYPE_BINARY; - if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len)) + + if (pt->validation_type != NLA_VALIDATE_NONE) { + struct netlink_range_validation range; + + nla_get_range_unsigned(pt, &range); + + if (range.min && + nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, + range.min)) + goto nla_put_failure; + + if (range.max < U16_MAX && + nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH, + range.max)) + goto nla_put_failure; + } else if (pt->len && + nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH, + pt->len)) { goto nla_put_failure; + } break; case NLA_FLAG: type = NL_ATTR_TYPE_FLAG; -- cgit v1.2.3 From 160c7ba34605d9b59ee406a1b4a61b0f942b1ae9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 8 Jul 2020 16:25:43 -0700 Subject: lib: Add backtrace_idle parameter to force backtrace of idle CPUs Currently, the nmi_cpu_backtrace() declines to produce backtraces for idle CPUs. This is a good choice in the common case in which problems are caused only by non-idle CPUs. However, there are occasionally situations in which idle CPUs are helping to cause problems. This commit therefore adds an nmi_backtrace.backtrace_idle kernel boot parameter that causes nmi_cpu_backtrace() to dump stacks even of idle CPUs. Signed-off-by: Paul E. McKenney Cc: Jonathan Corbet Cc: Thomas Gleixner Cc: Andrew Morton Cc: Greg Kroah-Hartman Cc: --- Documentation/admin-guide/kernel-parameters.txt | 4 ++++ lib/nmi_backtrace.c | 6 +++++- 2 files changed, 9 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdc1f33fd3d1..5e6d19182c5f 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -3073,6 +3073,10 @@ and gids from such clients. This is intended to ease migration from NFSv2/v3. + nmi_backtrace.backtrace_idle [KNL] + Dump stacks even of idle CPUs in response to an + NMI stack-backtrace request. + nmi_debug= [KNL,SH] Specify one or more actions to take when a NMI is triggered. Format: [state][,regs][,debounce][,die] diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 15ca78e1c7d4..8abe1870dba4 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -85,12 +85,16 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, put_cpu(); } +// Dump stacks even for idle CPUs. +static bool backtrace_idle; +module_param(backtrace_idle, bool, 0644); + bool nmi_cpu_backtrace(struct pt_regs *regs) { int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - if (regs && cpu_in_idle(instruction_pointer(regs))) { + if (!READ_ONCE(backtrace_idle) && regs && cpu_in_idle(instruction_pointer(regs))) { pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", cpu, (void *)instruction_pointer(regs)); } else { -- cgit v1.2.3 From e9d338a0b1799c988b678e8ccb66a442272e6aa3 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 24 Jun 2020 15:59:59 -0700 Subject: scftorture: Add smp_call_function() torture test This commit adds an smp_call_function() torture test that repeatedly invokes this function and complains if things go badly awry. Signed-off-by: Paul E. McKenney --- Documentation/admin-guide/kernel-parameters.txt | 92 +++++++ kernel/Makefile | 2 + kernel/scftorture.c | 350 ++++++++++++++++++++++++ lib/Kconfig.debug | 10 + 4 files changed, 454 insertions(+) create mode 100644 kernel/scftorture.c (limited to 'lib') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index bdc1f33fd3d1..91a56382ae56 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -4637,6 +4637,98 @@ Format: integer between 0 and 10 Default is 0. + scftorture.holdoff= [KNL] + Number of seconds to hold off before starting + test. Defaults to zero for module insertion and + to 10 seconds for built-in smp_call_function() + tests. + + scftorture.longwait= [KNL] + Request ridiculously long waits randomly selected + up to the chosen limit in seconds. Zero (the + default) disables this feature. Please note + that requesting even small non-zero numbers of + seconds can result in RCU CPU stall warnings, + softlockup complaints, and so on. + + scftorture.nthreads= [KNL] + Number of kthreads to spawn to invoke the + smp_call_function() family of functions. + The default of -1 specifies a number of kthreads + equal to the number of CPUs. + + scftorture.onoff_holdoff= [KNL] + Number seconds to wait after the start of the + test before initiating CPU-hotplug operations. + + scftorture.onoff_interval= [KNL] + Number seconds to wait between successive + CPU-hotplug operations. Specifying zero (which + is the default) disables CPU-hotplug operations. + + scftorture.shutdown_secs= [KNL] + The number of seconds following the start of the + test after which to shut down the system. The + default of zero avoids shutting down the system. + Non-zero values are useful for automated tests. + + scftorture.stat_interval= [KNL] + The number of seconds between outputting the + current test statistics to the console. A value + of zero disables statistics output. + + scftorture.stutter_cpus= [KNL] + The number of jiffies to wait between each change + to the set of CPUs under test. + + scftorture.use_cpus_read_lock= [KNL] + Use use_cpus_read_lock() instead of the default + preempt_disable() to disable CPU hotplug + while invoking one of the smp_call_function*() + functions. + + scftorture.verbose= [KNL] + Enable additional printk() statements. + + scftorture.weight_single= [KNL] + The probability weighting to use for the + smp_call_function_single() function with a zero + "wait" parameter. A value of -1 selects the + default if all other weights are -1. However, + if at least one weight has some other value, a + value of -1 will instead select a weight of zero. + + scftorture.weight_single_wait= [KNL] + The probability weighting to use for the + smp_call_function_single() function with a + non-zero "wait" parameter. See weight_single. + + scftorture.weight_many= [KNL] + The probability weighting to use for the + smp_call_function_many() function with a zero + "wait" parameter. See weight_single. + Note well that setting a high probability for + this weighting can place serious IPI load + on the system. + + scftorture.weight_many_wait= [KNL] + The probability weighting to use for the + smp_call_function_many() function with a + non-zero "wait" parameter. See weight_single + and weight_many. + + scftorture.weight_all= [KNL] + The probability weighting to use for the + smp_call_function_all() function with a zero + "wait" parameter. See weight_single and + weight_many. + + scftorture.weight_all_wait= [KNL] + The probability weighting to use for the + smp_call_function_all() function with a + non-zero "wait" parameter. See weight_single + and weight_many. + skew_tick= [KNL] Offset the periodic timer tick per cpu to mitigate xtime_lock contention on larger systems, and/or RCU lock contention on all systems with CONFIG_MAXSMP set. diff --git a/kernel/Makefile b/kernel/Makefile index 9a20016d4900..c45f551deaaa 100644 --- a/kernel/Makefile +++ b/kernel/Makefile @@ -133,6 +133,8 @@ KASAN_SANITIZE_stackleak.o := n KCSAN_SANITIZE_stackleak.o := n KCOV_INSTRUMENT_stackleak.o := n +obj-$(CONFIG_SCF_TORTURE_TEST) += scftorture.o + $(obj)/configs.o: $(obj)/config_data.gz targets += config_data.gz diff --git a/kernel/scftorture.c b/kernel/scftorture.c new file mode 100644 index 000000000000..44f1e49ba6e9 --- /dev/null +++ b/kernel/scftorture.c @@ -0,0 +1,350 @@ +// SPDX-License-Identifier: GPL-2.0+ +// +// Torture test for smp_call_function() and friends. +// +// Copyright (C) Facebook, 2020. +// +// Author: Paul E. McKenney + +#define pr_fmt(fmt) fmt + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define SCFTORT_STRING "scftorture" +#define SCFTORT_FLAG SCFTORT_STRING ": " + +#define SCFTORTOUT(s, x...) \ + pr_alert(SCFTORT_FLAG s, ## x) + +#define VERBOSE_SCFTORTOUT(s, x...) \ + do { if (verbose) pr_alert(SCFTORT_FLAG s, ## x); } while (0) + +#define VERBOSE_SCFTORTOUT_ERRSTRING(s, x...) \ + do { if (verbose) pr_alert(SCFTORT_FLAG "!!! " s, ## x); } while (0) + +MODULE_LICENSE("GPL"); +MODULE_AUTHOR("Paul E. McKenney "); + +// Wait until there are multiple CPUs before starting test. +torture_param(int, holdoff, IS_BUILTIN(CONFIG_SCF_TORTURE_TEST) ? 10 : 0, + "Holdoff time before test start (s)"); +torture_param(int, longwait, 0, "Include ridiculously long waits? (seconds)"); +torture_param(int, nthreads, -1, "# threads, defaults to -1 for all CPUs."); +torture_param(int, onoff_holdoff, 0, "Time after boot before CPU hotplugs (s)"); +torture_param(int, onoff_interval, 0, "Time between CPU hotplugs (s), 0=disable"); +torture_param(int, shutdown_secs, 0, "Shutdown time (ms), <= zero to disable."); +torture_param(int, stat_interval, 60, "Number of seconds between stats printk()s."); +torture_param(int, stutter_cpus, 5, "Number of jiffies to change CPUs under test, 0=disable"); +torture_param(bool, use_cpus_read_lock, 0, "Use cpus_read_lock() to exclude CPU hotplug."); +torture_param(int, verbose, 0, "Enable verbose debugging printk()s"); +torture_param(int, weight_single, -1, "Testing weight for single-CPU no-wait operations."); +torture_param(int, weight_single_wait, -1, "Testing weight for single-CPU operations."); +torture_param(int, weight_mult, -1, "Testing weight for multi-CPU no-wait operations."); +torture_param(int, weight_mult_wait, -1, "Testing weight for multi-CPU operations."); +torture_param(int, weight_all, -1, "Testing weight for all-CPU no-wait operations."); +torture_param(int, weight_all_wait, -1, "Testing weight for all-CPU operations."); + +char *torture_type = ""; + +#ifdef MODULE +# define SCFTORT_SHUTDOWN 0 +#else +# define SCFTORT_SHUTDOWN 1 +#endif + +torture_param(bool, shutdown, SCFTORT_SHUTDOWN, "Shutdown at end of torture test."); + +struct scf_statistics { + struct task_struct *task; + int cpu; + long long n_single; + long long n_single_wait; + long long n_multi; + long long n_multi_wait; + long long n_all; + long long n_all_wait; +}; + +static struct scf_statistics *scf_stats_p; +static struct task_struct *scf_torture_stats_task; +static DEFINE_PER_CPU(long long, scf_invoked_count); + +// Use to wait for all threads to start. +static atomic_t n_started; +static atomic_t n_errs; +static bool scfdone; + +DEFINE_TORTURE_RANDOM_PERCPU(scf_torture_rand); + +// Print torture statistics. Caller must ensure serialization. +static void scf_torture_stats_print(void) +{ + int cpu; + long long invoked_count = 0; + bool isdone = READ_ONCE(scfdone); + + for_each_possible_cpu(cpu) + invoked_count += data_race(per_cpu(scf_invoked_count, cpu)); + pr_alert("%s scf_invoked_count %s: %lld ", + SCFTORT_FLAG, isdone ? "VER" : "ver", invoked_count); + torture_onoff_stats(); + pr_cont("\n"); +} + +// Periodically prints torture statistics, if periodic statistics printing +// was specified via the stat_interval module parameter. +static int +scf_torture_stats(void *arg) +{ + VERBOSE_TOROUT_STRING("scf_torture_stats task started"); + do { + schedule_timeout_interruptible(stat_interval * HZ); + scf_torture_stats_print(); + torture_shutdown_absorb("scf_torture_stats"); + } while (!torture_must_stop()); + torture_kthread_stopping("scf_torture_stats"); + return 0; +} + +// Update statistics and occasionally burn up mass quantities of CPU time, +// if told to do so via scftorture.longwait. Otherwise, occasionally burn +// a little bit. +static void scf_handler(void *unused) +{ + int i; + int j; + unsigned long r = torture_random(this_cpu_ptr(&scf_torture_rand)); + + this_cpu_inc(scf_invoked_count); + if (longwait <= 0) { + if (!(r & 0xffc0)) + udelay(r & 0x3f); + return; + } + if (r & 0xfff) + return; + r = (r >> 12); + if (longwait <= 0) { + udelay((r & 0xff) + 1); + return; + } + r = r % longwait + 1; + for (i = 0; i < r; i++) { + for (j = 0; j < 1000; j++) { + udelay(1000); + cpu_relax(); + } + } +} + +// Randomly do an smp_call_function*() invocation. +static void scftorture_invoke_one(struct scf_statistics *scfp,struct torture_random_state *trsp) +{ + if (use_cpus_read_lock) + cpus_read_lock(); + else + preempt_disable(); + scfp->n_all++; + smp_call_function(scf_handler, NULL, 0); + if (use_cpus_read_lock) + cpus_read_unlock(); + else + preempt_enable(); + if (!(torture_random(trsp) & 0xfff)) + schedule_timeout_uninterruptible(1); +} + +// SCF test kthread. Repeatedly does calls to members of the +// smp_call_function() family of functions. +static int scftorture_invoker(void *arg) +{ + DEFINE_TORTURE_RANDOM(rand); + struct scf_statistics *scfp = (struct scf_statistics *)arg; + + VERBOSE_SCFTORTOUT("scftorture_invoker %d: task started", scfp->cpu); + set_cpus_allowed_ptr(current, cpumask_of(scfp->cpu % nr_cpu_ids)); + set_user_nice(current, MAX_NICE); + if (holdoff) + schedule_timeout_interruptible(holdoff * HZ); + + VERBOSE_SCFTORTOUT("scftorture_invoker %d: Waiting for all SCF torturers from cpu %d", scfp->cpu, smp_processor_id()); + + // Make sure that the CPU is affinitized appropriately during testing. + WARN_ON_ONCE(smp_processor_id() != scfp->cpu); + + if (!atomic_dec_return(&n_started)) + while (atomic_read_acquire(&n_started)) { + if (torture_must_stop()) { + VERBOSE_SCFTORTOUT("scftorture_invoker %d ended before starting", scfp->cpu); + goto end; + } + schedule_timeout_uninterruptible(1); + } + + VERBOSE_SCFTORTOUT("scftorture_invoker %d started", scfp->cpu); + + do { + scftorture_invoke_one(scfp, &rand); + } while (!torture_must_stop()); + + VERBOSE_SCFTORTOUT("scftorture_invoker %d ended", scfp->cpu); +end: + torture_kthread_stopping("scftorture_invoker"); + return 0; +} + +static void +scftorture_print_module_parms(const char *tag) +{ + pr_alert(SCFTORT_FLAG + "--- %s: verbose=%d holdoff=%d longwait=%d nthreads=%d onoff_holdoff=%d onoff_interval=%d shutdown_secs=%d stat_interval=%d stutter_cpus=%d use_cpus_read_lock=%d, weight_single=%d, weight_single_wait=%d, weight_mult=%d, weight_mult_wait=%d, weight_all=%d, weight_all_wait=%d\n", tag, + verbose, holdoff, longwait, nthreads, onoff_holdoff, onoff_interval, shutdown, stat_interval, stutter_cpus, use_cpus_read_lock, weight_single, weight_single_wait, weight_mult, weight_mult_wait, weight_all, weight_all_wait); +} + +static void scf_cleanup_handler(void *unused) +{ +} + +static void scf_torture_cleanup(void) +{ + int i; + + if (torture_cleanup_begin()) + return; + + WRITE_ONCE(scfdone, true); + if (nthreads) + for (i = 0; i < nthreads; i++) + torture_stop_kthread("scftorture_invoker", scf_stats_p[i].task); + else + goto end; + kfree(scf_stats_p); + scf_stats_p = NULL; + smp_call_function(scf_cleanup_handler, NULL, 0); + torture_stop_kthread(scf_torture_stats, scf_torture_stats_task); + scf_torture_stats_print(); // -After- the stats thread is stopped! + + if (atomic_read(&n_errs)) + scftorture_print_module_parms("End of test: FAILURE"); + else if (torture_onoff_failures()) + scftorture_print_module_parms("End of test: LOCK_HOTPLUG"); + else + scftorture_print_module_parms("End of test: SUCCESS"); + +end: + torture_cleanup_end(); +} + +static int __init scf_torture_init(void) +{ + long i; + int firsterr = 0; + + if (!torture_init_begin(SCFTORT_STRING, verbose)) + return -EBUSY; + + scftorture_print_module_parms("Start of test"); + + if (weight_single == -1 && weight_single_wait == -1 && + weight_mult == -1 && weight_mult_wait == -1 && + weight_all == -1 && weight_all_wait == -1) { + weight_single = 1; + weight_single_wait = 1; + weight_mult = 1; + weight_mult_wait = 1; + weight_all = 1; + weight_all_wait = 1; + } else { + if (weight_single == -1) + weight_single = 0; + if (weight_single_wait == -1) + weight_single_wait = 0; + if (weight_mult == -1) + weight_mult = 0; + if (weight_mult_wait == -1) + weight_mult_wait = 0; + if (weight_all == -1) + weight_all = 0; + if (weight_all_wait == -1) + weight_all_wait = 0; + } + if (weight_single == 0 && weight_single_wait == 0 && + weight_mult == 0 && weight_mult_wait == 0 && + weight_all == 0 && weight_all_wait == 0) { + firsterr = -EINVAL; + goto unwind; + } + + if (onoff_interval > 0) { + firsterr = torture_onoff_init(onoff_holdoff * HZ, onoff_interval, NULL); + if (firsterr) + goto unwind; + } + if (shutdown_secs > 0) { + firsterr = torture_shutdown_init(shutdown_secs, scf_torture_cleanup); + if (firsterr) + goto unwind; + } + + // Worker tasks invoking smp_call_function(). + if (nthreads < 0) + nthreads = num_online_cpus(); + scf_stats_p = kcalloc(nthreads, sizeof(scf_stats_p[0]), GFP_KERNEL); + if (!scf_stats_p) { + VERBOSE_SCFTORTOUT_ERRSTRING("out of memory"); + firsterr = -ENOMEM; + goto unwind; + } + + VERBOSE_SCFTORTOUT("Starting %d smp_call_function() threads\n", nthreads); + + atomic_set(&n_started, nthreads); + for (i = 0; i < nthreads; i++) { + scf_stats_p[i].cpu = i; + firsterr = torture_create_kthread(scftorture_invoker, (void *)&scf_stats_p[i], + scf_stats_p[i].task); + if (firsterr) + goto unwind; + } + if (stat_interval > 0) { + firsterr = torture_create_kthread(scf_torture_stats, NULL, scf_torture_stats_task); + if (firsterr) + goto unwind; + } + + torture_init_end(); + return 0; + +unwind: + torture_init_end(); + scf_torture_cleanup(); + return firsterr; +} + +module_init(scf_torture_init); +module_exit(scf_torture_cleanup); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e068c3c7189a..0c3a6c752ede 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1367,6 +1367,16 @@ config WW_MUTEX_SELFTEST Say M if you want these self tests to build as a module. Say N if you are unsure. +config SCF_TORTURE_TEST + tristate "torture tests for smp_call_function*()" + depends on DEBUG_KERNEL + select TORTURE_TEST + help + This option provides a kernel module that runs torture tests + on the smp_call_function() family of primitives. The kernel + module may be built after the fact on the running kernel to + be tested, if desired. + endmenu # lock debugging config TRACE_IRQFLAGS -- cgit v1.2.3 From 4718a471f1a7fc5cc943377c09300bb35138daf9 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Mon, 24 Aug 2020 23:25:17 -0400 Subject: netlink: remove duplicated nla_need_padding_for_64bit() check The need for padding 64bit is implicitly checked by nla_align_64bit(), so remove this explicit one. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- lib/nlattr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/nlattr.c b/lib/nlattr.c index 665bdaff02c8..80ff9fe83696 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -834,8 +834,7 @@ EXPORT_SYMBOL(__nla_reserve); struct nlattr *__nla_reserve_64bit(struct sk_buff *skb, int attrtype, int attrlen, int padattr) { - if (nla_need_padding_for_64bit(skb)) - nla_align_64bit(skb, padattr); + nla_align_64bit(skb, padattr); return __nla_reserve(skb, attrtype, attrlen); } -- cgit v1.2.3 From 695afd3d7d58ec3044d25c9d2fe384ab8627fd20 Mon Sep 17 00:00:00 2001 From: Sedat Dilek Date: Sun, 16 Aug 2020 14:32:44 +0200 Subject: kbuild: Simplify DEBUG_INFO Kconfig handling While playing with [1] I saw that the handling of CONFIG_DEBUG_INFO can be simplified. [1] https://patchwork.kernel.org/patch/11716107/ Signed-off-by: Sedat Dilek Signed-off-by: Masahiro Yamada --- Makefile | 6 +++++- lib/Kconfig.debug | 10 ++++------ 2 files changed, 9 insertions(+), 7 deletions(-) (limited to 'lib') diff --git a/Makefile b/Makefile index f21168154160..4ef6a73e48a7 100644 --- a/Makefile +++ b/Makefile @@ -816,13 +816,15 @@ endif DEBUG_CFLAGS := $(call cc-option, -fno-var-tracking-assignments) ifdef CONFIG_DEBUG_INFO + ifdef CONFIG_DEBUG_INFO_SPLIT DEBUG_CFLAGS += -gsplit-dwarf else DEBUG_CFLAGS += -g endif + KBUILD_AFLAGS += -Wa,-gdwarf-2 -endif + ifdef CONFIG_DEBUG_INFO_DWARF4 DEBUG_CFLAGS += -gdwarf-4 endif @@ -838,6 +840,8 @@ KBUILD_AFLAGS += -gz=zlib KBUILD_LDFLAGS += --compress-debug-sections=zlib endif +endif # CONFIG_DEBUG_INFO + KBUILD_CFLAGS += $(DEBUG_CFLAGS) export DEBUG_CFLAGS diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e068c3c7189a..60f3837f608c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -212,9 +212,10 @@ config DEBUG_INFO If unsure, say N. +if DEBUG_INFO + config DEBUG_INFO_REDUCED bool "Reduce debugging information" - depends on DEBUG_INFO help If you say Y here gcc is instructed to generate less debugging information for structure types. This means that tools that @@ -227,7 +228,6 @@ config DEBUG_INFO_REDUCED config DEBUG_INFO_COMPRESSED bool "Compressed debugging information" - depends on DEBUG_INFO depends on $(cc-option,-gz=zlib) depends on $(ld-option,--compress-debug-sections=zlib) help @@ -243,7 +243,6 @@ config DEBUG_INFO_COMPRESSED config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" - depends on DEBUG_INFO depends on $(cc-option,-gsplit-dwarf) help Generate debug info into separate .dwo files. This significantly @@ -259,7 +258,6 @@ config DEBUG_INFO_SPLIT config DEBUG_INFO_DWARF4 bool "Generate dwarf4 debuginfo" - depends on DEBUG_INFO depends on $(cc-option,-gdwarf-4) help Generate dwarf4 debug info. This requires recent versions @@ -269,7 +267,6 @@ config DEBUG_INFO_DWARF4 config DEBUG_INFO_BTF bool "Generate BTF typeinfo" - depends on DEBUG_INFO depends on !DEBUG_INFO_SPLIT && !DEBUG_INFO_REDUCED depends on !GCC_PLUGIN_RANDSTRUCT || COMPILE_TEST help @@ -279,7 +276,6 @@ config DEBUG_INFO_BTF config GDB_SCRIPTS bool "Provide GDB scripts for kernel debugging" - depends on DEBUG_INFO help This creates the required links to GDB helper scripts in the build directory. If you load vmlinux into gdb, the helper @@ -288,6 +284,8 @@ config GDB_SCRIPTS instance. See Documentation/dev-tools/gdb-kernel-debugging.rst for further details. +endif # DEBUG_INFO + config ENABLE_MUST_CHECK bool "Enable __must_check logic" default y -- cgit v1.2.3 From 1c4dd334df3a0627ff57b35612057e2b497e373b Mon Sep 17 00:00:00 2001 From: Paul Cercueil Date: Tue, 1 Sep 2020 16:26:50 +0200 Subject: lib: decompress_unzstd: Limit output size The zstd decompression code, as it is right now, will most likely fail on 32-bit systems, as the default output buffer size causes the buffer's end address to overflow. Address this issue by setting a sane default to the default output size, with a value that won't overflow the buffer's end address. Signed-off-by: Paul Cercueil Reviewed-by: Nick Terrell Signed-off-by: Thomas Bogendoerfer --- lib/decompress_unzstd.c | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/decompress_unzstd.c b/lib/decompress_unzstd.c index 0ad2c15479ed..790abc472f5b 100644 --- a/lib/decompress_unzstd.c +++ b/lib/decompress_unzstd.c @@ -178,8 +178,13 @@ static int INIT __unzstd(unsigned char *in_buf, long in_len, int err; size_t ret; + /* + * ZSTD decompression code won't be happy if the buffer size is so big + * that its end address overflows. When the size is not provided, make + * it as big as possible without having the end address overflow. + */ if (out_len == 0) - out_len = LONG_MAX; /* no limit */ + out_len = UINTPTR_MAX - (uintptr_t)out_buf; if (fill == NULL && flush == NULL) /* -- cgit v1.2.3 From 35feb60474bf4f7fa7840e14fc7fd344996b919d Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 30 Jun 2020 13:22:54 -0700 Subject: kernel/smp: Provide CSD lock timeout diagnostics This commit causes csd_lock_wait() to emit diagnostics when a CPU fails to respond quickly enough to one of the smp_call_function() family of function calls. These diagnostics are enabled by a new CSD_LOCK_WAIT_DEBUG Kconfig option that depends on DEBUG_KERNEL. This commit was inspired by an earlier patch by Josef Bacik. [ paulmck: Fix for syzbot+0f719294463916a3fc0e@syzkaller.appspotmail.com ] [ paulmck: Fix KASAN use-after-free issue reported by Qian Cai. ] [ paulmck: Fix botched nr_cpu_ids comparison per Dan Carpenter. ] [ paulmck: Apply Peter Zijlstra feedback. ] Link: https://lore.kernel.org/lkml/00000000000042f21905a991ecea@google.com Link: https://lore.kernel.org/lkml/0000000000002ef21705a9933cf3@google.com Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Thomas Gleixner Cc: Sebastian Andrzej Siewior Signed-off-by: Paul E. McKenney --- kernel/smp.c | 132 +++++++++++++++++++++++++++++++++++++++++++++++++++++- lib/Kconfig.debug | 11 +++++ 2 files changed, 141 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/kernel/smp.c b/kernel/smp.c index 865a876f83ce..c5d31885bd30 100644 --- a/kernel/smp.c +++ b/kernel/smp.c @@ -20,6 +20,9 @@ #include #include #include +#include +#include +#include #include "smpboot.h" #include "sched/smp.h" @@ -96,6 +99,103 @@ void __init call_function_init(void) smpcfd_prepare_cpu(smp_processor_id()); } +#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG + +static DEFINE_PER_CPU(call_single_data_t *, cur_csd); +static DEFINE_PER_CPU(smp_call_func_t, cur_csd_func); +static DEFINE_PER_CPU(void *, cur_csd_info); + +#define CSD_LOCK_TIMEOUT (5ULL * NSEC_PER_SEC) +atomic_t csd_bug_count = ATOMIC_INIT(0); + +/* Record current CSD work for current CPU, NULL to erase. */ +static void csd_lock_record(call_single_data_t *csd) +{ + if (!csd) { + smp_mb(); /* NULL cur_csd after unlock. */ + __this_cpu_write(cur_csd, NULL); + return; + } + __this_cpu_write(cur_csd_func, csd->func); + __this_cpu_write(cur_csd_info, csd->info); + smp_wmb(); /* func and info before csd. */ + __this_cpu_write(cur_csd, csd); + smp_mb(); /* Update cur_csd before function call. */ + /* Or before unlock, as the case may be. */ +} + +static __always_inline int csd_lock_wait_getcpu(call_single_data_t *csd) +{ + unsigned int csd_type; + + csd_type = CSD_TYPE(csd); + if (csd_type == CSD_TYPE_ASYNC || csd_type == CSD_TYPE_SYNC) + return csd->dst; /* Other CSD_TYPE_ values might not have ->dst. */ + return -1; +} + +/* + * Complain if too much time spent waiting. Note that only + * the CSD_TYPE_SYNC/ASYNC types provide the destination CPU, + * so waiting on other types gets much less information. + */ +static __always_inline bool csd_lock_wait_toolong(call_single_data_t *csd, u64 ts0, u64 *ts1, int *bug_id) +{ + int cpu = -1; + int cpux; + bool firsttime; + u64 ts2, ts_delta; + call_single_data_t *cpu_cur_csd; + unsigned int flags = READ_ONCE(csd->flags); + + if (!(flags & CSD_FLAG_LOCK)) { + if (!unlikely(*bug_id)) + return true; + cpu = csd_lock_wait_getcpu(csd); + pr_alert("csd: CSD lock (#%d) got unstuck on CPU#%02d, CPU#%02d released the lock.\n", + *bug_id, raw_smp_processor_id(), cpu); + return true; + } + + ts2 = sched_clock(); + ts_delta = ts2 - *ts1; + if (likely(ts_delta <= CSD_LOCK_TIMEOUT)) + return false; + + firsttime = !*bug_id; + if (firsttime) + *bug_id = atomic_inc_return(&csd_bug_count); + cpu = csd_lock_wait_getcpu(csd); + if (WARN_ONCE(cpu < 0 || cpu >= nr_cpu_ids, "%s: cpu = %d\n", __func__, cpu)) + cpux = 0; + else + cpux = cpu; + cpu_cur_csd = smp_load_acquire(&per_cpu(cur_csd, cpux)); /* Before func and info. */ + pr_alert("csd: %s non-responsive CSD lock (#%d) on CPU#%d, waiting %llu ns for CPU#%02d %pS(%ps).\n", + firsttime ? "Detected" : "Continued", *bug_id, raw_smp_processor_id(), ts2 - ts0, + cpu, csd->func, csd->info); + if (cpu_cur_csd && csd != cpu_cur_csd) { + pr_alert("\tcsd: CSD lock (#%d) handling prior %pS(%ps) request.\n", + *bug_id, READ_ONCE(per_cpu(cur_csd_func, cpux)), + READ_ONCE(per_cpu(cur_csd_info, cpux))); + } else { + pr_alert("\tcsd: CSD lock (#%d) %s.\n", + *bug_id, !cpu_cur_csd ? "unresponsive" : "handling this request"); + } + if (cpu >= 0) { + if (!trigger_single_cpu_backtrace(cpu)) + dump_cpu_task(cpu); + if (!cpu_cur_csd) { + pr_alert("csd: Re-sending CSD lock (#%d) IPI from CPU#%02d to CPU#%02d\n", *bug_id, raw_smp_processor_id(), cpu); + arch_send_call_function_single_ipi(cpu); + } + } + dump_stack(); + *ts1 = ts2; + + return false; +} + /* * csd_lock/csd_unlock used to serialize access to per-cpu csd resources * @@ -103,10 +203,30 @@ void __init call_function_init(void) * previous function call. For multi-cpu calls its even more interesting * as we'll have to ensure no other cpu is observing our csd. */ +static __always_inline void csd_lock_wait(call_single_data_t *csd) +{ + int bug_id = 0; + u64 ts0, ts1; + + ts1 = ts0 = sched_clock(); + for (;;) { + if (csd_lock_wait_toolong(csd, ts0, &ts1, &bug_id)) + break; + cpu_relax(); + } + smp_acquire__after_ctrl_dep(); +} + +#else +static void csd_lock_record(call_single_data_t *csd) +{ +} + static __always_inline void csd_lock_wait(call_single_data_t *csd) { smp_cond_load_acquire(&csd->flags, !(VAL & CSD_FLAG_LOCK)); } +#endif static __always_inline void csd_lock(call_single_data_t *csd) { @@ -166,9 +286,11 @@ static int generic_exec_single(int cpu, call_single_data_t *csd) * We can unlock early even for the synchronous on-stack case, * since we're doing this from the same CPU.. */ + csd_lock_record(csd); csd_unlock(csd); local_irq_save(flags); func(info); + csd_lock_record(NULL); local_irq_restore(flags); return 0; } @@ -268,8 +390,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) entry = &csd_next->llist; } + csd_lock_record(csd); func(info); csd_unlock(csd); + csd_lock_record(NULL); } else { prev = &csd->llist; } @@ -296,8 +420,10 @@ static void flush_smp_call_function_queue(bool warn_cpu_offline) smp_call_func_t func = csd->func; void *info = csd->info; + csd_lock_record(csd); csd_unlock(csd); func(info); + csd_lock_record(NULL); } else if (type == CSD_TYPE_IRQ_WORK) { irq_work_single(csd); } @@ -375,7 +501,8 @@ int smp_call_function_single(int cpu, smp_call_func_t func, void *info, csd->func = func; csd->info = info; -#ifdef CONFIG_64BIT +#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG + csd->src = smp_processor_id(); csd->dst = cpu; #endif @@ -543,7 +670,8 @@ static void smp_call_function_many_cond(const struct cpumask *mask, csd->flags |= CSD_TYPE_SYNC; csd->func = func; csd->info = info; -#ifdef CONFIG_64BIT +#ifdef CONFIG_CSD_LOCK_WAIT_DEBUG + csd->src = smp_processor_id(); csd->dst = cpu; #endif if (llist_add(&csd->llist, &per_cpu(call_single_queue, cpu))) diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e068c3c7189a..86a35fdfe021 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1367,6 +1367,17 @@ config WW_MUTEX_SELFTEST Say M if you want these self tests to build as a module. Say N if you are unsure. +config CSD_LOCK_WAIT_DEBUG + bool "Debugging for csd_lock_wait(), called from smp_call_function*()" + depends on DEBUG_KERNEL + depends on 64BIT + default n + help + This option enables debug prints when CPUs are slow to respond + to the smp_call_function*() IPI wrappers. These debug prints + include the IPI handler function currently executing (if any) + and relevant stack traces. + endmenu # lock debugging config TRACE_IRQFLAGS -- cgit v1.2.3 From 81b1e242b8bdc1f5094b8d172cdc0c032fc761c1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 3 Sep 2020 16:22:36 +0200 Subject: test_bitmap: remove user bitmap tests We can't run the tests for userspace bitmap parsing if set_fs() doesn't exist, and it is about to go away for x86, powerpc with other major architectures to follow. Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- lib/test_bitmap.c | 91 +++++++++++++------------------------------------------ 1 file changed, 21 insertions(+), 70 deletions(-) (limited to 'lib') diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index df903c53952b..4425a1dd4ef1 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -354,50 +354,37 @@ static const struct test_bitmap_parselist parselist_tests[] __initconst = { }; -static void __init __test_bitmap_parselist(int is_user) +static void __init test_bitmap_parselist(void) { int i; int err; ktime_t time; DECLARE_BITMAP(bmap, 2048); - char *mode = is_user ? "_user" : ""; for (i = 0; i < ARRAY_SIZE(parselist_tests); i++) { #define ptest parselist_tests[i] - if (is_user) { - mm_segment_t orig_fs = get_fs(); - size_t len = strlen(ptest.in); - - set_fs(KERNEL_DS); - time = ktime_get(); - err = bitmap_parselist_user((__force const char __user *)ptest.in, len, - bmap, ptest.nbits); - time = ktime_get() - time; - set_fs(orig_fs); - } else { - time = ktime_get(); - err = bitmap_parselist(ptest.in, bmap, ptest.nbits); - time = ktime_get() - time; - } + time = ktime_get(); + err = bitmap_parselist(ptest.in, bmap, ptest.nbits); + time = ktime_get() - time; if (err != ptest.errno) { - pr_err("parselist%s: %d: input is %s, errno is %d, expected %d\n", - mode, i, ptest.in, err, ptest.errno); + pr_err("parselist: %d: input is %s, errno is %d, expected %d\n", + i, ptest.in, err, ptest.errno); continue; } if (!err && ptest.expected && !__bitmap_equal(bmap, ptest.expected, ptest.nbits)) { - pr_err("parselist%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n", - mode, i, ptest.in, bmap[0], + pr_err("parselist: %d: input is %s, result is 0x%lx, expected 0x%lx\n", + i, ptest.in, bmap[0], *ptest.expected); continue; } if (ptest.flags & PARSE_TIME) - pr_err("parselist%s: %d: input is '%s' OK, Time: %llu\n", - mode, i, ptest.in, time); + pr_err("parselist: %d: input is '%s' OK, Time: %llu\n", + i, ptest.in, time); #undef ptest } @@ -443,75 +430,41 @@ static const struct test_bitmap_parselist parse_tests[] __initconst = { #undef step }; -static void __init __test_bitmap_parse(int is_user) +static void __init test_bitmap_parse(void) { int i; int err; ktime_t time; DECLARE_BITMAP(bmap, 2048); - char *mode = is_user ? "_user" : ""; for (i = 0; i < ARRAY_SIZE(parse_tests); i++) { struct test_bitmap_parselist test = parse_tests[i]; + size_t len = test.flags & NO_LEN ? UINT_MAX : strlen(test.in); - if (is_user) { - size_t len = strlen(test.in); - mm_segment_t orig_fs = get_fs(); - - set_fs(KERNEL_DS); - time = ktime_get(); - err = bitmap_parse_user((__force const char __user *)test.in, len, - bmap, test.nbits); - time = ktime_get() - time; - set_fs(orig_fs); - } else { - size_t len = test.flags & NO_LEN ? - UINT_MAX : strlen(test.in); - time = ktime_get(); - err = bitmap_parse(test.in, len, bmap, test.nbits); - time = ktime_get() - time; - } + time = ktime_get(); + err = bitmap_parse(test.in, len, bmap, test.nbits); + time = ktime_get() - time; if (err != test.errno) { - pr_err("parse%s: %d: input is %s, errno is %d, expected %d\n", - mode, i, test.in, err, test.errno); + pr_err("parse: %d: input is %s, errno is %d, expected %d\n", + i, test.in, err, test.errno); continue; } if (!err && test.expected && !__bitmap_equal(bmap, test.expected, test.nbits)) { - pr_err("parse%s: %d: input is %s, result is 0x%lx, expected 0x%lx\n", - mode, i, test.in, bmap[0], + pr_err("parse: %d: input is %s, result is 0x%lx, expected 0x%lx\n", + i, test.in, bmap[0], *test.expected); continue; } if (test.flags & PARSE_TIME) - pr_err("parse%s: %d: input is '%s' OK, Time: %llu\n", - mode, i, test.in, time); + pr_err("parse: %d: input is '%s' OK, Time: %llu\n", + i, test.in, time); } } -static void __init test_bitmap_parselist(void) -{ - __test_bitmap_parselist(0); -} - -static void __init test_bitmap_parselist_user(void) -{ - __test_bitmap_parselist(1); -} - -static void __init test_bitmap_parse(void) -{ - __test_bitmap_parse(0); -} - -static void __init test_bitmap_parse_user(void) -{ - __test_bitmap_parse(1); -} - #define EXP1_IN_BITS (sizeof(exp1) * 8) static void __init test_bitmap_arr32(void) @@ -675,9 +628,7 @@ static void __init selftest(void) test_replace(); test_bitmap_arr32(); test_bitmap_parse(); - test_bitmap_parse_user(); test_bitmap_parselist(); - test_bitmap_parselist_user(); test_mem_optimisations(); test_for_each_set_clump8(); test_bitmap_cut(); -- cgit v1.2.3 From f2d10ff4a903813df767a4b56b651a26b938df06 Mon Sep 17 00:00:00 2001 From: Daniel Thompson Date: Sun, 27 Sep 2020 22:15:29 +0100 Subject: kgdb: Honour the kprobe blocklist when setting breakpoints Currently kgdb has absolutely no safety rails in place to discourage or prevent a user from placing a breakpoint in dangerous places such as the debugger's own trap entry/exit and other places where it is not safe to take synchronous traps. Introduce a new config symbol KGDB_HONOUR_BLOCKLIST and modify the default implementation of kgdb_validate_break_address() so that we use the kprobe blocklist to prohibit instrumentation of critical functions if the config symbol is set. The config symbol dependencies are set to ensure that the blocklist will be enabled by default if we enable KGDB and are compiling for an architecture where we HAVE_KPROBES. Suggested-by: Peter Zijlstra Reviewed-by: Douglas Anderson Reviewed-by: Masami Hiramatsu Link: https://lore.kernel.org/r/20200927211531.1380577-2-daniel.thompson@linaro.org Signed-off-by: Daniel Thompson --- include/linux/kgdb.h | 18 ++++++++++++++++++ kernel/debug/debug_core.c | 4 ++++ kernel/debug/kdb/kdb_bp.c | 9 +++++++++ lib/Kconfig.kgdb | 15 +++++++++++++++ 4 files changed, 46 insertions(+) (limited to 'lib') diff --git a/include/linux/kgdb.h b/include/linux/kgdb.h index 477b8b7c908f..0d6cf64c8bb1 100644 --- a/include/linux/kgdb.h +++ b/include/linux/kgdb.h @@ -16,6 +16,7 @@ #include #include #include +#include #ifdef CONFIG_HAVE_ARCH_KGDB #include #endif @@ -335,6 +336,23 @@ extern int kgdb_nmicallin(int cpu, int trapnr, void *regs, int err_code, atomic_t *snd_rdy); extern void gdbstub_exit(int status); +/* + * kgdb and kprobes both use the same (kprobe) blocklist (which makes sense + * given they are both typically hooked up to the same trap meaning on most + * architectures one cannot be used to debug the other) + * + * However on architectures where kprobes is not (yet) implemented we permit + * breakpoints everywhere rather than blocking everything by default. + */ +static inline bool kgdb_within_blocklist(unsigned long addr) +{ +#ifdef CONFIG_KGDB_HONOUR_BLOCKLIST + return within_kprobe_blacklist(addr); +#else + return false; +#endif +} + extern int kgdb_single_step; extern atomic_t kgdb_active; #define in_dbg_master() \ diff --git a/kernel/debug/debug_core.c b/kernel/debug/debug_core.c index 165e5b0c2083..6b9383fa8278 100644 --- a/kernel/debug/debug_core.c +++ b/kernel/debug/debug_core.c @@ -180,6 +180,10 @@ int __weak kgdb_validate_break_address(unsigned long addr) { struct kgdb_bkpt tmp; int err; + + if (kgdb_within_blocklist(addr)) + return -EINVAL; + /* Validate setting the breakpoint and then removing it. If the * remove fails, the kernel needs to emit a bad message because we * are deep trouble not being able to put things back the way we diff --git a/kernel/debug/kdb/kdb_bp.c b/kernel/debug/kdb/kdb_bp.c index d7ebb2c79cb8..ec4940146612 100644 --- a/kernel/debug/kdb/kdb_bp.c +++ b/kernel/debug/kdb/kdb_bp.c @@ -306,6 +306,15 @@ static int kdb_bp(int argc, const char **argv) if (!template.bp_addr) return KDB_BADINT; + /* + * This check is redundant (since the breakpoint machinery should + * be doing the same check during kdb_bp_install) but gives the + * user immediate feedback. + */ + diag = kgdb_validate_break_address(template.bp_addr); + if (diag) + return diag; + /* * Find an empty bp structure to allocate */ diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 256f2486f9bd..05dae05b6cc9 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -24,6 +24,21 @@ menuconfig KGDB if KGDB +config KGDB_HONOUR_BLOCKLIST + bool "KGDB: use kprobe blocklist to prohibit unsafe breakpoints" + depends on HAVE_KPROBES + depends on MODULES + select KPROBES + default y + help + If set to Y the debug core will use the kprobe blocklist to + identify symbols where it is unsafe to set breakpoints. + In particular this disallows instrumentation of functions + called during debug trap handling and thus makes it very + difficult to inadvertently provoke recursive trap handling. + + If unsure, say Y. + config KGDB_SERIAL_CONSOLE tristate "KGDB: use kgdb over the serial console" select CONSOLE_POLL -- cgit v1.2.3 From 07da1223ec939982497db3caccd6215b55acc35c Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Sun, 4 Oct 2020 18:43:37 +0300 Subject: lib/scatterlist: Add support in dynamic allocation of SG table from pages Extend __sg_alloc_table_from_pages to support dynamic allocation of SG table from pages. It should be used by drivers that can't supply all the pages at one time. This function returns the last populated SGE in the table. Users should pass it as an argument to the function from the second call and forward. As before, nents will be equal to the number of populated SGEs (chunks). With this new extension, drivers can benefit the optimization of merging contiguous pages without a need to allocate all pages in advance and hold them in a large buffer. E.g. with the Infiniband driver that allocates a single page for hold the pages. For 1TB memory registration, the temporary buffer would consume only 4KB, instead of 2GB. Link: https://lore.kernel.org/r/20201004154340.1080481-2-leon@kernel.org Signed-off-by: Maor Gottlieb Reviewed-by: Christoph Hellwig Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- drivers/gpu/drm/i915/gem/i915_gem_userptr.c | 12 +-- drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c | 15 ++-- include/linux/scatterlist.h | 38 +++++---- lib/scatterlist.c | 125 ++++++++++++++++++++++------ tools/testing/scatterlist/main.c | 9 +- 5 files changed, 142 insertions(+), 57 deletions(-) (limited to 'lib') diff --git a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c index 12b30075134a..f2eaed6aca3d 100644 --- a/drivers/gpu/drm/i915/gem/i915_gem_userptr.c +++ b/drivers/gpu/drm/i915/gem/i915_gem_userptr.c @@ -403,6 +403,7 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, unsigned int max_segment = i915_sg_segment_size(); struct sg_table *st; unsigned int sg_page_sizes; + struct scatterlist *sg; int ret; st = kmalloc(sizeof(*st), GFP_KERNEL); @@ -410,13 +411,12 @@ __i915_gem_userptr_alloc_pages(struct drm_i915_gem_object *obj, return ERR_PTR(-ENOMEM); alloc_table: - ret = __sg_alloc_table_from_pages(st, pvec, num_pages, - 0, num_pages << PAGE_SHIFT, - max_segment, - GFP_KERNEL); - if (ret) { + sg = __sg_alloc_table_from_pages(st, pvec, num_pages, 0, + num_pages << PAGE_SHIFT, max_segment, + NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { kfree(st); - return ERR_PTR(ret); + return ERR_CAST(sg); } ret = i915_gem_gtt_prepare_pages(obj, st); diff --git a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c index ab524ab3b0b4..f22acd398b1f 100644 --- a/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c +++ b/drivers/gpu/drm/vmwgfx/vmwgfx_ttm_buffer.c @@ -419,6 +419,7 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) int ret = 0; static size_t sgl_size; static size_t sgt_size; + struct scatterlist *sg; if (vmw_tt->mapped) return 0; @@ -441,13 +442,15 @@ static int vmw_ttm_map_dma(struct vmw_ttm_tt *vmw_tt) if (unlikely(ret != 0)) return ret; - ret = __sg_alloc_table_from_pages - (&vmw_tt->sgt, vsgt->pages, vsgt->num_pages, 0, - (unsigned long) vsgt->num_pages << PAGE_SHIFT, - dma_get_max_seg_size(dev_priv->dev->dev), - GFP_KERNEL); - if (unlikely(ret != 0)) + sg = __sg_alloc_table_from_pages(&vmw_tt->sgt, vsgt->pages, + vsgt->num_pages, 0, + (unsigned long) vsgt->num_pages << PAGE_SHIFT, + dma_get_max_seg_size(dev_priv->dev->dev), + NULL, 0, GFP_KERNEL); + if (IS_ERR(sg)) { + ret = PTR_ERR(sg); goto out_sg_alloc_fail; + } if (vsgt->num_pages > vmw_tt->sgt.nents) { uint64_t over_alloc = diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 45cf7b69d852..36c47e7e66a2 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -165,6 +165,22 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, #define for_each_sgtable_dma_sg(sgt, sg, i) \ for_each_sg((sgt)->sgl, sg, (sgt)->nents, i) +static inline void __sg_chain(struct scatterlist *chain_sg, + struct scatterlist *sgl) +{ + /* + * offset and length are unused for chain entry. Clear them. + */ + chain_sg->offset = 0; + chain_sg->length = 0; + + /* + * Set lowest bit to indicate a link pointer, and make sure to clear + * the termination bit if it happens to be set. + */ + chain_sg->page_link = ((unsigned long) sgl | SG_CHAIN) & ~SG_END; +} + /** * sg_chain - Chain two sglists together * @prv: First scatterlist @@ -178,18 +194,7 @@ static inline void sg_set_buf(struct scatterlist *sg, const void *buf, static inline void sg_chain(struct scatterlist *prv, unsigned int prv_nents, struct scatterlist *sgl) { - /* - * offset and length are unused for chain entry. Clear them. - */ - prv[prv_nents - 1].offset = 0; - prv[prv_nents - 1].length = 0; - - /* - * Set lowest bit to indicate a link pointer, and make sure to clear - * the termination bit if it happens to be set. - */ - prv[prv_nents - 1].page_link = ((unsigned long) sgl | SG_CHAIN) - & ~SG_END; + __sg_chain(&prv[prv_nents - 1], sgl); } /** @@ -286,10 +291,11 @@ void sg_free_table(struct sg_table *); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, unsigned int, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); -int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, unsigned int max_segment, - gfp_t gfp_mask); +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, + struct page **pages, unsigned int n_pages, unsigned int offset, + unsigned long size, unsigned int max_segment, + struct scatterlist *prv, unsigned int left_pages, + gfp_t gfp_mask); int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 5d63a8857f36..e102fdfaa75b 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -365,6 +365,37 @@ int sg_alloc_table(struct sg_table *table, unsigned int nents, gfp_t gfp_mask) } EXPORT_SYMBOL(sg_alloc_table); +static struct scatterlist *get_next_sg(struct sg_table *table, + struct scatterlist *cur, + unsigned long needed_sges, + gfp_t gfp_mask) +{ + struct scatterlist *new_sg, *next_sg; + unsigned int alloc_size; + + if (cur) { + next_sg = sg_next(cur); + /* Check if last entry should be keeped for chainning */ + if (!sg_is_last(next_sg) || needed_sges == 1) + return next_sg; + } + + alloc_size = min_t(unsigned long, needed_sges, SG_MAX_SINGLE_ALLOC); + new_sg = sg_kmalloc(alloc_size, gfp_mask); + if (!new_sg) + return ERR_PTR(-ENOMEM); + sg_init_table(new_sg, alloc_size); + if (cur) { + __sg_chain(next_sg, new_sg); + table->orig_nents += alloc_size - 1; + } else { + table->sgl = new_sg; + table->orig_nents = alloc_size; + table->nents = 0; + } + return new_sg; +} + /** * __sg_alloc_table_from_pages - Allocate and initialize an sg table from * an array of pages @@ -374,29 +405,63 @@ EXPORT_SYMBOL(sg_alloc_table); * @offset: Offset from start of the first page to the start of a buffer * @size: Number of valid bytes in the buffer (after offset) * @max_segment: Maximum size of a scatterlist node in bytes (page aligned) + * @prv: Last populated sge in sgt + * @left_pages: Left pages caller have to set after this call * @gfp_mask: GFP allocation mask * - * Description: - * Allocate and initialize an sg table from a list of pages. Contiguous - * ranges of the pages are squashed into a single scatterlist node up to the - * maximum size specified in @max_segment. An user may provide an offset at a - * start and a size of valid data in a buffer specified by the page array. - * The returned sg table is released by sg_free_table. + * Description: + * If @prv is NULL, allocate and initialize an sg table from a list of pages, + * else reuse the scatterlist passed in at @prv. + * Contiguous ranges of the pages are squashed into a single scatterlist + * entry up to the maximum size specified in @max_segment. A user may + * provide an offset at a start and a size of valid data in a buffer + * specified by the page array. * * Returns: - * 0 on success, negative error on failure + * Last SGE in sgt on success, PTR_ERR on otherwise. + * The allocation in @sgt must be released by sg_free_table. + * + * Notes: + * If this function returns non-0 (eg failure), the caller must call + * sg_free_table() to cleanup any leftover allocations. */ -int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, - unsigned int n_pages, unsigned int offset, - unsigned long size, unsigned int max_segment, - gfp_t gfp_mask) +struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, + struct page **pages, unsigned int n_pages, unsigned int offset, + unsigned long size, unsigned int max_segment, + struct scatterlist *prv, unsigned int left_pages, + gfp_t gfp_mask) { - unsigned int chunks, cur_page, seg_len, i; - int ret; - struct scatterlist *s; + unsigned int chunks, cur_page, seg_len, i, prv_len = 0; + unsigned int added_nents = 0; + struct scatterlist *s = prv; if (WARN_ON(!max_segment || offset_in_page(max_segment))) - return -EINVAL; + return ERR_PTR(-EINVAL); + + if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv) + return ERR_PTR(-EOPNOTSUPP); + + if (prv) { + unsigned long paddr = (page_to_pfn(sg_page(prv)) * PAGE_SIZE + + prv->offset + prv->length) / + PAGE_SIZE; + + if (WARN_ON(offset)) + return ERR_PTR(-EINVAL); + + /* Merge contiguous pages into the last SG */ + prv_len = prv->length; + while (n_pages && page_to_pfn(pages[0]) == paddr) { + if (prv->length + PAGE_SIZE > max_segment) + break; + prv->length += PAGE_SIZE; + paddr++; + pages++; + n_pages--; + } + if (!n_pages) + goto out; + } /* compute number of contiguous chunks */ chunks = 1; @@ -410,13 +475,9 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, } } - ret = sg_alloc_table(sgt, chunks, gfp_mask); - if (unlikely(ret)) - return ret; - /* merging chunks and putting them into the scatterlist */ cur_page = 0; - for_each_sg(sgt->sgl, s, sgt->orig_nents, i) { + for (i = 0; i < chunks; i++) { unsigned int j, chunk_size; /* look for the end of the current chunk */ @@ -429,15 +490,30 @@ int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, break; } + /* Pass how many chunks might be left */ + s = get_next_sg(sgt, s, chunks - i + left_pages, gfp_mask); + if (IS_ERR(s)) { + /* + * Adjust entry length to be as before function was + * called. + */ + if (prv) + prv->length = prv_len; + return s; + } chunk_size = ((j - cur_page) << PAGE_SHIFT) - offset; sg_set_page(s, pages[cur_page], min_t(unsigned long, size, chunk_size), offset); + added_nents++; size -= chunk_size; offset = 0; cur_page = j; } - - return 0; + sgt->nents += added_nents; +out: + if (!left_pages) + sg_mark_end(s); + return s; } EXPORT_SYMBOL(__sg_alloc_table_from_pages); @@ -465,8 +541,9 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, unsigned int offset, unsigned long size, gfp_t gfp_mask) { - return __sg_alloc_table_from_pages(sgt, pages, n_pages, offset, size, - SCATTERLIST_MAX_SEGMENT, gfp_mask); + return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages, + offset, size, SCATTERLIST_MAX_SEGMENT, + NULL, 0, gfp_mask)); } EXPORT_SYMBOL(sg_alloc_table_from_pages); diff --git a/tools/testing/scatterlist/main.c b/tools/testing/scatterlist/main.c index c6db0a1989b2..b2c7e9f7b8d3 100644 --- a/tools/testing/scatterlist/main.c +++ b/tools/testing/scatterlist/main.c @@ -79,14 +79,13 @@ int main(void) for (i = 0, test = tests; test->expected_segments; test++, i++) { struct page *pages[MAX_PAGES]; struct sg_table st; - int ret; + struct scatterlist *sg; set_pages(pages, test->pfn, test->num_pages); - ret = __sg_alloc_table_from_pages(&st, pages, test->num_pages, - 0, test->size, test->max_seg, - GFP_KERNEL); - assert(ret == test->alloc_ret); + sg = __sg_alloc_table_from_pages(&st, pages, test->num_pages, 0, + test->size, test->max_seg, NULL, 0, GFP_KERNEL); + assert(PTR_ERR_OR_ZERO(sg) == test->alloc_ret); if (test->alloc_ret) continue; -- cgit v1.2.3 From bdbb4e29df8b790db50cb73ce25d23543329f05f Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 5 Oct 2020 15:07:38 -0700 Subject: netlink: add mask validation We don't have good validation policy for existing unsigned int attrs which serve as flags (for new ones we could use NLA_BITFIELD32). With increased use of policy dumping having the validation be expressed as part of the policy is important. Add validation policy in form of a mask of supported/valid bits. Support u64 in the uAPI to be future-proof, but really for now the embedded mask member can only hold 32 bits, so anything with bit 32+ set will always fail validation. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/net/netlink.h | 10 ++++++++++ include/uapi/linux/netlink.h | 2 ++ lib/nlattr.c | 36 ++++++++++++++++++++++++++++++++++++ net/netlink/policy.c | 8 ++++++++ 4 files changed, 56 insertions(+) (limited to 'lib') diff --git a/include/net/netlink.h b/include/net/netlink.h index c5aa46f379bc..2b9e41075f19 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -200,6 +200,7 @@ enum nla_policy_validation { NLA_VALIDATE_RANGE_WARN_TOO_LONG, NLA_VALIDATE_MIN, NLA_VALIDATE_MAX, + NLA_VALIDATE_MASK, NLA_VALIDATE_RANGE_PTR, NLA_VALIDATE_FUNCTION, }; @@ -317,6 +318,7 @@ struct nla_policy { u16 len; union { const u32 bitfield32_valid; + const u32 mask; const char *reject_message; const struct nla_policy *nested_policy; struct netlink_range_validation *range; @@ -368,6 +370,8 @@ struct nla_policy { (tp == NLA_S8 || tp == NLA_S16 || tp == NLA_S32 || tp == NLA_S64) #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition)) +#define NLA_ENSURE_UINT_TYPE(tp) \ + (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp)) + tp) #define NLA_ENSURE_UINT_OR_BINARY_TYPE(tp) \ (__NLA_ENSURE(__NLA_IS_UINT_TYPE(tp) || \ tp == NLA_MSECS || \ @@ -416,6 +420,12 @@ struct nla_policy { .max = _max, \ } +#define NLA_POLICY_MASK(tp, _mask) { \ + .type = NLA_ENSURE_UINT_TYPE(tp), \ + .validation_type = NLA_VALIDATE_MASK, \ + .mask = _mask, \ +} + #define NLA_POLICY_VALIDATE_FN(tp, fn, ...) { \ .type = NLA_ENSURE_NO_VALIDATION_PTR(tp), \ .validation_type = NLA_VALIDATE_FUNCTION, \ diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index eac8a6a648ea..d02e472ba54c 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -331,6 +331,7 @@ enum netlink_attribute_type { * the index, if limited inside the nesting (U32) * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the * bitfield32 type (U32) + * @NL_POLICY_TYPE_ATTR_MASK: mask of valid bits for unsigned integers (U64) * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment */ enum netlink_policy_type_attr { @@ -346,6 +347,7 @@ enum netlink_policy_type_attr { NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE, NL_POLICY_TYPE_ATTR_BITFIELD32_MASK, NL_POLICY_TYPE_ATTR_PAD, + NL_POLICY_TYPE_ATTR_MASK, /* keep last */ __NL_POLICY_TYPE_ATTR_MAX, diff --git a/lib/nlattr.c b/lib/nlattr.c index 80ff9fe83696..9c99f5daa4d2 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -323,6 +323,37 @@ static int nla_validate_int_range(const struct nla_policy *pt, } } +static int nla_validate_mask(const struct nla_policy *pt, + const struct nlattr *nla, + struct netlink_ext_ack *extack) +{ + u64 value; + + switch (pt->type) { + case NLA_U8: + value = nla_get_u8(nla); + break; + case NLA_U16: + value = nla_get_u16(nla); + break; + case NLA_U32: + value = nla_get_u32(nla); + break; + case NLA_U64: + value = nla_get_u64(nla); + break; + default: + return -EINVAL; + } + + if (value & ~(u64)pt->mask) { + NL_SET_ERR_MSG_ATTR(extack, nla, "reserved bit set"); + return -EINVAL; + } + + return 0; +} + static int validate_nla(const struct nlattr *nla, int maxtype, const struct nla_policy *policy, unsigned int validate, struct netlink_ext_ack *extack, unsigned int depth) @@ -503,6 +534,11 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (err) return err; break; + case NLA_VALIDATE_MASK: + err = nla_validate_mask(pt, nla, extack); + if (err) + return err; + break; case NLA_VALIDATE_FUNCTION: if (pt->validate) { err = pt->validate(nla, extack); diff --git a/net/netlink/policy.c b/net/netlink/policy.c index cf23c0151721..ee26d01328ee 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -263,6 +263,14 @@ send_attribute: else type = NL_ATTR_TYPE_U64; + if (pt->validation_type == NLA_VALIDATE_MASK) { + if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MASK, + pt->mask, + NL_POLICY_TYPE_ATTR_PAD)) + goto nla_put_failure; + break; + } + nla_get_range_unsigned(pt, &range); if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_U, -- cgit v1.2.3 From dd841a749d1ded8e2e5facc4242ee0b6779fc0cb Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 14 Jun 2020 06:07:10 -0400 Subject: radix tree test suite: Fix compilation Introducing local_lock broke compilation; fix it all up. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/radix-tree.h | 1 + lib/radix-tree.c | 1 - tools/testing/radix-tree/linux/kernel.h | 1 + tools/testing/radix-tree/linux/local_lock.h | 8 ++++++++ tools/testing/radix-tree/test.h | 4 ---- 5 files changed, 10 insertions(+), 5 deletions(-) create mode 100644 tools/testing/radix-tree/linux/local_lock.h (limited to 'lib') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index c2a9f7c90727..5c85059a92ba 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8e4a3a4397f2..0f10485d46b6 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -20,7 +20,6 @@ #include #include #include -#include #include /* in_interrupt() */ #include #include diff --git a/tools/testing/radix-tree/linux/kernel.h b/tools/testing/radix-tree/linux/kernel.h index 4568248222ae..39867fd80c8f 100644 --- a/tools/testing/radix-tree/linux/kernel.h +++ b/tools/testing/radix-tree/linux/kernel.h @@ -22,4 +22,5 @@ #define __releases(x) #define __must_hold(x) +#define EXPORT_PER_CPU_SYMBOL_GPL(x) #endif /* _KERNEL_H */ diff --git a/tools/testing/radix-tree/linux/local_lock.h b/tools/testing/radix-tree/linux/local_lock.h new file mode 100644 index 000000000000..b3cf8b233ca4 --- /dev/null +++ b/tools/testing/radix-tree/linux/local_lock.h @@ -0,0 +1,8 @@ +#ifndef _LINUX_LOCAL_LOCK +#define _LINUX_LOCAL_LOCK +typedef struct { } local_lock_t; + +static inline void local_lock(local_lock_t *lock) { } +static inline void local_unlock(local_lock_t *lock) { } +#define INIT_LOCAL_LOCK(x) { } +#endif diff --git a/tools/testing/radix-tree/test.h b/tools/testing/radix-tree/test.h index 34dab4d18744..7ef7067e942c 100644 --- a/tools/testing/radix-tree/test.h +++ b/tools/testing/radix-tree/test.h @@ -56,8 +56,4 @@ int root_tag_get(struct radix_tree_root *root, unsigned int tag); unsigned long node_maxindex(struct radix_tree_node *); unsigned long shift_maxindex(unsigned int shift); int radix_tree_cpu_dead(unsigned int cpu); -struct radix_tree_preload { - unsigned nr; - struct radix_tree_node *nodes; -}; extern struct radix_tree_preload radix_tree_preloads; -- cgit v1.2.3 From a219b856a2b993da234108307be772448f22b0ce Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 2 Apr 2020 14:26:13 -0400 Subject: ida: Free allocated bitmap in error path If a bitmap needs to be allocated, and then by the time the thread is scheduled to be run again all the indices which would satisfy the allocation have been allocated then we would leak the allocation. Almost impossible to hit in practice, but a trivial fix. Found by Coverity. Fixes: f32f004cddf8 ("ida: Convert to XArray") Reported-by: coverity-bot Reviewed-by: Kees Cook Signed-off-by: Matthew Wilcox (Oracle) --- lib/idr.c | 1 + tools/testing/radix-tree/idr-test.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'lib') diff --git a/lib/idr.c b/lib/idr.c index c2cf2c52bbde..4d2eef0259d2 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -470,6 +470,7 @@ alloc: goto retry; nospc: xas_unlock_irqrestore(&xas, flags); + kfree(alloc); return -ENOSPC; } EXPORT_SYMBOL(ida_alloc_range); diff --git a/tools/testing/radix-tree/idr-test.c b/tools/testing/radix-tree/idr-test.c index 8995092d541e..3b796dd5e577 100644 --- a/tools/testing/radix-tree/idr-test.c +++ b/tools/testing/radix-tree/idr-test.c @@ -523,8 +523,27 @@ static void *ida_random_fn(void *arg) return NULL; } +static void *ida_leak_fn(void *arg) +{ + struct ida *ida = arg; + time_t s = time(NULL); + int i, ret; + + rcu_register_thread(); + + do for (i = 0; i < 1000; i++) { + ret = ida_alloc_range(ida, 128, 128, GFP_KERNEL); + if (ret >= 0) + ida_free(ida, 128); + } while (time(NULL) < s + 2); + + rcu_unregister_thread(); + return NULL; +} + void ida_thread_tests(void) { + DEFINE_IDA(ida); pthread_t threads[20]; int i; @@ -536,6 +555,16 @@ void ida_thread_tests(void) while (i--) pthread_join(threads[i], NULL); + + for (i = 0; i < ARRAY_SIZE(threads); i++) + if (pthread_create(&threads[i], NULL, ida_leak_fn, &ida)) { + perror("creating ida thread"); + exit(1); + } + + while (i--) + pthread_join(threads[i], NULL); + assert(ida_is_empty(&ida)); } void ida_tests(void) -- cgit v1.2.3 From 062b735912b9f3aa3e14cd02b5ede08cf8bc093f Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 31 Mar 2020 14:23:59 -0400 Subject: XArray: Test two more things about xa_cmpxchg 1. If we xa_cmpxchg() an entry in, it marks the index as not free. 2. If we xa_cmpxchg() NULL in, it marks the index as free. Signed-off-by: Matthew Wilcox (Oracle) --- lib/test_xarray.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index d4f97925dbd8..9fc3da430aba 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -393,6 +393,9 @@ static noinline void check_cmpxchg(struct xarray *xa) XA_BUG_ON(xa, xa_cmpxchg(xa, 12345678, FIVE, LOTS, GFP_KERNEL) != FIVE); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != NULL); XA_BUG_ON(xa, xa_cmpxchg(xa, 5, NULL, FIVE, GFP_KERNEL) != NULL); + XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) != -EBUSY); + XA_BUG_ON(xa, xa_cmpxchg(xa, 5, FIVE, NULL, GFP_KERNEL) != FIVE); + XA_BUG_ON(xa, xa_insert(xa, 5, FIVE, GFP_KERNEL) == -EBUSY); xa_erase_index(xa, 12345678); xa_erase_index(xa, 5); XA_BUG_ON(xa, !xa_empty(xa)); -- cgit v1.2.3 From 04e9e9bb8470bea74eafad1cafd552f3f06c32d9 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Sun, 14 Jun 2020 21:52:04 -0400 Subject: XArray: Test marked multiorder iterations Demonstrate that starting a marked iteration partway through a marked multi-order entry works. Signed-off-by: Matthew Wilcox (Oracle) --- lib/test_xarray.c | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'lib') diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 9fc3da430aba..1122c4453c87 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -289,6 +289,27 @@ static noinline void check_xa_mark_2(struct xarray *xa) xa_destroy(xa); } +static noinline void check_xa_mark_3(struct xarray *xa) +{ +#ifdef CONFIG_XARRAY_MULTI + XA_STATE(xas, xa, 0x41); + void *entry; + int count = 0; + + xa_store_order(xa, 0x40, 2, xa_mk_index(0x40), GFP_KERNEL); + xa_set_mark(xa, 0x41, XA_MARK_0); + + rcu_read_lock(); + xas_for_each_marked(&xas, entry, ULONG_MAX, XA_MARK_0) { + count++; + XA_BUG_ON(xa, entry != xa_mk_index(0x40)); + } + XA_BUG_ON(xa, count != 1); + rcu_read_unlock(); + xa_destroy(xa); +#endif +} + static noinline void check_xa_mark(struct xarray *xa) { unsigned long index; @@ -297,6 +318,7 @@ static noinline void check_xa_mark(struct xarray *xa) check_xa_mark_1(xa, index); check_xa_mark_2(xa); + check_xa_mark_3(xa); } static noinline void check_xa_shrink(struct xarray *xa) -- cgit v1.2.3 From aac35468ca20a3a0e75a24c13c0e31610727f120 Mon Sep 17 00:00:00 2001 From: Alan Maguire Date: Tue, 4 Aug 2020 13:47:42 -0700 Subject: kunit: test: create a single centralized executor for all tests Add a centralized executor to dispatch tests rather than relying on late_initcall to schedule each test suite separately. Centralized execution is for built-in tests only; modules will execute tests when loaded. Signed-off-by: Alan Maguire Co-developed-by: Iurii Zaikin Signed-off-by: Iurii Zaikin Co-developed-by: Brendan Higgins Signed-off-by: Brendan Higgins Reviewed-by: Stephen Boyd Reviewed-by: Kees Cook Signed-off-by: Shuah Khan --- include/kunit/test.h | 67 +++++++++++++++++++++++++++++++++++----------------- lib/kunit/Makefile | 3 ++- lib/kunit/executor.c | 28 ++++++++++++++++++++++ lib/kunit/test.c | 2 +- 4 files changed, 76 insertions(+), 24 deletions(-) create mode 100644 lib/kunit/executor.c (limited to 'lib') diff --git a/include/kunit/test.h b/include/kunit/test.h index 59f3144f009a..d2d261f58259 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -233,7 +233,7 @@ size_t kunit_suite_num_test_cases(struct kunit_suite *suite); unsigned int kunit_test_case_num(struct kunit_suite *suite, struct kunit_case *test_case); -int __kunit_test_suites_init(struct kunit_suite **suites); +int __kunit_test_suites_init(struct kunit_suite * const * const suites); void __kunit_test_suites_exit(struct kunit_suite **suites); @@ -246,34 +246,57 @@ void __kunit_test_suites_exit(struct kunit_suite **suites); * Registers @suites_list with the test framework. See &struct kunit_suite for * more information. * - * When builtin, KUnit tests are all run as late_initcalls; this means - * that they cannot test anything where tests must run at a different init - * phase. One significant restriction resulting from this is that KUnit - * cannot reliably test anything that is initialize in the late_init phase; - * another is that KUnit is useless to test things that need to be run in - * an earlier init phase. - * - * An alternative is to build the tests as a module. Because modules - * do not support multiple late_initcall()s, we need to initialize an - * array of suites for a module. - * - * TODO(brendanhiggins@google.com): Don't run all KUnit tests as - * late_initcalls. I have some future work planned to dispatch all KUnit - * tests from the same place, and at the very least to do so after - * everything else is definitely initialized. + * If a test suite is built-in, module_init() gets translated into + * an initcall which we don't want as the idea is that for builtins + * the executor will manage execution. So ensure we do not define + * module_{init|exit} functions for the builtin case when registering + * suites via kunit_test_suites() below. */ -#define kunit_test_suites(suites_list...) \ - static struct kunit_suite *suites[] = {suites_list, NULL}; \ - static int kunit_test_suites_init(void) \ +#ifdef MODULE +#define kunit_test_suites_for_module(__suites) \ + static int __init kunit_test_suites_init(void) \ { \ - return __kunit_test_suites_init(suites); \ + return __kunit_test_suites_init(__suites); \ } \ - late_initcall(kunit_test_suites_init); \ + module_init(kunit_test_suites_init); \ + \ static void __exit kunit_test_suites_exit(void) \ { \ - return __kunit_test_suites_exit(suites); \ + return __kunit_test_suites_exit(__suites); \ } \ module_exit(kunit_test_suites_exit) +#else +#define kunit_test_suites_for_module(__suites) +#endif /* MODULE */ + +#define __kunit_test_suites(unique_array, unique_suites, ...) \ + static struct kunit_suite *unique_array[] = { __VA_ARGS__, NULL }; \ + kunit_test_suites_for_module(unique_array); \ + static struct kunit_suite **unique_suites \ + __used __section(.kunit_test_suites) = unique_array + +/** + * kunit_test_suites() - used to register one or more &struct kunit_suite + * with KUnit. + * + * @suites: a statically allocated list of &struct kunit_suite. + * + * Registers @suites with the test framework. See &struct kunit_suite for + * more information. + * + * When builtin, KUnit tests are all run via executor; this is done + * by placing the array of struct kunit_suite * in the .kunit_test_suites + * ELF section. + * + * An alternative is to build the tests as a module. Because modules do not + * support multiple initcall()s, we need to initialize an array of suites for a + * module. + * + */ +#define kunit_test_suites(...) \ + __kunit_test_suites(__UNIQUE_ID(array), \ + __UNIQUE_ID(suites), \ + __VA_ARGS__) #define kunit_test_suite(suite) kunit_test_suites(&suite) diff --git a/lib/kunit/Makefile b/lib/kunit/Makefile index 724b94311ca3..c49f4ffb6273 100644 --- a/lib/kunit/Makefile +++ b/lib/kunit/Makefile @@ -3,7 +3,8 @@ obj-$(CONFIG_KUNIT) += kunit.o kunit-objs += test.o \ string-stream.o \ assert.o \ - try-catch.o + try-catch.o \ + executor.o ifeq ($(CONFIG_KUNIT_DEBUGFS),y) kunit-objs += debugfs.o diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c new file mode 100644 index 000000000000..7015e7328dce --- /dev/null +++ b/lib/kunit/executor.c @@ -0,0 +1,28 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include + +/* + * These symbols point to the .kunit_test_suites section and are defined in + * include/asm-generic/vmlinux.lds.h, and consequently must be extern. + */ +extern struct kunit_suite * const * const __kunit_suites_start[]; +extern struct kunit_suite * const * const __kunit_suites_end[]; + +#if IS_BUILTIN(CONFIG_KUNIT) + +static int kunit_run_all_tests(void) +{ + struct kunit_suite * const * const *suites; + + for (suites = __kunit_suites_start; + suites < __kunit_suites_end; + suites++) + __kunit_test_suites_init(*suites); + + return 0; +} + +late_initcall(kunit_run_all_tests); + +#endif /* IS_BUILTIN(CONFIG_KUNIT) */ diff --git a/lib/kunit/test.c b/lib/kunit/test.c index c36037200310..3fd89f91937f 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -381,7 +381,7 @@ static void kunit_init_suite(struct kunit_suite *suite) kunit_debugfs_create_suite(suite); } -int __kunit_test_suites_init(struct kunit_suite **suites) +int __kunit_test_suites_init(struct kunit_suite * const * const suites) { unsigned int i; -- cgit v1.2.3 From 8c0d884986ba22f1020be9c02e41c030890ee8f2 Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Tue, 4 Aug 2020 13:47:43 -0700 Subject: init: main: add KUnit to kernel init Although we have not seen any actual examples where KUnit doesn't work because it runs in the late init phase of the kernel, it has been a concern for some time that this could potentially be an issue in the future. So, remove KUnit from init calls entirely, instead call directly from kernel_init() so that KUnit runs after late init. Co-developed-by: Alan Maguire Signed-off-by: Alan Maguire Signed-off-by: Brendan Higgins Reviewed-by: Stephen Boyd Reviewed-by: Kees Cook Reviewed-by: Luis Chamberlain Signed-off-by: Shuah Khan --- include/kunit/test.h | 9 +++++++++ init/main.c | 4 ++++ lib/kunit/executor.c | 4 +--- 3 files changed, 14 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/include/kunit/test.h b/include/kunit/test.h index d2d261f58259..7ea24466e49c 100644 --- a/include/kunit/test.h +++ b/include/kunit/test.h @@ -237,6 +237,15 @@ int __kunit_test_suites_init(struct kunit_suite * const * const suites); void __kunit_test_suites_exit(struct kunit_suite **suites); +#if IS_BUILTIN(CONFIG_KUNIT) +int kunit_run_all_tests(void); +#else +static inline int kunit_run_all_tests(void) +{ + return 0; +} +#endif /* IS_BUILTIN(CONFIG_KUNIT) */ + /** * kunit_test_suites() - used to register one or more &struct kunit_suite * with KUnit. diff --git a/init/main.c b/init/main.c index ae78fb68d231..232c8df465ee 100644 --- a/init/main.c +++ b/init/main.c @@ -107,6 +107,8 @@ #define CREATE_TRACE_POINTS #include +#include + static int kernel_init(void *); extern void init_IRQ(void); @@ -1511,6 +1513,8 @@ static noinline void __init kernel_init_freeable(void) do_basic_setup(); + kunit_run_all_tests(); + console_on_rootfs(); /* diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 7015e7328dce..4aab7f70a88c 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -11,7 +11,7 @@ extern struct kunit_suite * const * const __kunit_suites_end[]; #if IS_BUILTIN(CONFIG_KUNIT) -static int kunit_run_all_tests(void) +int kunit_run_all_tests(void) { struct kunit_suite * const * const *suites; @@ -23,6 +23,4 @@ static int kunit_run_all_tests(void) return 0; } -late_initcall(kunit_run_all_tests); - #endif /* IS_BUILTIN(CONFIG_KUNIT) */ -- cgit v1.2.3 From 45dcbb6f5ef78b0a9c1b91bea2f6f227642a65aa Mon Sep 17 00:00:00 2001 From: Brendan Higgins Date: Tue, 4 Aug 2020 13:47:44 -0700 Subject: kunit: test: add test plan to KUnit TAP format TAP 14 allows an optional test plan to be emitted before the start of the start of testing[1]; this is valuable because it makes it possible for a test harness to detect whether the number of tests run matches the number of tests expected to be run, ensuring that no tests silently failed. Link[1]: https://github.com/isaacs/testanything.github.io/blob/tap14/tap-version-14-specification.md#the-plan Signed-off-by: Brendan Higgins Reviewed-by: Stephen Boyd Signed-off-by: Shuah Khan --- lib/kunit/executor.c | 17 +++++ lib/kunit/test.c | 11 ---- tools/testing/kunit/kunit_parser.py | 76 ++++++++++++++++++---- .../test_data/test_is_test_passed-all_passed.log | 1 + .../kunit/test_data/test_is_test_passed-crash.log | 1 + .../test_data/test_is_test_passed-failure.log | 1 + 6 files changed, 82 insertions(+), 25 deletions(-) (limited to 'lib') diff --git a/lib/kunit/executor.c b/lib/kunit/executor.c index 4aab7f70a88c..a95742a4ece7 100644 --- a/lib/kunit/executor.c +++ b/lib/kunit/executor.c @@ -11,10 +11,27 @@ extern struct kunit_suite * const * const __kunit_suites_end[]; #if IS_BUILTIN(CONFIG_KUNIT) +static void kunit_print_tap_header(void) +{ + struct kunit_suite * const * const *suites, * const *subsuite; + int num_of_suites = 0; + + for (suites = __kunit_suites_start; + suites < __kunit_suites_end; + suites++) + for (subsuite = *suites; *subsuite != NULL; subsuite++) + num_of_suites++; + + pr_info("TAP version 14\n"); + pr_info("1..%d\n", num_of_suites); +} + int kunit_run_all_tests(void) { struct kunit_suite * const * const *suites; + kunit_print_tap_header(); + for (suites = __kunit_suites_start; suites < __kunit_suites_end; suites++) diff --git a/lib/kunit/test.c b/lib/kunit/test.c index 3fd89f91937f..de07876b6601 100644 --- a/lib/kunit/test.c +++ b/lib/kunit/test.c @@ -20,16 +20,6 @@ static void kunit_set_failure(struct kunit *test) WRITE_ONCE(test->success, false); } -static void kunit_print_tap_version(void) -{ - static bool kunit_has_printed_tap_version; - - if (!kunit_has_printed_tap_version) { - pr_info("TAP version 14\n"); - kunit_has_printed_tap_version = true; - } -} - /* * Append formatted message to log, size of which is limited to * KUNIT_LOG_SIZE bytes (including null terminating byte). @@ -69,7 +59,6 @@ EXPORT_SYMBOL_GPL(kunit_suite_num_test_cases); static void kunit_print_subtest_start(struct kunit_suite *suite) { - kunit_print_tap_version(); kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "# Subtest: %s", suite->name); kunit_log(KERN_INFO, suite, KUNIT_SUBTEST_INDENT "1..%zd", diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index f13e0c0d6663..8019e3dd4c32 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -45,10 +45,11 @@ class TestStatus(Enum): FAILURE = auto() TEST_CRASHED = auto() NO_TESTS = auto() + FAILURE_TO_PARSE_TESTS = auto() kunit_start_re = re.compile(r'TAP version [0-9]+$') kunit_end_re = re.compile('(List of all partitions:|' - 'Kernel panic - not syncing: VFS:|reboot: System halted)') + 'Kernel panic - not syncing: VFS:)') def isolate_kunit_output(kernel_output): started = False @@ -109,7 +110,7 @@ OkNotOkResult = namedtuple('OkNotOkResult', ['is_ok','description', 'text']) OK_NOT_OK_SUBTEST = re.compile(r'^[\s]+(ok|not ok) [0-9]+ - (.*)$') -OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) [0-9]+ - (.*)$') +OK_NOT_OK_MODULE = re.compile(r'^(ok|not ok) ([0-9]+) - (.*)$') def parse_ok_not_ok_test_case(lines: List[str], test_case: TestCase) -> bool: save_non_diagnositic(lines, test_case) @@ -197,7 +198,9 @@ def max_status(left: TestStatus, right: TestStatus) -> TestStatus: else: return TestStatus.SUCCESS -def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool: +def parse_ok_not_ok_test_suite(lines: List[str], + test_suite: TestSuite, + expected_suite_index: int) -> bool: consume_non_diagnositic(lines) if not lines: test_suite.status = TestStatus.TEST_CRASHED @@ -210,6 +213,12 @@ def parse_ok_not_ok_test_suite(lines: List[str], test_suite: TestSuite) -> bool: test_suite.status = TestStatus.SUCCESS else: test_suite.status = TestStatus.FAILURE + suite_index = int(match.group(2)) + if suite_index != expected_suite_index: + print_with_timestamp( + red('[ERROR] ') + 'expected_suite_index ' + + str(expected_suite_index) + ', but got ' + + str(suite_index)) return True else: return False @@ -222,7 +231,7 @@ def bubble_up_test_case_errors(test_suite: TestSuite) -> TestStatus: max_test_case_status = bubble_up_errors(lambda x: x.status, test_suite.cases) return max_status(max_test_case_status, test_suite.status) -def parse_test_suite(lines: List[str]) -> TestSuite: +def parse_test_suite(lines: List[str], expected_suite_index: int) -> TestSuite: if not lines: return None consume_non_diagnositic(lines) @@ -241,7 +250,7 @@ def parse_test_suite(lines: List[str]) -> TestSuite: break test_suite.cases.append(test_case) expected_test_case_num -= 1 - if parse_ok_not_ok_test_suite(lines, test_suite): + if parse_ok_not_ok_test_suite(lines, test_suite, expected_suite_index): test_suite.status = bubble_up_test_case_errors(test_suite) return test_suite elif not lines: @@ -261,6 +270,17 @@ def parse_tap_header(lines: List[str]) -> bool: else: return False +TEST_PLAN = re.compile(r'[0-9]+\.\.([0-9]+)') + +def parse_test_plan(lines: List[str]) -> int: + consume_non_diagnositic(lines) + match = TEST_PLAN.match(lines[0]) + if match: + lines.pop(0) + return int(match.group(1)) + else: + return None + def bubble_up_suite_errors(test_suite_list: List[TestSuite]) -> TestStatus: return bubble_up_errors(lambda x: x.status, test_suite_list) @@ -268,20 +288,33 @@ def parse_test_result(lines: List[str]) -> TestResult: consume_non_diagnositic(lines) if not lines or not parse_tap_header(lines): return TestResult(TestStatus.NO_TESTS, [], lines) + expected_test_suite_num = parse_test_plan(lines) + if not expected_test_suite_num: + return TestResult(TestStatus.FAILURE_TO_PARSE_TESTS, [], lines) test_suites = [] - test_suite = parse_test_suite(lines) - while test_suite: - test_suites.append(test_suite) - test_suite = parse_test_suite(lines) - return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines) + for i in range(1, expected_test_suite_num + 1): + test_suite = parse_test_suite(lines, i) + if test_suite: + test_suites.append(test_suite) + else: + print_with_timestamp( + red('[ERROR] ') + ' expected ' + + str(expected_test_suite_num) + + ' test suites, but got ' + str(i - 2)) + break + test_suite = parse_test_suite(lines, -1) + if test_suite: + print_with_timestamp(red('[ERROR] ') + + 'got unexpected test suite: ' + test_suite.name) + if test_suites: + return TestResult(bubble_up_suite_errors(test_suites), test_suites, lines) + else: + return TestResult(TestStatus.NO_TESTS, [], lines) -def parse_run_tests(kernel_output) -> TestResult: +def print_and_count_results(test_result: TestResult) -> None: total_tests = 0 failed_tests = 0 crashed_tests = 0 - test_result = parse_test_result(list(isolate_kunit_output(kernel_output))) - if test_result.status == TestStatus.NO_TESTS: - print_with_timestamp(red('[ERROR] ') + 'no kunit output detected') for test_suite in test_result.suites: if test_suite.status == TestStatus.SUCCESS: print_suite_divider(green('[PASSED] ') + test_suite.name) @@ -303,6 +336,21 @@ def parse_run_tests(kernel_output) -> TestResult: print_with_timestamp(red('[FAILED] ') + test_case.name) print_log(map(yellow, test_case.log)) print_with_timestamp('') + return total_tests, failed_tests, crashed_tests + +def parse_run_tests(kernel_output) -> TestResult: + total_tests = 0 + failed_tests = 0 + crashed_tests = 0 + test_result = parse_test_result(list(isolate_kunit_output(kernel_output))) + if test_result.status == TestStatus.NO_TESTS: + print(red('[ERROR] ') + yellow('no tests run!')) + elif test_result.status == TestStatus.FAILURE_TO_PARSE_TESTS: + print(red('[ERROR] ') + yellow('could not parse test results!')) + else: + (total_tests, + failed_tests, + crashed_tests) = print_and_count_results(test_result) print_with_timestamp(DIVIDER) fmt = green if test_result.status == TestStatus.SUCCESS else red print_with_timestamp( diff --git a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log index 62ebc0288355..bc0dc8fe35b7 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-all_passed.log @@ -1,4 +1,5 @@ TAP version 14 +1..2 # Subtest: sysctl_test 1..8 # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed diff --git a/tools/testing/kunit/test_data/test_is_test_passed-crash.log b/tools/testing/kunit/test_data/test_is_test_passed-crash.log index 0b249870c8be..4d97f6708c4a 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-crash.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-crash.log @@ -1,6 +1,7 @@ printk: console [tty0] enabled printk: console [mc-1] enabled TAP version 14 +1..2 # Subtest: sysctl_test 1..8 # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed diff --git a/tools/testing/kunit/test_data/test_is_test_passed-failure.log b/tools/testing/kunit/test_data/test_is_test_passed-failure.log index 9e89d32d5667..7a416497e3be 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-failure.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-failure.log @@ -1,4 +1,5 @@ TAP version 14 +1..2 # Subtest: sysctl_test 1..8 # sysctl_test_dointvec_null_tbl_data: sysctl_test_dointvec_null_tbl_data passed -- cgit v1.2.3 From 44f3625bc61653ea3bde9960298faf2f5518fda5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Thu, 8 Oct 2020 12:45:17 +0200 Subject: netlink: export policy in extended ACK Add a new attribute NLMSGERR_ATTR_POLICY to the extended ACK to advertise the policy, e.g. if an attribute was out of range, you'll know the range that's permissible. Add new NL_SET_ERR_MSG_ATTR_POL() and NL_SET_ERR_MSG_ATTR_POL() macros to set this, since realistically it's only useful to do this when the bad attribute (offset) is also returned. Use it in lib/nlattr.c which practically does all the policy validation. v2: - add and use netlink_policy_dump_attr_size_estimate() v3: - remove redundant break v4: - really remove redundant break ... sorry Reviewed-by: Jakub Kicinski Signed-off-by: Johannes Berg Signed-off-by: Jakub Kicinski --- include/linux/netlink.h | 30 ++++++++++++++-------- include/net/netlink.h | 4 +++ include/uapi/linux/netlink.h | 2 ++ lib/nlattr.c | 35 +++++++++++++------------ net/netlink/af_netlink.c | 5 ++++ net/netlink/policy.c | 61 ++++++++++++++++++++++++++++++++++++++++++++ 6 files changed, 110 insertions(+), 27 deletions(-) (limited to 'lib') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index e3e49f0e5c13..666cd0390699 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -68,12 +68,14 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) * @_msg: message string to report - don't access directly, use * %NL_SET_ERR_MSG * @bad_attr: attribute with error + * @policy: policy for a bad attribute * @cookie: cookie data to return to userspace (for success) * @cookie_len: actual cookie data length */ struct netlink_ext_ack { const char *_msg; const struct nlattr *bad_attr; + const struct nla_policy *policy; u8 cookie[NETLINK_MAX_COOKIE_LEN]; u8 cookie_len; }; @@ -95,21 +97,29 @@ struct netlink_ext_ack { #define NL_SET_ERR_MSG_MOD(extack, msg) \ NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) -#define NL_SET_BAD_ATTR(extack, attr) do { \ - if ((extack)) \ +#define NL_SET_BAD_ATTR_POLICY(extack, attr, pol) do { \ + if ((extack)) { \ (extack)->bad_attr = (attr); \ + (extack)->policy = (pol); \ + } \ } while (0) -#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do { \ - static const char __msg[] = msg; \ - struct netlink_ext_ack *__extack = (extack); \ - \ - if (__extack) { \ - __extack->_msg = __msg; \ - __extack->bad_attr = (attr); \ - } \ +#define NL_SET_BAD_ATTR(extack, attr) NL_SET_BAD_ATTR_POLICY(extack, attr, NULL) + +#define NL_SET_ERR_MSG_ATTR_POL(extack, attr, pol, msg) do { \ + static const char __msg[] = msg; \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (__extack) { \ + __extack->_msg = __msg; \ + __extack->bad_attr = (attr); \ + __extack->policy = (pol); \ + } \ } while (0) +#define NL_SET_ERR_MSG_ATTR(extack, attr, msg) \ + NL_SET_ERR_MSG_ATTR_POL(extack, attr, NULL, msg) + static inline void nl_set_extack_cookie_u64(struct netlink_ext_ack *extack, u64 cookie) { diff --git a/include/net/netlink.h b/include/net/netlink.h index 2b9e41075f19..7356f41d23ba 100644 --- a/include/net/netlink.h +++ b/include/net/netlink.h @@ -1957,6 +1957,10 @@ int netlink_policy_dump_get_policy_idx(struct netlink_policy_dump_state *state, bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state); int netlink_policy_dump_write(struct sk_buff *skb, struct netlink_policy_dump_state *state); +int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt); +int netlink_policy_dump_write_attr(struct sk_buff *skb, + const struct nla_policy *pt, + int nestattr); void netlink_policy_dump_free(struct netlink_policy_dump_state *state); #endif diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h index d02e472ba54c..c3816ff7bfc3 100644 --- a/include/uapi/linux/netlink.h +++ b/include/uapi/linux/netlink.h @@ -129,6 +129,7 @@ struct nlmsgerr { * @NLMSGERR_ATTR_COOKIE: arbitrary subsystem specific cookie to * be used - in the success case - to identify a created * object or operation or similar (binary) + * @NLMSGERR_ATTR_POLICY: policy for a rejected attribute * @__NLMSGERR_ATTR_MAX: number of attributes * @NLMSGERR_ATTR_MAX: highest attribute number */ @@ -137,6 +138,7 @@ enum nlmsgerr_attrs { NLMSGERR_ATTR_MSG, NLMSGERR_ATTR_OFFS, NLMSGERR_ATTR_COOKIE, + NLMSGERR_ATTR_POLICY, __NLMSGERR_ATTR_MAX, NLMSGERR_ATTR_MAX = __NLMSGERR_ATTR_MAX - 1 diff --git a/lib/nlattr.c b/lib/nlattr.c index 9c99f5daa4d2..74019c8ebf6b 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -96,8 +96,8 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype, continue; if (nla_len(entry) < NLA_HDRLEN) { - NL_SET_ERR_MSG_ATTR(extack, entry, - "Array element too short"); + NL_SET_ERR_MSG_ATTR_POL(extack, entry, policy, + "Array element too short"); return -ERANGE; } @@ -195,8 +195,8 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt, pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, pt->type); if (validate & NL_VALIDATE_STRICT_ATTRS) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "invalid attribute length"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "invalid attribute length"); return -EINVAL; } @@ -208,11 +208,11 @@ static int nla_validate_range_unsigned(const struct nla_policy *pt, bool binary = pt->type == NLA_BINARY; if (binary) - NL_SET_ERR_MSG_ATTR(extack, nla, - "binary attribute size out of range"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "binary attribute size out of range"); else - NL_SET_ERR_MSG_ATTR(extack, nla, - "integer out of range"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "integer out of range"); return -ERANGE; } @@ -291,8 +291,8 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt, nla_get_range_signed(pt, &range); if (value < range.min || value > range.max) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "integer out of range"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "integer out of range"); return -ERANGE; } @@ -377,8 +377,8 @@ static int validate_nla(const struct nlattr *nla, int maxtype, pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n", current->comm, type); if (validate & NL_VALIDATE_STRICT_ATTRS) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "invalid attribute length"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "invalid attribute length"); return -EINVAL; } } @@ -386,14 +386,14 @@ static int validate_nla(const struct nlattr *nla, int maxtype, if (validate & NL_VALIDATE_NESTED) { if ((pt->type == NLA_NESTED || pt->type == NLA_NESTED_ARRAY) && !(nla->nla_type & NLA_F_NESTED)) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "NLA_F_NESTED is missing"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "NLA_F_NESTED is missing"); return -EINVAL; } if (pt->type != NLA_NESTED && pt->type != NLA_NESTED_ARRAY && pt->type != NLA_UNSPEC && (nla->nla_type & NLA_F_NESTED)) { - NL_SET_ERR_MSG_ATTR(extack, nla, - "NLA_F_NESTED not expected"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "NLA_F_NESTED not expected"); return -EINVAL; } } @@ -550,7 +550,8 @@ static int validate_nla(const struct nlattr *nla, int maxtype, return 0; out_err: - NL_SET_ERR_MSG_ATTR(extack, nla, "Attribute failed policy validation"); + NL_SET_ERR_MSG_ATTR_POL(extack, nla, pt, + "Attribute failed policy validation"); return err; } diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c index df675a8e1918..daca50d6bb12 100644 --- a/net/netlink/af_netlink.c +++ b/net/netlink/af_netlink.c @@ -2420,6 +2420,8 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, tlvlen += nla_total_size(sizeof(u32)); if (nlk_has_extack && extack && extack->cookie_len) tlvlen += nla_total_size(extack->cookie_len); + if (err && nlk_has_extack && extack && extack->policy) + tlvlen += netlink_policy_dump_attr_size_estimate(extack->policy); if (tlvlen) flags |= NLM_F_ACK_TLVS; @@ -2452,6 +2454,9 @@ void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, if (extack->cookie_len) WARN_ON(nla_put(skb, NLMSGERR_ATTR_COOKIE, extack->cookie_len, extack->cookie)); + if (extack->policy) + netlink_policy_dump_write_attr(skb, extack->policy, + NLMSGERR_ATTR_POLICY); } nlmsg_end(skb, rep); diff --git a/net/netlink/policy.c b/net/netlink/policy.c index 4383436759e2..8d7c900e27f4 100644 --- a/net/netlink/policy.c +++ b/net/netlink/policy.c @@ -196,12 +196,54 @@ bool netlink_policy_dump_loop(struct netlink_policy_dump_state *state) return !netlink_policy_dump_finished(state); } +int netlink_policy_dump_attr_size_estimate(const struct nla_policy *pt) +{ + /* nested + type */ + int common = 2 * nla_attr_size(sizeof(u32)); + + switch (pt->type) { + case NLA_UNSPEC: + case NLA_REJECT: + /* these actually don't need any space */ + return 0; + case NLA_NESTED: + case NLA_NESTED_ARRAY: + /* common, policy idx, policy maxattr */ + return common + 2 * nla_attr_size(sizeof(u32)); + case NLA_U8: + case NLA_U16: + case NLA_U32: + case NLA_U64: + case NLA_MSECS: + case NLA_S8: + case NLA_S16: + case NLA_S32: + case NLA_S64: + /* maximum is common, u64 min/max with padding */ + return common + + 2 * (nla_attr_size(0) + nla_attr_size(sizeof(u64))); + case NLA_BITFIELD32: + return common + nla_attr_size(sizeof(u32)); + case NLA_STRING: + case NLA_NUL_STRING: + case NLA_BINARY: + /* maximum is common, u32 min-length/max-length */ + return common + 2 * nla_attr_size(sizeof(u32)); + case NLA_FLAG: + return common; + } + + /* this should then cause a warning later */ + return 0; +} + static int __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, struct sk_buff *skb, const struct nla_policy *pt, int nestattr) { + int estimate = netlink_policy_dump_attr_size_estimate(pt); enum netlink_attribute_type type; struct nlattr *attr; @@ -334,12 +376,31 @@ __netlink_policy_dump_write_attr(struct netlink_policy_dump_state *state, goto nla_put_failure; nla_nest_end(skb, attr); + WARN_ON(attr->nla_len > estimate); + return 0; nla_put_failure: nla_nest_cancel(skb, attr); return -ENOBUFS; } +/** + * netlink_policy_dump_write_attr - write a given attribute policy + * @skb: the message skb to write to + * @pt: the attribute's policy + * @nestattr: the nested attribute ID to use + * + * Returns: 0 on success, an error code otherwise; -%ENODATA is + * special, indicating that there's no policy data and + * the attribute is generally rejected. + */ +int netlink_policy_dump_write_attr(struct sk_buff *skb, + const struct nla_policy *pt, + int nestattr) +{ + return __netlink_policy_dump_write_attr(NULL, skb, pt, nestattr); +} + /** * netlink_policy_dump_write - write current policy dump attributes * @skb: the message skb to write to -- cgit v1.2.3 From f82cd2f0b5eb715b1a296e20b34da7d296b6e9a4 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 18 Aug 2020 09:05:56 -0400 Subject: XArray: Add private interface for workingset node deletion Move the tricky bits of dealing with the XArray from the workingset code to the XArray. Make it clear in the documentation that this is a private interface, and only export it for the benefit of the test suite. Signed-off-by: Matthew Wilcox (Oracle) --- include/linux/xarray.h | 2 ++ lib/test_xarray.c | 7 +------ lib/xarray.c | 23 +++++++++++++++++++++++ mm/workingset.c | 13 ++----------- 4 files changed, 28 insertions(+), 17 deletions(-) (limited to 'lib') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 6b336098fca7..29db4e16eb89 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1286,6 +1286,8 @@ static inline bool xa_is_advanced(const void *entry) */ typedef void (*xa_update_node_t)(struct xa_node *node); +void xa_delete_node(struct xa_node *, xa_update_node_t); + /* * The xa_state is opaque to its users. It contains various different pieces * of state involved in the current operation on the XArray. It should be diff --git a/lib/test_xarray.c b/lib/test_xarray.c index 1122c4453c87..64ae07b1bcf4 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1600,14 +1600,9 @@ static noinline void shadow_remove(struct xarray *xa) xa_lock(xa); while ((node = list_first_entry_or_null(&shadow_nodes, struct xa_node, private_list))) { - XA_STATE(xas, node->array, 0); XA_BUG_ON(xa, node->array != xa); list_del_init(&node->private_list); - xas.xa_node = xa_parent_locked(node->array, node); - xas.xa_offset = node->offset; - xas.xa_shift = node->shift + XA_CHUNK_SHIFT; - xas_set_update(&xas, test_update_node); - xas_store(&xas, NULL); + xa_delete_node(node, test_update_node); } xa_unlock(xa); } diff --git a/lib/xarray.c b/lib/xarray.c index e9e641d3c0c3..1fa5c5658e63 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1973,6 +1973,29 @@ unsigned int xa_extract(struct xarray *xa, void **dst, unsigned long start, } EXPORT_SYMBOL(xa_extract); +/** + * xa_delete_node() - Private interface for workingset code. + * @node: Node to be removed from the tree. + * @update: Function to call to update ancestor nodes. + * + * Context: xa_lock must be held on entry and will not be released. + */ +void xa_delete_node(struct xa_node *node, xa_update_node_t update) +{ + struct xa_state xas = { + .xa = node->array, + .xa_index = (unsigned long)node->offset << + (node->shift + XA_CHUNK_SHIFT), + .xa_shift = node->shift + XA_CHUNK_SHIFT, + .xa_offset = node->offset, + .xa_node = xa_parent_locked(node->array, node), + .xa_update = update, + }; + + xas_store(&xas, NULL); +} +EXPORT_SYMBOL_GPL(xa_delete_node); /* For the benefit of the test suite */ + /** * xa_destroy() - Free all internal data structures. * @xa: XArray. diff --git a/mm/workingset.c b/mm/workingset.c index 92e66113a577..e185bfb8bd4e 100644 --- a/mm/workingset.c +++ b/mm/workingset.c @@ -519,12 +519,11 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, void *arg) __must_hold(lru_lock) { struct xa_node *node = container_of(item, struct xa_node, private_list); - XA_STATE(xas, node->array, 0); struct address_space *mapping; int ret; /* - * Page cache insertions and deletions synchroneously maintain + * Page cache insertions and deletions synchronously maintain * the shadow node LRU under the i_pages lock and the * lru_lock. Because the page cache tree is emptied before * the inode can be destroyed, holding the lru_lock pins any @@ -559,15 +558,7 @@ static enum lru_status shadow_lru_isolate(struct list_head *item, if (WARN_ON_ONCE(node->count != node->nr_values)) goto out_invalid; mapping->nrexceptional -= node->nr_values; - xas.xa_node = xa_parent_locked(&mapping->i_pages, node); - xas.xa_offset = node->offset; - xas.xa_shift = node->shift + XA_CHUNK_SHIFT; - xas_set_update(&xas, workingset_update_node); - /* - * We could store a shadow entry here which was the minimum of the - * shadow entries we were tracking ... - */ - xas_store(&xas, NULL); + xa_delete_node(node, workingset_update_node); __inc_lruvec_slab_state(node, WORKINGSET_NODERECLAIM); out_invalid: -- cgit v1.2.3 From 84c34df158cf215b0cd1475ab3b8e6f212f81f23 Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Tue, 13 Oct 2020 08:46:29 -0400 Subject: XArray: Fix xas_create_range for ranges above 4 billion The 'sibs' variable would be shifted as a 32-bit integer, so if 'shift' is more than 32, this is undefined behaviour. In practice, this doesn't happen because the page cache is the only user and nobody uses 16TB pages. Signed-off-by: Matthew Wilcox (Oracle) --- lib/xarray.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/xarray.c b/lib/xarray.c index 1fa5c5658e63..2046d676ab41 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -703,7 +703,7 @@ void xas_create_range(struct xa_state *xas) unsigned char shift = xas->xa_shift; unsigned char sibs = xas->xa_sibs; - xas->xa_index |= ((sibs + 1) << shift) - 1; + xas->xa_index |= ((sibs + 1UL) << shift) - 1; if (xas_is_node(xas) && xas->xa_node->shift == xas->xa_shift) xas->xa_offset |= sibs; xas->xa_shift = 0; -- cgit v1.2.3 From d2585f5164c298aaaed14c2c8d313cbe7bd5b253 Mon Sep 17 00:00:00 2001 From: Vitor Massaru Iha Date: Wed, 29 Jul 2020 14:58:49 -0300 Subject: lib: kunit: add bitfield test conversion to KUnit This adds the conversion of the runtime tests of test_bitfield, from `lib/test_bitfield.c` to KUnit tests. Code Style Documentation: [0] Signed-off-by: Vitor Massaru Iha Link: [0] https://lore.kernel.org/linux-kselftest/20200620054944.167330-1-davidgow@google.com/T/#u Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- lib/Kconfig.debug | 23 ++++--- lib/Makefile | 2 +- lib/bitfield_kunit.c | 156 +++++++++++++++++++++++++++++++++++++++++++++++ lib/test_bitfield.c | 168 --------------------------------------------------- 4 files changed, 173 insertions(+), 176 deletions(-) create mode 100644 lib/bitfield_kunit.c delete mode 100644 lib/test_bitfield.c (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e068c3c7189a..4f09c6505a2e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2037,13 +2037,6 @@ config TEST_BITMAP If unsure, say N. -config TEST_BITFIELD - tristate "Test bitfield functions at runtime" - help - Enable this option to test the bitfield functions at boot. - - If unsure, say N. - config TEST_UUID tristate "Test functions located in the uuid module at runtime" @@ -2193,6 +2186,22 @@ config TEST_SYSCTL If unsure, say N. +config BITFIELD_KUNIT + tristate "KUnit test bitfield functions at runtime" + depends on KUNIT + help + Enable this option to test the bitfield functions at boot. + + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config SYSCTL_KUNIT_TEST tristate "KUnit test for sysctl" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index e290fc5707ea..d862d41fdc3d 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -80,7 +80,6 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_STRSCPY) += test_strscpy.o -obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o @@ -340,6 +339,7 @@ obj-$(CONFIG_OBJAGG) += objagg.o obj-$(CONFIG_PLDMFW) += pldmfw/ # KUnit tests +obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o obj-$(CONFIG_BITS_TEST) += test_bits.o diff --git a/lib/bitfield_kunit.c b/lib/bitfield_kunit.c new file mode 100644 index 000000000000..d63a2be5aff8 --- /dev/null +++ b/lib/bitfield_kunit.c @@ -0,0 +1,156 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for bitfield helpers. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#define CHECK_ENC_GET_U(tp, v, field, res) do { \ + { \ + u##tp _res; \ + \ + _res = u##tp##_encode_bits(v, field); \ + KUNIT_ASSERT_FALSE_MSG(context, _res != res, \ + "u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n", \ + (u64)_res); \ + KUNIT_ASSERT_FALSE(context, \ + u##tp##_get_bits(_res, field) != v); \ + } \ + } while (0) + +#define CHECK_ENC_GET_LE(tp, v, field, res) do { \ + { \ + __le##tp _res; \ + \ + _res = le##tp##_encode_bits(v, field); \ + KUNIT_ASSERT_FALSE_MSG(context, \ + _res != cpu_to_le##tp(res), \ + "le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx",\ + (u64)le##tp##_to_cpu(_res), \ + (u64)(res)); \ + KUNIT_ASSERT_FALSE(context, \ + le##tp##_get_bits(_res, field) != v);\ + } \ + } while (0) + +#define CHECK_ENC_GET_BE(tp, v, field, res) do { \ + { \ + __be##tp _res; \ + \ + _res = be##tp##_encode_bits(v, field); \ + KUNIT_ASSERT_FALSE_MSG(context, \ + _res != cpu_to_be##tp(res), \ + "be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx", \ + (u64)be##tp##_to_cpu(_res), \ + (u64)(res)); \ + KUNIT_ASSERT_FALSE(context, \ + be##tp##_get_bits(_res, field) != v);\ + } \ + } while (0) + +#define CHECK_ENC_GET(tp, v, field, res) do { \ + CHECK_ENC_GET_U(tp, v, field, res); \ + CHECK_ENC_GET_LE(tp, v, field, res); \ + CHECK_ENC_GET_BE(tp, v, field, res); \ + } while (0) + +static void __init test_bitfields_constants(struct kunit *context) +{ + /* + * NOTE + * This whole function compiles (or at least should, if everything + * is going according to plan) to nothing after optimisation. + */ + + CHECK_ENC_GET(16, 1, 0x000f, 0x0001); + CHECK_ENC_GET(16, 3, 0x00f0, 0x0030); + CHECK_ENC_GET(16, 5, 0x0f00, 0x0500); + CHECK_ENC_GET(16, 7, 0xf000, 0x7000); + CHECK_ENC_GET(16, 14, 0x000f, 0x000e); + CHECK_ENC_GET(16, 15, 0x00f0, 0x00f0); + + CHECK_ENC_GET_U(8, 1, 0x0f, 0x01); + CHECK_ENC_GET_U(8, 3, 0xf0, 0x30); + CHECK_ENC_GET_U(8, 14, 0x0f, 0x0e); + CHECK_ENC_GET_U(8, 15, 0xf0, 0xf0); + + CHECK_ENC_GET(32, 1, 0x00000f00, 0x00000100); + CHECK_ENC_GET(32, 3, 0x0000f000, 0x00003000); + CHECK_ENC_GET(32, 5, 0x000f0000, 0x00050000); + CHECK_ENC_GET(32, 7, 0x00f00000, 0x00700000); + CHECK_ENC_GET(32, 14, 0x0f000000, 0x0e000000); + CHECK_ENC_GET(32, 15, 0xf0000000, 0xf0000000); + + CHECK_ENC_GET(64, 1, 0x00000f0000000000ull, 0x0000010000000000ull); + CHECK_ENC_GET(64, 3, 0x0000f00000000000ull, 0x0000300000000000ull); + CHECK_ENC_GET(64, 5, 0x000f000000000000ull, 0x0005000000000000ull); + CHECK_ENC_GET(64, 7, 0x00f0000000000000ull, 0x0070000000000000ull); + CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull); + CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull); +} + +#define CHECK(tp, mask) do { \ + u64 v; \ + \ + for (v = 0; v < 1 << hweight32(mask); v++) \ + KUNIT_ASSERT_FALSE(context, \ + tp##_encode_bits(v, mask) != v << __ffs64(mask));\ + } while (0) + +static void __init test_bitfields_variables(struct kunit *context) +{ + CHECK(u8, 0x0f); + CHECK(u8, 0xf0); + CHECK(u8, 0x38); + + CHECK(u16, 0x0038); + CHECK(u16, 0x0380); + CHECK(u16, 0x3800); + CHECK(u16, 0x8000); + + CHECK(u32, 0x80000000); + CHECK(u32, 0x7f000000); + CHECK(u32, 0x07e00000); + CHECK(u32, 0x00018000); + + CHECK(u64, 0x8000000000000000ull); + CHECK(u64, 0x7f00000000000000ull); + CHECK(u64, 0x0001800000000000ull); + CHECK(u64, 0x0000000080000000ull); + CHECK(u64, 0x000000007f000000ull); + CHECK(u64, 0x0000000018000000ull); + CHECK(u64, 0x0000001f8000000ull); +} + + +static void __init test_bitfields_compile(struct kunit *context) +{ + /* these should fail compilation */ + CHECK_ENC_GET(16, 16, 0x0f00, 0x1000); + u32_encode_bits(7, 0x06000000); + + /* this should at least give a warning */ + u16_encode_bits(0, 0x60000); +} + +static struct kunit_case __refdata bitfields_test_cases[] = { + KUNIT_CASE(test_bitfields_constants), + KUNIT_CASE(test_bitfields_variables), +#ifdef TEST_BITFIELD_COMPILE + KUNIT_CASE(test_bitfields_compile), +#endif + {} +}; + +static struct kunit_suite bitfields_test_suite = { + .name = "bitfields", + .test_cases = bitfields_test_cases, +}; + +kunit_test_suites(&bitfields_test_suite); + +MODULE_AUTHOR("Johannes Berg "); +MODULE_LICENSE("GPL"); diff --git a/lib/test_bitfield.c b/lib/test_bitfield.c deleted file mode 100644 index 5b8f4108662d..000000000000 --- a/lib/test_bitfield.c +++ /dev/null @@ -1,168 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0+ -/* - * Test cases for bitfield helpers. - */ - -#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt - -#include -#include -#include - -#define CHECK_ENC_GET_U(tp, v, field, res) do { \ - { \ - u##tp _res; \ - \ - _res = u##tp##_encode_bits(v, field); \ - if (_res != res) { \ - pr_warn("u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n",\ - (u64)_res); \ - return -EINVAL; \ - } \ - if (u##tp##_get_bits(_res, field) != v) \ - return -EINVAL; \ - } \ - } while (0) - -#define CHECK_ENC_GET_LE(tp, v, field, res) do { \ - { \ - __le##tp _res; \ - \ - _res = le##tp##_encode_bits(v, field); \ - if (_res != cpu_to_le##tp(res)) { \ - pr_warn("le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ - (u64)le##tp##_to_cpu(_res), \ - (u64)(res)); \ - return -EINVAL; \ - } \ - if (le##tp##_get_bits(_res, field) != v) \ - return -EINVAL; \ - } \ - } while (0) - -#define CHECK_ENC_GET_BE(tp, v, field, res) do { \ - { \ - __be##tp _res; \ - \ - _res = be##tp##_encode_bits(v, field); \ - if (_res != cpu_to_be##tp(res)) { \ - pr_warn("be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ - (u64)be##tp##_to_cpu(_res), \ - (u64)(res)); \ - return -EINVAL; \ - } \ - if (be##tp##_get_bits(_res, field) != v) \ - return -EINVAL; \ - } \ - } while (0) - -#define CHECK_ENC_GET(tp, v, field, res) do { \ - CHECK_ENC_GET_U(tp, v, field, res); \ - CHECK_ENC_GET_LE(tp, v, field, res); \ - CHECK_ENC_GET_BE(tp, v, field, res); \ - } while (0) - -static int test_constants(void) -{ - /* - * NOTE - * This whole function compiles (or at least should, if everything - * is going according to plan) to nothing after optimisation. - */ - - CHECK_ENC_GET(16, 1, 0x000f, 0x0001); - CHECK_ENC_GET(16, 3, 0x00f0, 0x0030); - CHECK_ENC_GET(16, 5, 0x0f00, 0x0500); - CHECK_ENC_GET(16, 7, 0xf000, 0x7000); - CHECK_ENC_GET(16, 14, 0x000f, 0x000e); - CHECK_ENC_GET(16, 15, 0x00f0, 0x00f0); - - CHECK_ENC_GET_U(8, 1, 0x0f, 0x01); - CHECK_ENC_GET_U(8, 3, 0xf0, 0x30); - CHECK_ENC_GET_U(8, 14, 0x0f, 0x0e); - CHECK_ENC_GET_U(8, 15, 0xf0, 0xf0); - - CHECK_ENC_GET(32, 1, 0x00000f00, 0x00000100); - CHECK_ENC_GET(32, 3, 0x0000f000, 0x00003000); - CHECK_ENC_GET(32, 5, 0x000f0000, 0x00050000); - CHECK_ENC_GET(32, 7, 0x00f00000, 0x00700000); - CHECK_ENC_GET(32, 14, 0x0f000000, 0x0e000000); - CHECK_ENC_GET(32, 15, 0xf0000000, 0xf0000000); - - CHECK_ENC_GET(64, 1, 0x00000f0000000000ull, 0x0000010000000000ull); - CHECK_ENC_GET(64, 3, 0x0000f00000000000ull, 0x0000300000000000ull); - CHECK_ENC_GET(64, 5, 0x000f000000000000ull, 0x0005000000000000ull); - CHECK_ENC_GET(64, 7, 0x00f0000000000000ull, 0x0070000000000000ull); - CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull); - CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull); - - return 0; -} - -#define CHECK(tp, mask) do { \ - u64 v; \ - \ - for (v = 0; v < 1 << hweight32(mask); v++) \ - if (tp##_encode_bits(v, mask) != v << __ffs64(mask)) \ - return -EINVAL; \ - } while (0) - -static int test_variables(void) -{ - CHECK(u8, 0x0f); - CHECK(u8, 0xf0); - CHECK(u8, 0x38); - - CHECK(u16, 0x0038); - CHECK(u16, 0x0380); - CHECK(u16, 0x3800); - CHECK(u16, 0x8000); - - CHECK(u32, 0x80000000); - CHECK(u32, 0x7f000000); - CHECK(u32, 0x07e00000); - CHECK(u32, 0x00018000); - - CHECK(u64, 0x8000000000000000ull); - CHECK(u64, 0x7f00000000000000ull); - CHECK(u64, 0x0001800000000000ull); - CHECK(u64, 0x0000000080000000ull); - CHECK(u64, 0x000000007f000000ull); - CHECK(u64, 0x0000000018000000ull); - CHECK(u64, 0x0000001f8000000ull); - - return 0; -} - -static int __init test_bitfields(void) -{ - int ret = test_constants(); - - if (ret) { - pr_warn("constant tests failed!\n"); - return ret; - } - - ret = test_variables(); - if (ret) { - pr_warn("variable tests failed!\n"); - return ret; - } - -#ifdef TEST_BITFIELD_COMPILE - /* these should fail compilation */ - CHECK_ENC_GET(16, 16, 0x0f00, 0x1000); - u32_encode_bits(7, 0x06000000); - - /* this should at least give a warning */ - u16_encode_bits(0, 0x60000); -#endif - - pr_info("tests passed\n"); - - return 0; -} -module_init(test_bitfields) - -MODULE_AUTHOR("Johannes Berg "); -MODULE_LICENSE("GPL"); -- cgit v1.2.3 From b2a182a40278bc5849730e66bca01a762188ed86 Mon Sep 17 00:00:00 2001 From: Douglas Gilbert Date: Thu, 15 Oct 2020 14:57:35 -0400 Subject: sgl_alloc_order: fix memory leak sgl_alloc_order() can fail when 'length' is large on a memory constrained system. When order > 0 it will potentially be making several multi-page allocations with the later ones more likely to fail than the earlier one. So it is important that sgl_alloc_order() frees up any pages it has obtained before returning NULL. In the case when order > 0 it calls the wrong free page function and leaks. In testing the leak was sufficient to bring down my 8 GiB laptop with OOM. Reviewed-by: Bart Van Assche Signed-off-by: Douglas Gilbert Signed-off-by: Jens Axboe --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 5d63a8857f36..c448642e0f78 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -514,7 +514,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, elem_len = min_t(u64, length, PAGE_SIZE << order); page = alloc_pages(gfp, order); if (!page) { - sgl_free(sgl); + sgl_free_order(sgl, order); return NULL; } -- cgit v1.2.3 From 9a40401cfa1356b0d169be8470ed7b2edc33b98f Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 16 Oct 2020 08:46:01 -0300 Subject: lib/scatterlist: Do not limit max_segment to PAGE_ALIGNED values The main intention of the max_segment argument to __sg_alloc_table_from_pages() is to match the DMA layer segment size set by dma_set_max_seg_size(). Restricting the input to be page aligned makes it impossible to just connect the DMA layer to this API. The only reason for a page alignment here is because the algorithm will overshoot the max_segment if it is not a multiple of PAGE_SIZE. Simply fix the alignment before starting and don't expose this implementation detail to the callers. A future patch will completely remove SCATTERLIST_MAX_SEGMENT. Signed-off-by: Jason Gunthorpe --- lib/scatterlist.c | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index e102fdfaa75b..ed2497c79a21 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -404,7 +404,7 @@ static struct scatterlist *get_next_sg(struct sg_table *table, * @n_pages: Number of pages in the pages array * @offset: Offset from start of the first page to the start of a buffer * @size: Number of valid bytes in the buffer (after offset) - * @max_segment: Maximum size of a scatterlist node in bytes (page aligned) + * @max_segment: Maximum size of a scatterlist element in bytes * @prv: Last populated sge in sgt * @left_pages: Left pages caller have to set after this call * @gfp_mask: GFP allocation mask @@ -435,7 +435,12 @@ struct scatterlist *__sg_alloc_table_from_pages(struct sg_table *sgt, unsigned int added_nents = 0; struct scatterlist *s = prv; - if (WARN_ON(!max_segment || offset_in_page(max_segment))) + /* + * The algorithm below requires max_segment to be aligned to PAGE_SIZE + * otherwise it can overshoot. + */ + max_segment = ALIGN_DOWN(max_segment, PAGE_SIZE); + if (WARN_ON(max_segment < PAGE_SIZE)) return ERR_PTR(-EINVAL); if (IS_ENABLED(CONFIG_ARCH_NO_SG_CHAIN) && prv) @@ -542,8 +547,7 @@ int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned long size, gfp_t gfp_mask) { return PTR_ERR_OR_ZERO(__sg_alloc_table_from_pages(sgt, pages, n_pages, - offset, size, SCATTERLIST_MAX_SEGMENT, - NULL, 0, gfp_mask)); + offset, size, UINT_MAX, NULL, 0, gfp_mask)); } EXPORT_SYMBOL(sg_alloc_table_from_pages); -- cgit v1.2.3 From 57417cebc96b57122a2207fc84a6077d20c84b4b Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:05:13 -0700 Subject: XArray: add xa_get_order Patch series "Fix read-only THP for non-tmpfs filesystems". As described more verbosely in the [3/3] changelog, we can inadvertently put an order-0 page in the page cache which occupies 512 consecutive entries. Users are running into this if they enable the READ_ONLY_THP_FOR_FS config option; see https://bugzilla.kernel.org/show_bug.cgi?id=206569 and Qian Cai has also reported it here: https://lore.kernel.org/lkml/20200616013309.GB815@lca.pw/ This is a rather intrusive way of fixing the problem, but has the advantage that I've actually been testing it with the THP patches, which means that it sees far more use than it does upstream -- indeed, Song has been entirely unable to reproduce it. It also has the advantage that it removes a few patches from my gargantuan backlog of THP patches. This patch (of 3): This function returns the order of the entry at the index. We need this because there isn't space in the shadow entry to encode its order. [akpm@linux-foundation.org: export xa_get_order to modules] Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: "Kirill A . Shutemov" Cc: Qian Cai Cc: Song Liu Link: https://lkml.kernel.org/r/20200903183029.14930-1-willy@infradead.org Link: https://lkml.kernel.org/r/20200903183029.14930-2-willy@infradead.org Signed-off-by: Linus Torvalds --- include/linux/xarray.h | 9 +++++++++ lib/test_xarray.c | 21 +++++++++++++++++++++ lib/xarray.c | 40 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 70 insertions(+) (limited to 'lib') diff --git a/include/linux/xarray.h b/include/linux/xarray.h index b4d70e7568b2..5b7f4ebcf4ff 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1505,6 +1505,15 @@ void xas_pause(struct xa_state *); void xas_create_range(struct xa_state *); +#ifdef CONFIG_XARRAY_MULTI +int xa_get_order(struct xarray *, unsigned long index); +#else +static inline int xa_get_order(struct xarray *xa, unsigned long index) +{ + return 0; +} +#endif + /** * xas_reload() - Refetch an entry from the xarray. * @xas: XArray operation state. diff --git a/lib/test_xarray.c b/lib/test_xarray.c index d4f97925dbd8..bdd4d7995f79 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1649,6 +1649,26 @@ static noinline void check_account(struct xarray *xa) #endif } +static noinline void check_get_order(struct xarray *xa) +{ + unsigned int max_order = IS_ENABLED(CONFIG_XARRAY_MULTI) ? 20 : 1; + unsigned int order; + unsigned long i, j; + + for (i = 0; i < 3; i++) + XA_BUG_ON(xa, xa_get_order(xa, i) != 0); + + for (order = 0; order < max_order; order++) { + for (i = 0; i < 10; i++) { + xa_store_order(xa, i << order, order, + xa_mk_index(i << order), GFP_KERNEL); + for (j = i << order; j < (i + 1) << order; j++) + XA_BUG_ON(xa, xa_get_order(xa, j) != order); + xa_erase(xa, i << order); + } + } +} + static noinline void check_destroy(struct xarray *xa) { unsigned long index; @@ -1697,6 +1717,7 @@ static int xarray_checks(void) check_reserve(&array); check_reserve(&xa0); check_multi_store(&array); + check_get_order(&array); check_xa_alloc(); check_find(&array); check_find_entry(&array); diff --git a/lib/xarray.c b/lib/xarray.c index e9e641d3c0c3..f24cb9a43af8 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -1592,6 +1592,46 @@ unlock: return xas_result(&xas, NULL); } EXPORT_SYMBOL(xa_store_range); + +/** + * xa_get_order() - Get the order of an entry. + * @xa: XArray. + * @index: Index of the entry. + * + * Return: A number between 0 and 63 indicating the order of the entry. + */ +int xa_get_order(struct xarray *xa, unsigned long index) +{ + XA_STATE(xas, xa, index); + void *entry; + int order = 0; + + rcu_read_lock(); + entry = xas_load(&xas); + + if (!entry) + goto unlock; + + if (!xas.xa_node) + goto unlock; + + for (;;) { + unsigned int slot = xas.xa_offset + (1 << order); + + if (slot >= XA_CHUNK_SIZE) + break; + if (!xa_is_sibling(xas.xa_node->slots[slot])) + break; + order++; + } + + order += xas.xa_node->shift; +unlock: + rcu_read_unlock(); + + return order; +} +EXPORT_SYMBOL(xa_get_order); #endif /* CONFIG_XARRAY_MULTI */ /** -- cgit v1.2.3 From 8fc75643c5e14574c8be59b69182452ece28315a Mon Sep 17 00:00:00 2001 From: "Matthew Wilcox (Oracle)" Date: Thu, 15 Oct 2020 20:05:16 -0700 Subject: XArray: add xas_split In order to use multi-index entries for huge pages in the page cache, we need to be able to split a multi-index entry (eg if a file is truncated in the middle of a huge page entry). This version does not support splitting more than one level of the tree at a time. This is an acceptable limitation for the page cache as we do not expect to support order-12 pages in the near future. [akpm@linux-foundation.org: export xas_split_alloc() to modules] [willy@infradead.org: fix xarray split] Link: https://lkml.kernel.org/r/20200910175450.GV6583@casper.infradead.org [willy@infradead.org: fix xarray] Link: https://lkml.kernel.org/r/20201001233943.GW20115@casper.infradead.org Signed-off-by: Matthew Wilcox (Oracle) Signed-off-by: Andrew Morton Cc: "Kirill A . Shutemov" Cc: Qian Cai Cc: Song Liu Link: https://lkml.kernel.org/r/20200903183029.14930-3-willy@infradead.org Signed-off-by: Linus Torvalds --- Documentation/core-api/xarray.rst | 16 ++-- include/linux/xarray.h | 13 +++ lib/test_xarray.c | 44 ++++++++++ lib/xarray.c | 168 ++++++++++++++++++++++++++++++++++++-- 4 files changed, 225 insertions(+), 16 deletions(-) (limited to 'lib') diff --git a/Documentation/core-api/xarray.rst b/Documentation/core-api/xarray.rst index 640934b6f7b4..a137a0e6d068 100644 --- a/Documentation/core-api/xarray.rst +++ b/Documentation/core-api/xarray.rst @@ -475,13 +475,15 @@ or iterations will move the index to the first index in the range. Each entry will only be returned once, no matter how many indices it occupies. -Using xas_next() or xas_prev() with a multi-index xa_state -is not supported. Using either of these functions on a multi-index entry -will reveal sibling entries; these should be skipped over by the caller. - -Storing ``NULL`` into any index of a multi-index entry will set the entry -at every index to ``NULL`` and dissolve the tie. Splitting a multi-index -entry into entries occupying smaller ranges is not yet supported. +Using xas_next() or xas_prev() with a multi-index xa_state is not +supported. Using either of these functions on a multi-index entry will +reveal sibling entries; these should be skipped over by the caller. + +Storing ``NULL`` into any index of a multi-index entry will set the +entry at every index to ``NULL`` and dissolve the tie. A multi-index +entry can be split into entries occupying smaller ranges by calling +xas_split_alloc() without the xa_lock held, followed by taking the lock +and calling xas_split(). Functions and structures ======================== diff --git a/include/linux/xarray.h b/include/linux/xarray.h index 5b7f4ebcf4ff..5cdf441f6377 100644 --- a/include/linux/xarray.h +++ b/include/linux/xarray.h @@ -1507,11 +1507,24 @@ void xas_create_range(struct xa_state *); #ifdef CONFIG_XARRAY_MULTI int xa_get_order(struct xarray *, unsigned long index); +void xas_split(struct xa_state *, void *entry, unsigned int order); +void xas_split_alloc(struct xa_state *, void *entry, unsigned int order, gfp_t); #else static inline int xa_get_order(struct xarray *xa, unsigned long index) { return 0; } + +static inline void xas_split(struct xa_state *xas, void *entry, + unsigned int order) +{ + xas_store(xas, entry); +} + +static inline void xas_split_alloc(struct xa_state *xas, void *entry, + unsigned int order, gfp_t gfp) +{ +} #endif /** diff --git a/lib/test_xarray.c b/lib/test_xarray.c index bdd4d7995f79..8262c3f05a5d 100644 --- a/lib/test_xarray.c +++ b/lib/test_xarray.c @@ -1503,6 +1503,49 @@ static noinline void check_store_range(struct xarray *xa) } } +#ifdef CONFIG_XARRAY_MULTI +static void check_split_1(struct xarray *xa, unsigned long index, + unsigned int order) +{ + XA_STATE(xas, xa, index); + void *entry; + unsigned int i = 0; + + xa_store_order(xa, index, order, xa, GFP_KERNEL); + + xas_split_alloc(&xas, xa, order, GFP_KERNEL); + xas_lock(&xas); + xas_split(&xas, xa, order); + xas_unlock(&xas); + + xa_for_each(xa, index, entry) { + XA_BUG_ON(xa, entry != xa); + i++; + } + XA_BUG_ON(xa, i != 1 << order); + + xa_set_mark(xa, index, XA_MARK_0); + XA_BUG_ON(xa, !xa_get_mark(xa, index, XA_MARK_0)); + + xa_destroy(xa); +} + +static noinline void check_split(struct xarray *xa) +{ + unsigned int order; + + XA_BUG_ON(xa, !xa_empty(xa)); + + for (order = 1; order < 2 * XA_CHUNK_SHIFT; order++) { + check_split_1(xa, 0, order); + check_split_1(xa, 1UL << order, order); + check_split_1(xa, 3UL << order, order); + } +} +#else +static void check_split(struct xarray *xa) { } +#endif + static void check_align_1(struct xarray *xa, char *name) { int i; @@ -1729,6 +1772,7 @@ static int xarray_checks(void) check_store_range(&array); check_store_iter(&array); check_align(&xa0); + check_split(&array); check_workingset(&array, 0); check_workingset(&array, 64); diff --git a/lib/xarray.c b/lib/xarray.c index f24cb9a43af8..b76eea7b314c 100644 --- a/lib/xarray.c +++ b/lib/xarray.c @@ -266,13 +266,14 @@ static void xa_node_free(struct xa_node *node) */ static void xas_destroy(struct xa_state *xas) { - struct xa_node *node = xas->xa_alloc; + struct xa_node *next, *node = xas->xa_alloc; - if (!node) - return; - XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); - kmem_cache_free(radix_tree_node_cachep, node); - xas->xa_alloc = NULL; + while (node) { + XA_NODE_BUG_ON(node, !list_empty(&node->private_list)); + next = rcu_dereference_raw(node->parent); + radix_tree_node_rcu_free(&node->rcu_head); + xas->xa_alloc = node = next; + } } /** @@ -304,6 +305,7 @@ bool xas_nomem(struct xa_state *xas, gfp_t gfp) xas->xa_alloc = kmem_cache_alloc(radix_tree_node_cachep, gfp); if (!xas->xa_alloc) return false; + xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; @@ -339,6 +341,7 @@ static bool __xas_nomem(struct xa_state *xas, gfp_t gfp) } if (!xas->xa_alloc) return false; + xas->xa_alloc->parent = NULL; XA_NODE_BUG_ON(xas->xa_alloc, !list_empty(&xas->xa_alloc->private_list)); xas->xa_node = XAS_RESTART; return true; @@ -403,7 +406,7 @@ static unsigned long xas_size(const struct xa_state *xas) /* * Use this to calculate the maximum index that will need to be created * in order to add the entry described by @xas. Because we cannot store a - * multiple-index entry at index 0, the calculation is a little more complex + * multi-index entry at index 0, the calculation is a little more complex * than you might expect. */ static unsigned long xas_max(struct xa_state *xas) @@ -946,6 +949,153 @@ void xas_init_marks(const struct xa_state *xas) } EXPORT_SYMBOL_GPL(xas_init_marks); +#ifdef CONFIG_XARRAY_MULTI +static unsigned int node_get_marks(struct xa_node *node, unsigned int offset) +{ + unsigned int marks = 0; + xa_mark_t mark = XA_MARK_0; + + for (;;) { + if (node_get_mark(node, offset, mark)) + marks |= 1 << (__force unsigned int)mark; + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } + + return marks; +} + +static void node_set_marks(struct xa_node *node, unsigned int offset, + struct xa_node *child, unsigned int marks) +{ + xa_mark_t mark = XA_MARK_0; + + for (;;) { + if (marks & (1 << (__force unsigned int)mark)) { + node_set_mark(node, offset, mark); + if (child) + node_mark_all(child, mark); + } + if (mark == XA_MARK_MAX) + break; + mark_inc(mark); + } +} + +/** + * xas_split_alloc() - Allocate memory for splitting an entry. + * @xas: XArray operation state. + * @entry: New entry which will be stored in the array. + * @order: New entry order. + * @gfp: Memory allocation flags. + * + * This function should be called before calling xas_split(). + * If necessary, it will allocate new nodes (and fill them with @entry) + * to prepare for the upcoming split of an entry of @order size into + * entries of the order stored in the @xas. + * + * Context: May sleep if @gfp flags permit. + */ +void xas_split_alloc(struct xa_state *xas, void *entry, unsigned int order, + gfp_t gfp) +{ + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + unsigned int mask = xas->xa_sibs; + + /* XXX: no support for splitting really large entries yet */ + if (WARN_ON(xas->xa_shift + 2 * XA_CHUNK_SHIFT < order)) + goto nomem; + if (xas->xa_shift + XA_CHUNK_SHIFT > order) + return; + + do { + unsigned int i; + void *sibling; + struct xa_node *node; + + node = kmem_cache_alloc(radix_tree_node_cachep, gfp); + if (!node) + goto nomem; + node->array = xas->xa; + for (i = 0; i < XA_CHUNK_SIZE; i++) { + if ((i & mask) == 0) { + RCU_INIT_POINTER(node->slots[i], entry); + sibling = xa_mk_sibling(0); + } else { + RCU_INIT_POINTER(node->slots[i], sibling); + } + } + RCU_INIT_POINTER(node->parent, xas->xa_alloc); + xas->xa_alloc = node; + } while (sibs-- > 0); + + return; +nomem: + xas_destroy(xas); + xas_set_err(xas, -ENOMEM); +} +EXPORT_SYMBOL_GPL(xas_split_alloc); + +/** + * xas_split() - Split a multi-index entry into smaller entries. + * @xas: XArray operation state. + * @entry: New entry to store in the array. + * @order: New entry order. + * + * The value in the entry is copied to all the replacement entries. + * + * Context: Any context. The caller should hold the xa_lock. + */ +void xas_split(struct xa_state *xas, void *entry, unsigned int order) +{ + unsigned int sibs = (1 << (order % XA_CHUNK_SHIFT)) - 1; + unsigned int offset, marks; + struct xa_node *node; + void *curr = xas_load(xas); + int values = 0; + + node = xas->xa_node; + if (xas_top(node)) + return; + + marks = node_get_marks(node, xas->xa_offset); + + offset = xas->xa_offset + sibs; + do { + if (xas->xa_shift < node->shift) { + struct xa_node *child = xas->xa_alloc; + + xas->xa_alloc = rcu_dereference_raw(child->parent); + child->shift = node->shift - XA_CHUNK_SHIFT; + child->offset = offset; + child->count = XA_CHUNK_SIZE; + child->nr_values = xa_is_value(entry) ? + XA_CHUNK_SIZE : 0; + RCU_INIT_POINTER(child->parent, node); + node_set_marks(node, offset, child, marks); + rcu_assign_pointer(node->slots[offset], + xa_mk_node(child)); + if (xa_is_value(curr)) + values--; + } else { + unsigned int canon = offset - xas->xa_sibs; + + node_set_marks(node, canon, NULL, marks); + rcu_assign_pointer(node->slots[canon], entry); + while (offset > canon) + rcu_assign_pointer(node->slots[offset--], + xa_mk_sibling(canon)); + values += (xa_is_value(entry) - xa_is_value(curr)) * + (xas->xa_sibs + 1); + } + } while (offset-- > xas->xa_offset); + + node->nr_values += values; +} +EXPORT_SYMBOL_GPL(xas_split); +#endif + /** * xas_pause() - Pause a walk to drop a lock. * @xas: XArray operation state. @@ -1407,7 +1557,7 @@ EXPORT_SYMBOL(__xa_store); * @gfp: Memory allocation flags. * * After this function returns, loads from this index will return @entry. - * Storing into an existing multislot entry updates the entry of every index. + * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Any context. Takes and releases the xa_lock. @@ -1549,7 +1699,7 @@ static void xas_set_range(struct xa_state *xas, unsigned long first, * * After this function returns, loads from any index between @first and @last, * inclusive will return @entry. - * Storing into an existing multislot entry updates the entry of every index. + * Storing into an existing multi-index entry updates the entry of every index. * The marks associated with @index are unaffected unless @entry is %NULL. * * Context: Process context. Takes and releases the xa_lock. May sleep -- cgit v1.2.3 From b296a6d53339a79082c1d2c1761e948e8b3def69 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 15 Oct 2020 20:10:21 -0700 Subject: kernel.h: split out min()/max() et al. helpers kernel.h is being used as a dump for all kinds of stuff for a long time. Here is the attempt to start cleaning it up by splitting out min()/max() et al. helpers. At the same time convert users in header and lib folder to use new header. Though for time being include new header back to kernel.h to avoid twisted indirected includes for other existing users. Signed-off-by: Andy Shevchenko Signed-off-by: Andrew Morton Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Rasmus Villemoes Cc: Joe Perches Cc: Linus Torvalds Link: https://lkml.kernel.org/r/20200910164152.GA1891694@smile.fi.intel.com Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 1 + include/linux/bvec.h | 6 +- include/linux/jiffies.h | 3 +- include/linux/kernel.h | 150 +-------------------------------------------- include/linux/minmax.h | 153 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nodemask.h | 2 +- include/linux/uaccess.h | 1 + kernel/range.c | 3 +- lib/find_bit.c | 1 + lib/hexdump.c | 1 + lib/math/rational.c | 2 +- lib/math/reciprocal_div.c | 1 + 12 files changed, 170 insertions(+), 154 deletions(-) create mode 100644 include/linux/minmax.h (limited to 'lib') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c09375e0a0eb..639cae2c158b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -8,6 +8,7 @@ #include #include #include +#include #include #include #include diff --git a/include/linux/bvec.h b/include/linux/bvec.h index dd74503f7e5e..2efec10bf792 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -7,10 +7,14 @@ #ifndef __LINUX_BVEC_ITER_H #define __LINUX_BVEC_ITER_H -#include #include #include +#include +#include #include +#include + +struct page; /** * struct bio_vec - a contiguous range of physical memory addresses diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index fed6ba96c527..5e13f801c902 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -3,8 +3,9 @@ #define _LINUX_JIFFIES_H #include +#include #include -#include +#include #include #include #include diff --git a/include/linux/kernel.h b/include/linux/kernel.h index e4aa29b1ad62..c629215fdad9 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -11,6 +11,7 @@ #include #include #include +#include #include #include #include @@ -833,155 +834,6 @@ ftrace_vprintk(const char *fmt, va_list ap) static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } #endif /* CONFIG_TRACING */ -/* - * min()/max()/clamp() macros must accomplish three things: - * - * - avoid multiple evaluations of the arguments (so side-effects like - * "x++" happen only once) when non-constant. - * - perform strict type-checking (to generate warnings instead of - * nasty runtime surprises). See the "unnecessary" pointer comparison - * in __typecheck(). - * - retain result as a constant expressions when called with only - * constant expressions (to avoid tripping VLA warnings in stack - * allocation usage). - */ -#define __typecheck(x, y) \ - (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) - -/* - * This returns a constant expression while determining if an argument is - * a constant expression, most importantly without evaluating the argument. - * Glory to Martin Uecker - */ -#define __is_constexpr(x) \ - (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) - -#define __no_side_effects(x, y) \ - (__is_constexpr(x) && __is_constexpr(y)) - -#define __safe_cmp(x, y) \ - (__typecheck(x, y) && __no_side_effects(x, y)) - -#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) - -#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ - typeof(x) unique_x = (x); \ - typeof(y) unique_y = (y); \ - __cmp(unique_x, unique_y, op); }) - -#define __careful_cmp(x, y, op) \ - __builtin_choose_expr(__safe_cmp(x, y), \ - __cmp(x, y, op), \ - __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) - -/** - * min - return minimum of two values of the same or compatible types - * @x: first value - * @y: second value - */ -#define min(x, y) __careful_cmp(x, y, <) - -/** - * max - return maximum of two values of the same or compatible types - * @x: first value - * @y: second value - */ -#define max(x, y) __careful_cmp(x, y, >) - -/** - * min3 - return minimum of three values - * @x: first value - * @y: second value - * @z: third value - */ -#define min3(x, y, z) min((typeof(x))min(x, y), z) - -/** - * max3 - return maximum of three values - * @x: first value - * @y: second value - * @z: third value - */ -#define max3(x, y, z) max((typeof(x))max(x, y), z) - -/** - * min_not_zero - return the minimum that is _not_ zero, unless both are zero - * @x: value1 - * @y: value2 - */ -#define min_not_zero(x, y) ({ \ - typeof(x) __x = (x); \ - typeof(y) __y = (y); \ - __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) - -/** - * clamp - return a value clamped to a given range with strict typechecking - * @val: current value - * @lo: lowest allowable value - * @hi: highest allowable value - * - * This macro does strict typechecking of @lo/@hi to make sure they are of the - * same type as @val. See the unnecessary pointer comparisons. - */ -#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) - -/* - * ..and if you can't take the strict - * types, you can specify one yourself. - * - * Or not use min/max/clamp at all, of course. - */ - -/** - * min_t - return minimum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value - */ -#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) - -/** - * max_t - return maximum of two values, using the specified type - * @type: data type to use - * @x: first value - * @y: second value - */ -#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) - -/** - * clamp_t - return a value clamped to a given range using a given type - * @type: the type of variable to use - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of type - * @type to make all the comparisons. - */ -#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) - -/** - * clamp_val - return a value clamped to a given range using val's type - * @val: current value - * @lo: minimum allowable value - * @hi: maximum allowable value - * - * This macro does no typechecking and uses temporary variables of whatever - * type the input argument @val is. This is useful when @val is an unsigned - * type and @lo and @hi are literals that will otherwise be assigned a signed - * integer type. - */ -#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) - - -/** - * swap - swap values of @a and @b - * @a: first value - * @b: second value - */ -#define swap(a, b) \ - do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) - /* This counts to 12. Any more, it will return 13th argument. */ #define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _n, X...) _n #define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) diff --git a/include/linux/minmax.h b/include/linux/minmax.h new file mode 100644 index 000000000000..c0f57b0c64d9 --- /dev/null +++ b/include/linux/minmax.h @@ -0,0 +1,153 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _LINUX_MINMAX_H +#define _LINUX_MINMAX_H + +/* + * min()/max()/clamp() macros must accomplish three things: + * + * - avoid multiple evaluations of the arguments (so side-effects like + * "x++" happen only once) when non-constant. + * - perform strict type-checking (to generate warnings instead of + * nasty runtime surprises). See the "unnecessary" pointer comparison + * in __typecheck(). + * - retain result as a constant expressions when called with only + * constant expressions (to avoid tripping VLA warnings in stack + * allocation usage). + */ +#define __typecheck(x, y) \ + (!!(sizeof((typeof(x) *)1 == (typeof(y) *)1))) + +/* + * This returns a constant expression while determining if an argument is + * a constant expression, most importantly without evaluating the argument. + * Glory to Martin Uecker + */ +#define __is_constexpr(x) \ + (sizeof(int) == sizeof(*(8 ? ((void *)((long)(x) * 0l)) : (int *)8))) + +#define __no_side_effects(x, y) \ + (__is_constexpr(x) && __is_constexpr(y)) + +#define __safe_cmp(x, y) \ + (__typecheck(x, y) && __no_side_effects(x, y)) + +#define __cmp(x, y, op) ((x) op (y) ? (x) : (y)) + +#define __cmp_once(x, y, unique_x, unique_y, op) ({ \ + typeof(x) unique_x = (x); \ + typeof(y) unique_y = (y); \ + __cmp(unique_x, unique_y, op); }) + +#define __careful_cmp(x, y, op) \ + __builtin_choose_expr(__safe_cmp(x, y), \ + __cmp(x, y, op), \ + __cmp_once(x, y, __UNIQUE_ID(__x), __UNIQUE_ID(__y), op)) + +/** + * min - return minimum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define min(x, y) __careful_cmp(x, y, <) + +/** + * max - return maximum of two values of the same or compatible types + * @x: first value + * @y: second value + */ +#define max(x, y) __careful_cmp(x, y, >) + +/** + * min3 - return minimum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define min3(x, y, z) min((typeof(x))min(x, y), z) + +/** + * max3 - return maximum of three values + * @x: first value + * @y: second value + * @z: third value + */ +#define max3(x, y, z) max((typeof(x))max(x, y), z) + +/** + * min_not_zero - return the minimum that is _not_ zero, unless both are zero + * @x: value1 + * @y: value2 + */ +#define min_not_zero(x, y) ({ \ + typeof(x) __x = (x); \ + typeof(y) __y = (y); \ + __x == 0 ? __y : ((__y == 0) ? __x : min(__x, __y)); }) + +/** + * clamp - return a value clamped to a given range with strict typechecking + * @val: current value + * @lo: lowest allowable value + * @hi: highest allowable value + * + * This macro does strict typechecking of @lo/@hi to make sure they are of the + * same type as @val. See the unnecessary pointer comparisons. + */ +#define clamp(val, lo, hi) min((typeof(val))max(val, lo), hi) + +/* + * ..and if you can't take the strict + * types, you can specify one yourself. + * + * Or not use min/max/clamp at all, of course. + */ + +/** + * min_t - return minimum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define min_t(type, x, y) __careful_cmp((type)(x), (type)(y), <) + +/** + * max_t - return maximum of two values, using the specified type + * @type: data type to use + * @x: first value + * @y: second value + */ +#define max_t(type, x, y) __careful_cmp((type)(x), (type)(y), >) + +/** + * clamp_t - return a value clamped to a given range using a given type + * @type: the type of variable to use + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of type + * @type to make all the comparisons. + */ +#define clamp_t(type, val, lo, hi) min_t(type, max_t(type, val, lo), hi) + +/** + * clamp_val - return a value clamped to a given range using val's type + * @val: current value + * @lo: minimum allowable value + * @hi: maximum allowable value + * + * This macro does no typechecking and uses temporary variables of whatever + * type the input argument @val is. This is useful when @val is an unsigned + * type and @lo and @hi are literals that will otherwise be assigned a signed + * integer type. + */ +#define clamp_val(val, lo, hi) clamp_t(typeof(val), val, lo, hi) + +/** + * swap - swap values of @a and @b + * @a: first value + * @b: second value + */ +#define swap(a, b) \ + do { typeof(a) __tmp = (a); (a) = (b); (b) = __tmp; } while (0) + +#endif /* _LINUX_MINMAX_H */ diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 3334ce056335..ac398e143c9a 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -90,9 +90,9 @@ * for such situations. See below and CPUMASK_ALLOC also. */ -#include #include #include +#include #include typedef struct { DECLARE_BITMAP(bits, MAX_NUMNODES); } nodemask_t; diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 1ae36bc8db35..ef084eacaa7c 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -3,6 +3,7 @@ #define __LINUX_UACCESS_H__ #include +#include #include #include diff --git a/kernel/range.c b/kernel/range.c index d84de6766472..56435f96da73 100644 --- a/kernel/range.c +++ b/kernel/range.c @@ -2,8 +2,9 @@ /* * Range add and subtract */ -#include #include +#include +#include #include #include #include diff --git a/lib/find_bit.c b/lib/find_bit.c index 49f875f1baf7..4a8751010d59 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -16,6 +16,7 @@ #include #include #include +#include #if !defined(find_next_bit) || !defined(find_next_zero_bit) || \ !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) || \ diff --git a/lib/hexdump.c b/lib/hexdump.c index 147133f8eb2f..9301578f98e8 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -7,6 +7,7 @@ #include #include #include +#include #include #include diff --git a/lib/math/rational.c b/lib/math/rational.c index df75c8809693..9781d521963d 100644 --- a/lib/math/rational.c +++ b/lib/math/rational.c @@ -11,7 +11,7 @@ #include #include #include -#include +#include /* * calculate best rational approximation for a given fraction diff --git a/lib/math/reciprocal_div.c b/lib/math/reciprocal_div.c index bf043258fa00..32436dd4171e 100644 --- a/lib/math/reciprocal_div.c +++ b/lib/math/reciprocal_div.c @@ -4,6 +4,7 @@ #include #include #include +#include /* * For a description of the algorithm please have a look at -- cgit v1.2.3 From 197d6c1dde4e19517e8cf843eec7fc6417241969 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:10:45 -0700 Subject: lib: bitmap: delete duplicated words Drop the repeated word "an". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040424.25760-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/bitmap.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/bitmap.c b/lib/bitmap.c index 61394890788e..75006c4036e9 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -23,7 +23,7 @@ /** * DOC: bitmap introduction * - * bitmaps provide an array of bits, implemented using an an + * bitmaps provide an array of bits, implemented using an * array of unsigned longs. The number of valid bits in a * given bitmap does _not_ need to be an exact multiple of * BITS_PER_LONG. -- cgit v1.2.3 From f1e594acb1bdf29b583fe205793699a8e7c35fc0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:10:48 -0700 Subject: lib: libcrc32c: delete duplicated words Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040430.25807-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/libcrc32c.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/libcrc32c.c b/lib/libcrc32c.c index 77ab839644c5..5ca0d815a95d 100644 --- a/lib/libcrc32c.c +++ b/lib/libcrc32c.c @@ -12,7 +12,7 @@ * pages = {}, * month = {June}, *} - * Used by the iSCSI driver, possibly others, and derived from the + * Used by the iSCSI driver, possibly others, and derived from * the iscsi-crc.c module of the linux-iscsi driver at * http://linux-iscsi.sourceforge.net. * -- cgit v1.2.3 From 2f22385fb1211a90f53ee197e0f41ae0d35b391c Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:10:51 -0700 Subject: lib: decompress_bunzip2: delete duplicated words Drop the repeated word "how". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040436.25852-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/decompress_bunzip2.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index f9628f3924ce..c72c865032fa 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -390,7 +390,7 @@ static int INIT get_next_block(struct bunzip_data *bd) j = (bd->inbufBits >> bd->inbufBitCount)& ((1 << hufGroup->maxLen)-1); got_huff_bits: - /* Figure how how many bits are in next symbol and + /* Figure how many bits are in next symbol and * unget extras */ i = hufGroup->minLen; while (j > limit[i]) -- cgit v1.2.3 From dde57fe01a0a39ce131ad5611e0a4430cec66bd8 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:10:57 -0700 Subject: lib: dynamic_queue_limits: delete duplicated words + fix typo Drop the repeated word "the". Fix spelling of "excess". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040449.25946-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/dynamic_queue_limits.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index e659a027036e..fde0aa244148 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -60,8 +60,8 @@ void dql_completed(struct dql *dql, unsigned int count) * A decrease is only considered if the queue has been busy in * the whole interval (the check above). * - * If there is slack, the amount of execess data queued above - * the the amount needed to prevent starvation, the queue limit + * If there is slack, the amount of excess data queued above + * the amount needed to prevent starvation, the queue limit * can be decreased. To avoid hysteresis we consider the * minimum amount of slack found over several iterations of the * completion routine. -- cgit v1.2.3 From 4e20ace06f705404df09316b859c267e9ec99354 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:01 -0700 Subject: lib: earlycpio: delete duplicated words Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040455.25995-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/earlycpio.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/earlycpio.c b/lib/earlycpio.c index c001e084829e..e83628882001 100644 --- a/lib/earlycpio.c +++ b/lib/earlycpio.c @@ -42,7 +42,7 @@ enum cpio_fields { /** * cpio_data find_cpio_data - Search for files in an uncompressed cpio * @path: The directory to search for, including a slash at the end - * @data: Pointer to the the cpio archive or a header inside + * @data: Pointer to the cpio archive or a header inside * @len: Remaining length of the cpio based on data pointer * @nextoff: When a matching file is found, this is the offset from the * beginning of the cpio to the beginning of the next file, not the -- cgit v1.2.3 From e0656501a6192ebced36764a007f91cc014d896d Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:04 -0700 Subject: lib: radix-tree: delete duplicated words Drop the repeated word "be". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040508.26086-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/radix-tree.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 8e4a3a4397f2..005ced92a4a9 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -325,7 +325,7 @@ static __must_check int __radix_tree_preload(gfp_t gfp_mask, unsigned nr) int ret = -ENOMEM; /* - * Nodes preloaded by one cgroup can be be used by another cgroup, so + * Nodes preloaded by one cgroup can be used by another cgroup, so * they should never be accounted to any particular memory cgroup. */ gfp_mask &= ~__GFP_ACCOUNT; -- cgit v1.2.3 From 408a93a2bb4f276f28059b51fbe0bbd06be350fc Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:07 -0700 Subject: lib: syscall: delete duplicated words Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040514.26136-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/syscall.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/syscall.c b/lib/syscall.c index fb328e7ccb08..8533d2fea2d7 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -44,7 +44,7 @@ static int collect_syscall(struct task_struct *target, struct syscall_info *info * .data.instruction_pointer - filled with user PC * * If @target is blocked in a system call, returns zero with @info.data.nr - * set to the the call's number and @info.data.args filled in with its + * set to the call's number and @info.data.args filled in with its * arguments. Registers not used for system call arguments may not be available * and it is not kosher to use &struct user_regset calls while the system * call is still in progress. Note we may get this result if @target -- cgit v1.2.3 From 2d0469814ade60efb5cd8ca9af93c43bc3ba831e Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:10 -0700 Subject: lib: test_sysctl: delete duplicated words Drop the repeated word "the". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200823040520.1999-1-rdunlap@infradead.org Signed-off-by: Linus Torvalds --- lib/test_sysctl.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c index 98bc92a91662..3750323973f4 100644 --- a/lib/test_sysctl.c +++ b/lib/test_sysctl.c @@ -16,7 +16,7 @@ */ /* - * This module provides an interface to the the proc sysctl interfaces. This + * This module provides an interface to the proc sysctl interfaces. This * driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the * system unless explicitly requested by name. You can also build this driver * into your kernel. -- cgit v1.2.3 From 8d8472cfdefa0f5c839f5c4a507af5e26ee6f5bf Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Thu, 15 Oct 2020 20:11:14 -0700 Subject: lib/mpi/mpi-bit.c: fix spello of "functions" Fix typo/spello of "functions". Signed-off-by: Randy Dunlap Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/8df15173-a6df-9426-7cad-a2d279bf1170@infradead.org Signed-off-by: Linus Torvalds --- lib/mpi/mpi-bit.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/mpi/mpi-bit.c b/lib/mpi/mpi-bit.c index a5119a2bcdd4..142b680835df 100644 --- a/lib/mpi/mpi-bit.c +++ b/lib/mpi/mpi-bit.c @@ -1,4 +1,4 @@ -/* mpi-bit.c - MPI bit level fucntions +/* mpi-bit.c - MPI bit level functions * Copyright (C) 1998, 1999 Free Software Foundation, Inc. * * This file is part of GnuPG. -- cgit v1.2.3 From 3b6742618ed9216dd6caad968fe8c83b32dff485 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 15 Oct 2020 20:11:17 -0700 Subject: lib/idr.c: document calling context for IDA APIs mustn't use locks The documentation for these functions indicates that callers don't need to hold a lock while calling them, but that documentation is only in one place under "IDA Usage". Let's state the same information on each IDA function so that it's clear what the calling context requires. Furthermore, let's document ida_simple_get() with the same information so that callers know how this API works. Signed-off-by: Stephen Boyd Signed-off-by: Andrew Morton Reviewed-by: Greg Kroah-Hartman Cc: Tri Vo Cc: Jonathan Corbet Cc: Matthew Wilcox Link: https://lkml.kernel.org/r/20200910055246.2297797-1-swboyd@chromium.org Signed-off-by: Linus Torvalds --- include/linux/idr.h | 9 ++++++--- lib/idr.c | 9 ++++++--- 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/include/linux/idr.h b/include/linux/idr.h index 3ade03e5c7af..b235ed987021 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -263,7 +263,8 @@ void ida_destroy(struct ida *ida); * * Allocate an ID between 0 and %INT_MAX, inclusive. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ @@ -280,7 +281,8 @@ static inline int ida_alloc(struct ida *ida, gfp_t gfp) * * Allocate an ID between @min and %INT_MAX, inclusive. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ @@ -297,7 +299,8 @@ static inline int ida_alloc_min(struct ida *ida, unsigned int min, gfp_t gfp) * * Allocate an ID between 0 and @max, inclusive. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ diff --git a/lib/idr.c b/lib/idr.c index c2cf2c52bbde..3fa8be43696f 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -372,7 +372,8 @@ EXPORT_SYMBOL(idr_replace); * Allocate an ID between @min and @max, inclusive. The allocated ID will * not exceed %INT_MAX, even if @max is larger. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. * Return: The allocated ID, or %-ENOMEM if memory could not be allocated, * or %-ENOSPC if there are no free IDs. */ @@ -479,7 +480,8 @@ EXPORT_SYMBOL(ida_alloc_range); * @ida: IDA handle. * @id: Previously allocated ID. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. */ void ida_free(struct ida *ida, unsigned int id) { @@ -531,7 +533,8 @@ EXPORT_SYMBOL(ida_free); * or freed. If the IDA is already empty, there is no need to call this * function. * - * Context: Any context. + * Context: Any context. It is safe to call this function without + * locking in your code. */ void ida_destroy(struct ida *ida) { -- cgit v1.2.3 From 6ed9b92e290b530197c2dda2271f5312abc475e6 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Thu, 15 Oct 2020 20:11:25 -0700 Subject: lib/scatterlist.c: avoid a double memset 'sgl' is zeroed a few lines below in 'sg_init_table()'. There is no need to clear it twice. Remove the redundant initialization. Signed-off-by: Christophe JAILLET Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/20200920071544.368841-1-christophe.jaillet@wanadoo.fr Signed-off-by: Linus Torvalds --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 5d63a8857f36..d94628fa3349 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -504,7 +504,7 @@ struct scatterlist *sgl_alloc_order(unsigned long long length, nalloc++; } sgl = kmalloc_array(nalloc, sizeof(struct scatterlist), - (gfp & ~GFP_DMA) | __GFP_ZERO); + gfp & ~GFP_DMA); if (!sgl) return NULL; -- cgit v1.2.3 From 1d339638a954edce06ffcaa15d883d322e9e8291 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 15 Oct 2020 20:11:28 -0700 Subject: lib/percpu_counter.c: use helper macro abs() Use helper macro abs() to simplify the "x >= t || x <= -t" cmp. Signed-off-by: Miaohe Lin Signed-off-by: Andrew Morton Reviewed-by: Andrew Morton Link: https://lkml.kernel.org/r/20200927122746.5964-1-linmiaohe@huawei.com Signed-off-by: Linus Torvalds --- lib/percpu_counter.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index f61689a96e85..00f666d94486 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -85,7 +85,7 @@ void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) preempt_disable(); count = __this_cpu_read(*fbc->counters) + amount; - if (count >= batch || count <= -batch) { + if (abs(count) >= batch) { unsigned long flags; raw_spin_lock_irqsave(&fbc->lock, flags); fbc->count += count; -- cgit v1.2.3 From f3c9d0a3fe97a8b05548d27e9a8e7a5e6875004c Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 15 Oct 2020 20:11:34 -0700 Subject: lib/test_hmm.c: fix an error code in dmirror_allocate_chunk() This is supposed to return false on failure, not a negative error code. Fixes: 170e38548b81 ("mm/hmm/test: use after free in dmirror_allocate_chunk()") Signed-off-by: Dan Carpenter Signed-off-by: Andrew Morton Reviewed-by: Ralph Campbell Cc: Jerome Glisse Cc: Stephen Rothwell Cc: Jason Gunthorpe Cc: Dan Williams Link: https://lkml.kernel.org/r/20201010200812.GA1886610@mwanda Signed-off-by: Linus Torvalds --- lib/test_hmm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/test_hmm.c b/lib/test_hmm.c index e151a7f10519..80a78877bd93 100644 --- a/lib/test_hmm.c +++ b/lib/test_hmm.c @@ -461,7 +461,7 @@ static bool dmirror_allocate_chunk(struct dmirror_device *mdevice, devmem = kzalloc(sizeof(*devmem), GFP_KERNEL); if (!devmem) - return -ENOMEM; + return false; res = request_free_mem_region(&iomem_resource, DEVMEM_CHUNK_SIZE, "hmm_dmirror"); -- cgit v1.2.3 From 904542dc56524f921a6bab0639ff6249c01e775f Mon Sep 17 00:00:00 2001 From: Tobias Jordan Date: Thu, 15 Oct 2020 20:11:38 -0700 Subject: lib/crc32.c: fix trivial typo in preprocessor condition Whether crc32_be needs a lookup table is chosen based on CRC_LE_BITS. Obviously, the _be function should be governed by the _BE_ define. This probably never pops up as it's hard to come up with a configuration where CRC_BE_BITS isn't the same as CRC_LE_BITS and as nobody is using bitwise CRC anyway. Fixes: 46c5801eaf86 ("crc32: bolt on crc32c") Signed-off-by: Tobias Jordan Signed-off-by: Andrew Morton Cc: Krzysztof Kozlowski Cc: Jonathan Corbet Cc: Mauro Carvalho Chehab Link: https://lkml.kernel.org/r/20200923182122.GA3338@agrajag.zerfleddert.de Signed-off-by: Linus Torvalds --- lib/crc32.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/crc32.c b/lib/crc32.c index 35a03d03f973..2a68dfd3b96c 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -331,7 +331,7 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, return crc; } -#if CRC_LE_BITS == 1 +#if CRC_BE_BITS == 1 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); -- cgit v1.2.3 From 6a6155f664e31c9be43cd690541a9a682ba3dc22 Mon Sep 17 00:00:00 2001 From: George Popescu Date: Thu, 15 Oct 2020 20:13:38 -0700 Subject: ubsan: introduce CONFIG_UBSAN_LOCAL_BOUNDS for Clang When the kernel is compiled with Clang, -fsanitize=bounds expands to -fsanitize=array-bounds and -fsanitize=local-bounds. Enabling -fsanitize=local-bounds with Clang has the unfortunate side-effect of inserting traps; this goes back to its original intent, which was as a hardening and not a debugging feature [1]. The same feature made its way into -fsanitize=bounds, but the traps remained. For that reason, -fsanitize=bounds was split into 'array-bounds' and 'local-bounds' [2]. Since 'local-bounds' doesn't behave like a normal sanitizer, enable it with Clang only if trapping behaviour was requested by CONFIG_UBSAN_TRAP=y. Add the UBSAN_BOUNDS_LOCAL config to Kconfig.ubsan to enable the 'local-bounds' option by default when UBSAN_TRAP is enabled. [1] http://lists.llvm.org/pipermail/llvm-dev/2012-May/049972.html [2] http://lists.llvm.org/pipermail/cfe-commits/Week-of-Mon-20131021/091536.html Suggested-by: Marco Elver Signed-off-by: George Popescu Signed-off-by: Andrew Morton Reviewed-by: David Brazdil Reviewed-by: Marco Elver Cc: Masahiro Yamada Cc: Michal Marek Cc: Nathan Chancellor Cc: Nick Desaulniers Cc: Kees Cook Cc: Dmitry Vyukov Cc: Arnd Bergmann Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/20200922074330.2549523-1-georgepope@google.com Signed-off-by: Linus Torvalds --- lib/Kconfig.ubsan | 14 ++++++++++++++ scripts/Makefile.ubsan | 10 +++++++++- 2 files changed, 23 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 774315de555a..58f8d03d037b 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -47,6 +47,20 @@ config UBSAN_BOUNDS to the {str,mem}*cpy() family of functions (that is addressed by CONFIG_FORTIFY_SOURCE). +config UBSAN_LOCAL_BOUNDS + bool "Perform array local bounds checking" + depends on UBSAN_TRAP + depends on CC_IS_CLANG + depends on !UBSAN_KCOV_BROKEN + help + This option enables -fsanitize=local-bounds which traps when an + exception/error is detected. Therefore, it should be enabled only + if trapping is expected. + Enabling this option detects errors due to accesses through a + pointer that is derived from an object of a statically-known size, + where an added offset (which may not be known statically) is + out-of-bounds. + config UBSAN_MISC bool "Enable all other Undefined Behavior sanity checks" default UBSAN diff --git a/scripts/Makefile.ubsan b/scripts/Makefile.ubsan index 27348029b2b8..4e3fff0745e8 100644 --- a/scripts/Makefile.ubsan +++ b/scripts/Makefile.ubsan @@ -4,7 +4,15 @@ ifdef CONFIG_UBSAN_ALIGNMENT endif ifdef CONFIG_UBSAN_BOUNDS - CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds) + ifdef CONFIG_CC_IS_CLANG + CFLAGS_UBSAN += -fsanitize=array-bounds + else + CFLAGS_UBSAN += $(call cc-option, -fsanitize=bounds) + endif +endif + +ifdef CONFIG_UBSAN_LOCAL_BOUNDS + CFLAGS_UBSAN += -fsanitize=local-bounds endif ifdef CONFIG_UBSAN_MISC -- cgit v1.2.3 From 2c739ced5886cd8c8361faa79a9522ec05174ed0 Mon Sep 17 00:00:00 2001 From: Albert van der Linde Date: Thu, 15 Oct 2020 20:13:46 -0700 Subject: lib, include/linux: add usercopy failure capability Patch series "add fault injection to user memory access", v3. The goal of this series is to improve testing of fault-tolerance in usages of user memory access functions, by adding support for fault injection. syzkaller/syzbot are using the existing fault injection modes and will use this particular feature also. The first patch adds failure injection capability for usercopy functions. The second changes usercopy functions to use this new failure capability (copy_from_user, ...). The third patch adds get/put/clear_user failures to x86. This patch (of 3): Add a failure injection capability to improve testing of fault-tolerance in usages of user memory access functions. Add CONFIG_FAULT_INJECTION_USERCOPY to enable faults in usercopy functions. The should_fail_usercopy function is to be called by these functions (copy_from_user, get_user, ...) in order to fail or not. Signed-off-by: Albert van der Linde Signed-off-by: Andrew Morton Reviewed-by: Akinobu Mita Reviewed-by: Alexander Potapenko Cc: Borislav Petkov Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Thomas Gleixner Cc: Arnd Bergmann Cc: Peter Zijlstra (Intel) Cc: "H. Peter Anvin" Cc: Al Viro Cc: Andrey Konovalov Cc: Dmitry Vyukov Cc: Marco Elver Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200831171733.955393-1-alinde@google.com Link: http://lkml.kernel.org/r/20200831171733.955393-2-alinde@google.com Signed-off-by: Linus Torvalds --- Documentation/admin-guide/kernel-parameters.txt | 1 + Documentation/fault-injection/fault-injection.rst | 7 +++- include/linux/fault-inject-usercopy.h | 22 +++++++++++++ lib/Kconfig.debug | 7 ++++ lib/Makefile | 1 + lib/fault-inject-usercopy.c | 39 +++++++++++++++++++++++ 6 files changed, 76 insertions(+), 1 deletion(-) create mode 100644 include/linux/fault-inject-usercopy.h create mode 100644 lib/fault-inject-usercopy.c (limited to 'lib') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index f7ac0e663976..6d9afb221ff7 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1343,6 +1343,7 @@ current integrity status. failslab= + fail_usercopy= fail_page_alloc= fail_make_request=[KNL] General fault injection mechanism. diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst index f850ad018b70..31ecfe44e5b4 100644 --- a/Documentation/fault-injection/fault-injection.rst +++ b/Documentation/fault-injection/fault-injection.rst @@ -16,6 +16,10 @@ Available fault injection capabilities injects page allocation failures. (alloc_pages(), get_free_pages(), ...) +- fail_usercopy + + injects failures in user memory access functions. (copy_from_user(), get_user(), ...) + - fail_futex injects futex deadlock and uaddr fault errors. @@ -177,6 +181,7 @@ use the boot option:: failslab= fail_page_alloc= + fail_usercopy= fail_make_request= fail_futex= mmc_core.fail_request=,,, @@ -222,7 +227,7 @@ How to add new fault injection capability - debugfs entries - failslab, fail_page_alloc, and fail_make_request use this way. + failslab, fail_page_alloc, fail_usercopy, and fail_make_request use this way. Helper functions: fault_create_debugfs_attr(name, parent, attr); diff --git a/include/linux/fault-inject-usercopy.h b/include/linux/fault-inject-usercopy.h new file mode 100644 index 000000000000..56c3a693fdd9 --- /dev/null +++ b/include/linux/fault-inject-usercopy.h @@ -0,0 +1,22 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __LINUX_FAULT_INJECT_USERCOPY_H__ +#define __LINUX_FAULT_INJECT_USERCOPY_H__ + +/* + * This header provides a wrapper for injecting failures to user space memory + * access functions. + */ + +#include + +#ifdef CONFIG_FAULT_INJECTION_USERCOPY + +bool should_fail_usercopy(void); + +#else + +static inline bool should_fail_usercopy(void) { return false; } + +#endif /* CONFIG_FAULT_INJECTION_USERCOPY */ + +#endif /* __LINUX_FAULT_INJECT_USERCOPY_H__ */ diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 491789a793ae..ebe5ab111e65 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1768,6 +1768,13 @@ config FAIL_PAGE_ALLOC help Provide fault-injection capability for alloc_pages(). +config FAULT_INJECTION_USERCOPY + bool "Fault injection capability for usercopy functions" + depends on FAULT_INJECTION + help + Provides fault-injection capability to inject failures + in usercopy functions (copy_from_user(), get_user(), ...). + config FAIL_MAKE_REQUEST bool "Fault-injection capability for disk IO" depends on FAULT_INJECTION && BLOCK diff --git a/lib/Makefile b/lib/Makefile index 49a2a9e36224..1c7577b2e86a 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -210,6 +210,7 @@ obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o +obj-$(CONFIG_FAULT_INJECTION_USERCOPY) += fault-inject-usercopy.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_PM_NOTIFIER_ERROR_INJECT) += pm-notifier-error-inject.o obj-$(CONFIG_NETDEV_NOTIFIER_ERROR_INJECT) += netdev-notifier-error-inject.o diff --git a/lib/fault-inject-usercopy.c b/lib/fault-inject-usercopy.c new file mode 100644 index 000000000000..77558b6c29ca --- /dev/null +++ b/lib/fault-inject-usercopy.c @@ -0,0 +1,39 @@ +// SPDX-License-Identifier: GPL-2.0-only +#include +#include + +static struct { + struct fault_attr attr; +} fail_usercopy = { + .attr = FAULT_ATTR_INITIALIZER, +}; + +static int __init setup_fail_usercopy(char *str) +{ + return setup_fault_attr(&fail_usercopy.attr, str); +} +__setup("fail_usercopy=", setup_fail_usercopy); + +#ifdef CONFIG_FAULT_INJECTION_DEBUG_FS + +static int __init fail_usercopy_debugfs(void) +{ + struct dentry *dir; + + dir = fault_create_debugfs_attr("fail_usercopy", NULL, + &fail_usercopy.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + return 0; +} + +late_initcall(fail_usercopy_debugfs); + +#endif /* CONFIG_FAULT_INJECTION_DEBUG_FS */ + +bool should_fail_usercopy(void) +{ + return should_fail(&fail_usercopy.attr, 1); +} +EXPORT_SYMBOL_GPL(should_fail_usercopy); -- cgit v1.2.3 From 4d0e9df5e43dba52d38b251e3b909df8fa1110be Mon Sep 17 00:00:00 2001 From: Albert van der Linde Date: Thu, 15 Oct 2020 20:13:50 -0700 Subject: lib, uaccess: add failure injection to usercopy functions To test fault-tolerance of user memory access functions, introduce fault injection to usercopy functions. If a failure is expected return either -EFAULT or the total amount of bytes that were not copied. Signed-off-by: Albert van der Linde Signed-off-by: Andrew Morton Reviewed-by: Akinobu Mita Reviewed-by: Alexander Potapenko Cc: Al Viro Cc: Andrey Konovalov Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Dmitry Vyukov Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Jonathan Corbet Cc: Marco Elver Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Cc: Christoph Hellwig Link: http://lkml.kernel.org/r/20200831171733.955393-3-alinde@google.com Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 11 ++++++++++- lib/iov_iter.c | 5 +++++ lib/strncpy_from_user.c | 3 +++ lib/usercopy.c | 5 ++++- 4 files changed, 22 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index ef084eacaa7c..1b8c9d6162bc 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -2,6 +2,7 @@ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ +#include #include #include #include @@ -84,6 +85,8 @@ static __always_inline __must_check unsigned long __copy_from_user(void *to, const void __user *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; instrument_copy_from_user(to, from, n); check_object_size(to, n, false); return raw_copy_from_user(to, from, n); @@ -105,6 +108,8 @@ __copy_from_user(void *to, const void __user *from, unsigned long n) static __always_inline __must_check unsigned long __copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) { + if (should_fail_usercopy()) + return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); @@ -114,6 +119,8 @@ static __always_inline __must_check unsigned long __copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; instrument_copy_to_user(to, from, n); check_object_size(from, n, true); return raw_copy_to_user(to, from, n); @@ -125,7 +132,7 @@ _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; might_fault(); - if (likely(access_ok(from, n))) { + if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } @@ -143,6 +150,8 @@ static inline __must_check unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 14cae2584db4..1635111c5bd2 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -2,6 +2,7 @@ #include #include #include +#include #include #include #include @@ -140,6 +141,8 @@ static int copyout(void __user *to, const void *from, size_t n) { + if (should_fail_usercopy()) + return n; if (access_ok(to, n)) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); @@ -149,6 +152,8 @@ static int copyout(void __user *to, const void *from, size_t n) static int copyin(void *to, const void __user *from, size_t n) { + if (should_fail_usercopy()) + return n; if (access_ok(from, n)) { instrument_copy_from_user(to, from, n); n = raw_copy_from_user(to, from, n); diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index 34696a348864..e6d5fcc2cdf3 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: GPL-2.0 #include #include +#include #include #include #include @@ -99,6 +100,8 @@ long strncpy_from_user(char *dst, const char __user *src, long count) unsigned long max_addr, src_addr; might_fault(); + if (should_fail_usercopy()) + return -EFAULT; if (unlikely(count <= 0)) return 0; diff --git a/lib/usercopy.c b/lib/usercopy.c index b26509f112f9..7413dd300516 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include +#include #include #include @@ -10,7 +11,7 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n { unsigned long res = n; might_fault(); - if (likely(access_ok(from, n))) { + if (!should_fail_usercopy() && likely(access_ok(from, n))) { instrument_copy_from_user(to, from, n); res = raw_copy_from_user(to, from, n); } @@ -25,6 +26,8 @@ EXPORT_SYMBOL(_copy_from_user); unsigned long _copy_to_user(void __user *to, const void *from, unsigned long n) { might_fault(); + if (should_fail_usercopy()) + return n; if (likely(access_ok(to, n))) { instrument_copy_to_user(to, from, n); n = raw_copy_to_user(to, from, n); -- cgit v1.2.3 From 294a7f1613ee49a608361bd319519561c0ca7e72 Mon Sep 17 00:00:00 2001 From: Vitor Massaru Iha Date: Thu, 15 Oct 2020 09:08:51 -0300 Subject: lib: kunit: Fix compilation test when using TEST_BIT_FIELD_COMPILE A build condition was missing around a compilation test, this compilation test comes from the original test_bitfield code. And removed unnecessary code for this test. Fixes: d2585f5164c2 ("lib: kunit: add bitfield test conversion to KUnit") Reported-by: Stephen Rothwell Signed-off-by: Vitor Massaru Iha Link: https://lore.kernel.org/linux-next/20201015163056.56fcc835@canb.auug.org.au/ Reviewed-by: Brendan Higgins Signed-off-by: Shuah Khan --- lib/bitfield_kunit.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/bitfield_kunit.c b/lib/bitfield_kunit.c index d63a2be5aff8..1473d8b4bf0f 100644 --- a/lib/bitfield_kunit.c +++ b/lib/bitfield_kunit.c @@ -125,7 +125,7 @@ static void __init test_bitfields_variables(struct kunit *context) CHECK(u64, 0x0000001f8000000ull); } - +#ifdef TEST_BITFIELD_COMPILE static void __init test_bitfields_compile(struct kunit *context) { /* these should fail compilation */ @@ -135,13 +135,11 @@ static void __init test_bitfields_compile(struct kunit *context) /* this should at least give a warning */ u16_encode_bits(0, 0x60000); } +#endif static struct kunit_case __refdata bitfields_test_cases[] = { KUNIT_CASE(test_bitfields_constants), KUNIT_CASE(test_bitfields_variables), -#ifdef TEST_BITFIELD_COMPILE - KUNIT_CASE(test_bitfields_compile), -#endif {} }; -- cgit v1.2.3 From 272d70895113ef00c03ab325787d159ee51718c8 Mon Sep 17 00:00:00 2001 From: Peilin Ye Date: Sun, 18 Oct 2020 14:12:04 -0400 Subject: Fonts: Support FONT_EXTRA_WORDS macros for font_6x8 Recently, in commit 6735b4632def ("Fonts: Support FONT_EXTRA_WORDS macros for built-in fonts"), we wrapped each of our built-in data buffers in a `font_data` structure, in order to use the following macros on them, see include/linux/font.h: #define REFCOUNT(fd) (((int *)(fd))[-1]) #define FNTSIZE(fd) (((int *)(fd))[-2]) #define FNTCHARCNT(fd) (((int *)(fd))[-3]) #define FNTSUM(fd) (((int *)(fd))[-4]) #define FONT_EXTRA_WORDS 4 Do the same thing to our new 6x8 font. For built-in fonts, currently we only use FNTSIZE(). Since this is only a temporary solution for an out-of-bounds issue in the framebuffer layer (see commit 5af08640795b ("fbcon: Fix global-out-of-bounds read in fbcon_get_font()")), all the three other fields are intentionally set to zero in order to discourage using these negative-indexing macros. Signed-off-by: Peilin Ye Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/926453876c92caac34cba8545716a491754d04d5.1603037079.git.yepeilin.cs@gmail.com --- lib/fonts/font_6x8.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'lib') diff --git a/lib/fonts/font_6x8.c b/lib/fonts/font_6x8.c index e06447788418..700039a9ceae 100644 --- a/lib/fonts/font_6x8.c +++ b/lib/fonts/font_6x8.c @@ -3,8 +3,8 @@ #define FONTDATAMAX 2048 -static const unsigned char fontdata_6x8[FONTDATAMAX] = { - +static struct font_data fontdata_6x8 = { + { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 000000 */ 0x00, /* 000000 */ @@ -2564,13 +2564,13 @@ static const unsigned char fontdata_6x8[FONTDATAMAX] = { 0x00, /* 000000 */ 0x00, /* 000000 */ 0x00, /* 000000 */ -}; +} }; const struct font_desc font_6x8 = { .idx = FONT6x8_IDX, .name = "6x8", .width = 6, .height = 8, - .data = fontdata_6x8, + .data = fontdata_6x8.data, .pref = 0, }; -- cgit v1.2.3 From c51f8f88d705e06bd696d7510aff22b33eb8e638 Mon Sep 17 00:00:00 2001 From: George Spelvin Date: Sun, 9 Aug 2020 06:57:44 +0000 Subject: random32: make prandom_u32() output unpredictable Non-cryptographic PRNGs may have great statistical properties, but are usually trivially predictable to someone who knows the algorithm, given a small sample of their output. An LFSR like prandom_u32() is particularly simple, even if the sample is widely scattered bits. It turns out the network stack uses prandom_u32() for some things like random port numbers which it would prefer are *not* trivially predictable. Predictability led to a practical DNS spoofing attack. Oops. This patch replaces the LFSR with a homebrew cryptographic PRNG based on the SipHash round function, which is in turn seeded with 128 bits of strong random key. (The authors of SipHash have *not* been consulted about this abuse of their algorithm.) Speed is prioritized over security; attacks are rare, while performance is always wanted. Replacing all callers of prandom_u32() is the quick fix. Whether to reinstate a weaker PRNG for uses which can tolerate it is an open question. Commit f227e3ec3b5c ("random32: update the net random state on interrupt and activity") was an earlier attempt at a solution. This patch replaces it. Reported-by: Amit Klein Cc: Willy Tarreau Cc: Eric Dumazet Cc: "Jason A. Donenfeld" Cc: Andy Lutomirski Cc: Kees Cook Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Linus Torvalds Cc: tytso@mit.edu Cc: Florian Westphal Cc: Marc Plumb Fixes: f227e3ec3b5c ("random32: update the net random state on interrupt and activity") Signed-off-by: George Spelvin Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/ [ willy: partial reversal of f227e3ec3b5c; moved SIPROUND definitions to prandom.h for later use; merged George's prandom_seed() proposal; inlined siprand_u32(); replaced the net_rand_state[] array with 4 members to fix a build issue; cosmetic cleanups to make checkpatch happy; fixed RANDOM32_SELFTEST build ] Signed-off-by: Willy Tarreau --- drivers/char/random.c | 1 - include/linux/prandom.h | 36 +++- kernel/time/timer.c | 7 - lib/random32.c | 464 +++++++++++++++++++++++++++++------------------- 4 files changed, 318 insertions(+), 190 deletions(-) (limited to 'lib') diff --git a/drivers/char/random.c b/drivers/char/random.c index d20ba1b104ca..2a41b21623ae 100644 --- a/drivers/char/random.c +++ b/drivers/char/random.c @@ -1277,7 +1277,6 @@ void add_interrupt_randomness(int irq, int irq_flags) fast_mix(fast_pool); add_interrupt_bench(cycles); - this_cpu_add(net_rand_state.s1, fast_pool->pool[cycles & 3]); if (unlikely(crng_init == 0)) { if ((fast_pool->count >= 64) && diff --git a/include/linux/prandom.h b/include/linux/prandom.h index aa16e6468f91..cc1e71334e53 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -16,12 +16,44 @@ void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); +#if BITS_PER_LONG == 64 +/* + * The core SipHash round function. Each line can be executed in + * parallel given enough CPU resources. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol64(v1, 13), v2 += v3, v3 = rol64(v3, 16), \ + v1 ^= v0, v0 = rol64(v0, 32), v3 ^= v2, \ + v0 += v3, v3 = rol64(v3, 21), v2 += v1, v1 = rol64(v1, 17), \ + v3 ^= v0, v1 ^= v2, v2 = rol64(v2, 32) \ +) + +#define PRND_K0 (0x736f6d6570736575 ^ 0x6c7967656e657261) +#define PRND_K1 (0x646f72616e646f6d ^ 0x7465646279746573) + +#elif BITS_PER_LONG == 32 +/* + * On 32-bit machines, we use HSipHash, a reduced-width version of SipHash. + * This is weaker, but 32-bit machines are not used for high-traffic + * applications, so there is less output for an attacker to analyze. + */ +#define PRND_SIPROUND(v0, v1, v2, v3) ( \ + v0 += v1, v1 = rol32(v1, 5), v2 += v3, v3 = rol32(v3, 8), \ + v1 ^= v0, v0 = rol32(v0, 16), v3 ^= v2, \ + v0 += v3, v3 = rol32(v3, 7), v2 += v1, v1 = rol32(v1, 13), \ + v3 ^= v0, v1 ^= v2, v2 = rol32(v2, 16) \ +) +#define PRND_K0 0x6c796765 +#define PRND_K1 0x74656462 + +#else +#error Unsupported BITS_PER_LONG +#endif + struct rnd_state { __u32 s1, s2, s3, s4; }; -DECLARE_PER_CPU(struct rnd_state, net_rand_state); - u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); diff --git a/kernel/time/timer.c b/kernel/time/timer.c index dda05f4b7a1f..3e341af741b9 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1717,13 +1717,6 @@ void update_process_times(int user_tick) scheduler_tick(); if (IS_ENABLED(CONFIG_POSIX_TIMERS)) run_posix_cpu_timers(); - - /* The current CPU might make use of net randoms without receiving IRQs - * to renew them often enough. Let's update the net_rand_state from a - * non-constant value that's not affine to the number of calls to make - * sure it's updated when there's some activity (we don't care in idle). - */ - this_cpu_add(net_rand_state.s1, rol32(jiffies, 24) + user_tick); } /** diff --git a/lib/random32.c b/lib/random32.c index dfb9981ab798..be9f242a4207 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -41,16 +41,6 @@ #include #include -#ifdef CONFIG_RANDOM32_SELFTEST -static void __init prandom_state_selftest(void); -#else -static inline void prandom_state_selftest(void) -{ -} -#endif - -DEFINE_PER_CPU(struct rnd_state, net_rand_state) __latent_entropy; - /** * prandom_u32_state - seeded pseudo-random number generator. * @state: pointer to state structure holding seeded state. @@ -70,26 +60,6 @@ u32 prandom_u32_state(struct rnd_state *state) } EXPORT_SYMBOL(prandom_u32_state); -/** - * prandom_u32 - pseudo random number generator - * - * A 32 bit pseudo-random number is generated using a fast - * algorithm suitable for simulation. This algorithm is NOT - * considered safe for cryptographic use. - */ -u32 prandom_u32(void) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - u32 res; - - res = prandom_u32_state(state); - trace_prandom_u32(res); - put_cpu_var(net_rand_state); - - return res; -} -EXPORT_SYMBOL(prandom_u32); - /** * prandom_bytes_state - get the requested number of pseudo-random bytes * @@ -121,20 +91,6 @@ void prandom_bytes_state(struct rnd_state *state, void *buf, size_t bytes) } EXPORT_SYMBOL(prandom_bytes_state); -/** - * prandom_bytes - get the requested number of pseudo-random bytes - * @buf: where to copy the pseudo-random bytes to - * @bytes: the requested number of bytes - */ -void prandom_bytes(void *buf, size_t bytes) -{ - struct rnd_state *state = &get_cpu_var(net_rand_state); - - prandom_bytes_state(state, buf, bytes); - put_cpu_var(net_rand_state); -} -EXPORT_SYMBOL(prandom_bytes); - static void prandom_warmup(struct rnd_state *state) { /* Calling RNG ten times to satisfy recurrence condition */ @@ -150,96 +106,6 @@ static void prandom_warmup(struct rnd_state *state) prandom_u32_state(state); } -static u32 __extract_hwseed(void) -{ - unsigned int val = 0; - - (void)(arch_get_random_seed_int(&val) || - arch_get_random_int(&val)); - - return val; -} - -static void prandom_seed_early(struct rnd_state *state, u32 seed, - bool mix_with_hwseed) -{ -#define LCG(x) ((x) * 69069U) /* super-duper LCG */ -#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) - state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); - state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); - state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); - state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); -} - -/** - * prandom_seed - add entropy to pseudo random number generator - * @entropy: entropy value - * - * Add some additional entropy to the prandom pool. - */ -void prandom_seed(u32 entropy) -{ - int i; - /* - * No locking on the CPUs, but then somewhat random results are, well, - * expected. - */ - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - - state->s1 = __seed(state->s1 ^ entropy, 2U); - prandom_warmup(state); - } -} -EXPORT_SYMBOL(prandom_seed); - -/* - * Generate some initially weak seeding values to allow - * to start the prandom_u32() engine. - */ -static int __init prandom_init(void) -{ - int i; - - prandom_state_selftest(); - - for_each_possible_cpu(i) { - struct rnd_state *state = &per_cpu(net_rand_state, i); - u32 weak_seed = (i + jiffies) ^ random_get_entropy(); - - prandom_seed_early(state, weak_seed, true); - prandom_warmup(state); - } - - return 0; -} -core_initcall(prandom_init); - -static void __prandom_timer(struct timer_list *unused); - -static DEFINE_TIMER(seed_timer, __prandom_timer); - -static void __prandom_timer(struct timer_list *unused) -{ - u32 entropy; - unsigned long expires; - - get_random_bytes(&entropy, sizeof(entropy)); - prandom_seed(entropy); - - /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ - expires = 40 + prandom_u32_max(40); - seed_timer.expires = jiffies + msecs_to_jiffies(expires * MSEC_PER_SEC); - - add_timer(&seed_timer); -} - -static void __init __prandom_start_seed_timer(void) -{ - seed_timer.expires = jiffies + msecs_to_jiffies(40 * MSEC_PER_SEC); - add_timer(&seed_timer); -} - void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) { int i; @@ -259,51 +125,6 @@ void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state) } EXPORT_SYMBOL(prandom_seed_full_state); -/* - * Generate better values after random number generator - * is fully initialized. - */ -static void __prandom_reseed(bool late) -{ - unsigned long flags; - static bool latch = false; - static DEFINE_SPINLOCK(lock); - - /* Asking for random bytes might result in bytes getting - * moved into the nonblocking pool and thus marking it - * as initialized. In this case we would double back into - * this function and attempt to do a late reseed. - * Ignore the pointless attempt to reseed again if we're - * already waiting for bytes when the nonblocking pool - * got initialized. - */ - - /* only allow initial seeding (late == false) once */ - if (!spin_trylock_irqsave(&lock, flags)) - return; - - if (latch && !late) - goto out; - - latch = true; - prandom_seed_full_state(&net_rand_state); -out: - spin_unlock_irqrestore(&lock, flags); -} - -void prandom_reseed_late(void) -{ - __prandom_reseed(true); -} - -static int __init prandom_reseed(void) -{ - __prandom_reseed(false); - __prandom_start_seed_timer(); - return 0; -} -late_initcall(prandom_reseed); - #ifdef CONFIG_RANDOM32_SELFTEST static struct prandom_test1 { u32 seed; @@ -423,7 +244,28 @@ static struct prandom_test2 { { 407983964U, 921U, 728767059U }, }; -static void __init prandom_state_selftest(void) +static u32 __extract_hwseed(void) +{ + unsigned int val = 0; + + (void)(arch_get_random_seed_int(&val) || + arch_get_random_int(&val)); + + return val; +} + +static void prandom_seed_early(struct rnd_state *state, u32 seed, + bool mix_with_hwseed) +{ +#define LCG(x) ((x) * 69069U) /* super-duper LCG */ +#define HWSEED() (mix_with_hwseed ? __extract_hwseed() : 0) + state->s1 = __seed(HWSEED() ^ LCG(seed), 2U); + state->s2 = __seed(HWSEED() ^ LCG(state->s1), 8U); + state->s3 = __seed(HWSEED() ^ LCG(state->s2), 16U); + state->s4 = __seed(HWSEED() ^ LCG(state->s3), 128U); +} + +static int __init prandom_state_selftest(void) { int i, j, errors = 0, runs = 0; bool error = false; @@ -463,5 +305,267 @@ static void __init prandom_state_selftest(void) pr_warn("prandom: %d/%d self tests failed\n", errors, runs); else pr_info("prandom: %d self tests passed\n", runs); + return 0; } +core_initcall(prandom_state_selftest); #endif + +/* + * The prandom_u32() implementation is now completely separate from the + * prandom_state() functions, which are retained (for now) for compatibility. + * + * Because of (ab)use in the networking code for choosing random TCP/UDP port + * numbers, which open DoS possibilities if guessable, we want something + * stronger than a standard PRNG. But the performance requirements of + * the network code do not allow robust crypto for this application. + * + * So this is a homebrew Junior Spaceman implementation, based on the + * lowest-latency trustworthy crypto primitive available, SipHash. + * (The authors of SipHash have not been consulted about this abuse of + * their work.) + * + * Standard SipHash-2-4 uses 2n+4 rounds to hash n words of input to + * one word of output. This abbreviated version uses 2 rounds per word + * of output. + */ + +struct siprand_state { + unsigned long v0; + unsigned long v1; + unsigned long v2; + unsigned long v3; +}; + +static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy; + +/* + * This is the core CPRNG function. As "pseudorandom", this is not used + * for truly valuable things, just intended to be a PITA to guess. + * For maximum speed, we do just two SipHash rounds per word. This is + * the same rate as 4 rounds per 64 bits that SipHash normally uses, + * so hopefully it's reasonably secure. + * + * There are two changes from the official SipHash finalization: + * - We omit some constants XORed with v2 in the SipHash spec as irrelevant; + * they are there only to make the output rounds distinct from the input + * rounds, and this application has no input rounds. + * - Rather than returning v0^v1^v2^v3, return v1+v3. + * If you look at the SipHash round, the last operation on v3 is + * "v3 ^= v0", so "v0 ^ v3" just undoes that, a waste of time. + * Likewise "v1 ^= v2". (The rotate of v2 makes a difference, but + * it still cancels out half of the bits in v2 for no benefit.) + * Second, since the last combining operation was xor, continue the + * pattern of alternating xor/add for a tiny bit of extra non-linearity. + */ +static inline u32 siprand_u32(struct siprand_state *s) +{ + unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3; + + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3; + return v1 + v3; +} + + +/** + * prandom_u32 - pseudo random number generator + * + * A 32 bit pseudo-random number is generated using a fast + * algorithm suitable for simulation. This algorithm is NOT + * considered safe for cryptographic use. + */ +u32 prandom_u32(void) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u32 res = siprand_u32(state); + + trace_prandom_u32(res); + put_cpu_ptr(&net_rand_state); + return res; +} +EXPORT_SYMBOL(prandom_u32); + +/** + * prandom_bytes - get the requested number of pseudo-random bytes + * @buf: where to copy the pseudo-random bytes to + * @bytes: the requested number of bytes + */ +void prandom_bytes(void *buf, size_t bytes) +{ + struct siprand_state *state = get_cpu_ptr(&net_rand_state); + u8 *ptr = buf; + + while (bytes >= sizeof(u32)) { + put_unaligned(siprand_u32(state), (u32 *)ptr); + ptr += sizeof(u32); + bytes -= sizeof(u32); + } + + if (bytes > 0) { + u32 rem = siprand_u32(state); + + do { + *ptr++ = (u8)rem; + rem >>= BITS_PER_BYTE; + } while (--bytes > 0); + } + put_cpu_ptr(&net_rand_state); +} +EXPORT_SYMBOL(prandom_bytes); + +/** + * prandom_seed - add entropy to pseudo random number generator + * @entropy: entropy value + * + * Add some additional seed material to the prandom pool. + * The "entropy" is actually our IP address (the only caller is + * the network code), not for unpredictability, but to ensure that + * different machines are initialized differently. + */ +void prandom_seed(u32 entropy) +{ + int i; + + add_device_randomness(&entropy, sizeof(entropy)); + + for_each_possible_cpu(i) { + struct siprand_state *state = per_cpu_ptr(&net_rand_state, i); + unsigned long v0 = state->v0, v1 = state->v1; + unsigned long v2 = state->v2, v3 = state->v3; + + do { + v3 ^= entropy; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= entropy; + } while (unlikely(!v0 || !v1 || !v2 || !v3)); + + WRITE_ONCE(state->v0, v0); + WRITE_ONCE(state->v1, v1); + WRITE_ONCE(state->v2, v2); + WRITE_ONCE(state->v3, v3); + } +} +EXPORT_SYMBOL(prandom_seed); + +/* + * Generate some initially weak seeding values to allow + * the prandom_u32() engine to be started. + */ +static int __init prandom_init_early(void) +{ + int i; + unsigned long v0, v1, v2, v3; + + if (!arch_get_random_long(&v0)) + v0 = jiffies; + if (!arch_get_random_long(&v1)) + v1 = random_get_entropy(); + v2 = v0 ^ PRND_K0; + v3 = v1 ^ PRND_K1; + + for_each_possible_cpu(i) { + struct siprand_state *state; + + v3 ^= i; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= i; + + state = per_cpu_ptr(&net_rand_state, i); + state->v0 = v0; state->v1 = v1; + state->v2 = v2; state->v3 = v3; + } + + return 0; +} +core_initcall(prandom_init_early); + + +/* Stronger reseeding when available, and periodically thereafter. */ +static void prandom_reseed(struct timer_list *unused); + +static DEFINE_TIMER(seed_timer, prandom_reseed); + +static void prandom_reseed(struct timer_list *unused) +{ + unsigned long expires; + int i; + + /* + * Reinitialize each CPU's PRNG with 128 bits of key. + * No locking on the CPUs, but then somewhat random results are, + * well, expected. + */ + for_each_possible_cpu(i) { + struct siprand_state *state; + unsigned long v0 = get_random_long(), v2 = v0 ^ PRND_K0; + unsigned long v1 = get_random_long(), v3 = v1 ^ PRND_K1; +#if BITS_PER_LONG == 32 + int j; + + /* + * On 32-bit machines, hash in two extra words to + * approximate 128-bit key length. Not that the hash + * has that much security, but this prevents a trivial + * 64-bit brute force. + */ + for (j = 0; j < 2; j++) { + unsigned long m = get_random_long(); + + v3 ^= m; + PRND_SIPROUND(v0, v1, v2, v3); + PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= m; + } +#endif + /* + * Probably impossible in practice, but there is a + * theoretical risk that a race between this reseeding + * and the target CPU writing its state back could + * create the all-zero SipHash fixed point. + * + * To ensure that never happens, ensure the state + * we write contains no zero words. + */ + state = per_cpu_ptr(&net_rand_state, i); + WRITE_ONCE(state->v0, v0 ? v0 : -1ul); + WRITE_ONCE(state->v1, v1 ? v1 : -1ul); + WRITE_ONCE(state->v2, v2 ? v2 : -1ul); + WRITE_ONCE(state->v3, v3 ? v3 : -1ul); + } + + /* reseed every ~60 seconds, in [40 .. 80) interval with slack */ + expires = round_jiffies(jiffies + 40 * HZ + prandom_u32_max(40 * HZ)); + mod_timer(&seed_timer, expires); +} + +/* + * The random ready callback can be called from almost any interrupt. + * To avoid worrying about whether it's safe to delay that interrupt + * long enough to seed all CPUs, just schedule an immediate timer event. + */ +static void prandom_timer_start(struct random_ready_callback *unused) +{ + mod_timer(&seed_timer, jiffies); +} + +/* + * Start periodic full reseeding as soon as strong + * random numbers are available. + */ +static int __init prandom_init_late(void) +{ + static struct random_ready_callback random_ready = { + .func = prandom_timer_start + }; + int ret = add_random_ready_callback(&random_ready); + + if (ret == -EALREADY) { + prandom_timer_start(&random_ready); + ret = 0; + } + return ret; +} +late_initcall(prandom_init_late); -- cgit v1.2.3 From 3744741adab6d9195551ce30e65e726c7a408421 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Mon, 10 Aug 2020 10:27:42 +0200 Subject: random32: add noise from network and scheduling activity With the removal of the interrupt perturbations in previous random32 change (random32: make prandom_u32() output unpredictable), the PRNG has become 100% deterministic again. While SipHash is expected to be way more robust against brute force than the previous Tausworthe LFSR, there's still the risk that whoever has even one temporary access to the PRNG's internal state is able to predict all subsequent draws till the next reseed (roughly every minute). This may happen through a side channel attack or any data leak. This patch restores the spirit of commit f227e3ec3b5c ("random32: update the net random state on interrupt and activity") in that it will perturb the internal PRNG's statee using externally collected noise, except that it will not pick that noise from the random pool's bits nor upon interrupt, but will rather combine a few elements along the Tx path that are collectively hard to predict, such as dev, skb and txq pointers, packet length and jiffies values. These ones are combined using a single round of SipHash into a single long variable that is mixed with the net_rand_state upon each invocation. The operation was inlined because it produces very small and efficient code, typically 3 xor, 2 add and 2 rol. The performance was measured to be the same (even very slightly better) than before the switch to SipHash; on a 6-core 12-thread Core i7-8700k equipped with a 40G NIC (i40e), the connection rate dropped from 556k/s to 555k/s while the SYN cookie rate grew from 5.38 Mpps to 5.45 Mpps. Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/ Cc: George Spelvin Cc: Amit Klein Cc: Eric Dumazet Cc: "Jason A. Donenfeld" Cc: Andy Lutomirski Cc: Kees Cook Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Linus Torvalds Cc: tytso@mit.edu Cc: Florian Westphal Cc: Marc Plumb Tested-by: Sedat Dilek Signed-off-by: Willy Tarreau --- include/linux/prandom.h | 19 +++++++++++++++++++ kernel/time/timer.c | 2 ++ lib/random32.c | 5 +++++ net/core/dev.c | 4 ++++ 4 files changed, 30 insertions(+) (limited to 'lib') diff --git a/include/linux/prandom.h b/include/linux/prandom.h index cc1e71334e53..bbf4b4ad61df 100644 --- a/include/linux/prandom.h +++ b/include/linux/prandom.h @@ -16,6 +16,12 @@ void prandom_bytes(void *buf, size_t nbytes); void prandom_seed(u32 seed); void prandom_reseed_late(void); +DECLARE_PER_CPU(unsigned long, net_rand_noise); + +#define PRANDOM_ADD_NOISE(a, b, c, d) \ + prandom_u32_add_noise((unsigned long)(a), (unsigned long)(b), \ + (unsigned long)(c), (unsigned long)(d)) + #if BITS_PER_LONG == 64 /* * The core SipHash round function. Each line can be executed in @@ -50,6 +56,18 @@ void prandom_reseed_late(void); #error Unsupported BITS_PER_LONG #endif +static inline void prandom_u32_add_noise(unsigned long a, unsigned long b, + unsigned long c, unsigned long d) +{ + /* + * This is not used cryptographically; it's just + * a convenient 4-word hash function. (3 xor, 2 add, 2 rol) + */ + a ^= raw_cpu_read(net_rand_noise); + PRND_SIPROUND(a, b, c, d); + raw_cpu_write(net_rand_noise, d); +} + struct rnd_state { __u32 s1, s2, s3, s4; }; @@ -99,6 +117,7 @@ static inline void prandom_seed_state(struct rnd_state *state, u64 seed) state->s2 = __seed(i, 8U); state->s3 = __seed(i, 16U); state->s4 = __seed(i, 128U); + PRANDOM_ADD_NOISE(state, i, 0, 0); } /* Pseudo random number generator from numerical recipes. */ diff --git a/kernel/time/timer.c b/kernel/time/timer.c index 3e341af741b9..de37e33a868d 100644 --- a/kernel/time/timer.c +++ b/kernel/time/timer.c @@ -1706,6 +1706,8 @@ void update_process_times(int user_tick) { struct task_struct *p = current; + PRANDOM_ADD_NOISE(jiffies, user_tick, p, 0); + /* Note: this timer irq context must be accounted for as well. */ account_process_tick(p, user_tick); run_local_timers(); diff --git a/lib/random32.c b/lib/random32.c index be9f242a4207..7f047844e494 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -337,6 +337,8 @@ struct siprand_state { }; static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy; +DEFINE_PER_CPU(unsigned long, net_rand_noise); +EXPORT_PER_CPU_SYMBOL(net_rand_noise); /* * This is the core CPRNG function. As "pseudorandom", this is not used @@ -360,9 +362,12 @@ static DEFINE_PER_CPU(struct siprand_state, net_rand_state) __latent_entropy; static inline u32 siprand_u32(struct siprand_state *s) { unsigned long v0 = s->v0, v1 = s->v1, v2 = s->v2, v3 = s->v3; + unsigned long n = raw_cpu_read(net_rand_noise); + v3 ^= n; PRND_SIPROUND(v0, v1, v2, v3); PRND_SIPROUND(v0, v1, v2, v3); + v0 ^= n; s->v0 = v0; s->v1 = v1; s->v2 = v2; s->v3 = v3; return v1 + v3; } diff --git a/net/core/dev.c b/net/core/dev.c index 9499a414d67e..82dc6b48e45f 100644 --- a/net/core/dev.c +++ b/net/core/dev.c @@ -145,6 +145,7 @@ #include #include #include +#include #include "net-sysfs.h" @@ -3558,6 +3559,7 @@ static int xmit_one(struct sk_buff *skb, struct net_device *dev, dev_queue_xmit_nit(skb, dev); len = skb->len; + PRANDOM_ADD_NOISE(skb, dev, txq, len + jiffies); trace_net_dev_start_xmit(skb, dev); rc = netdev_start_xmit(skb, dev, txq, more); trace_net_dev_xmit(skb, rc, dev, len); @@ -4130,6 +4132,7 @@ static int __dev_queue_xmit(struct sk_buff *skb, struct net_device *sb_dev) if (!skb) goto out; + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); HARD_TX_LOCK(dev, txq, cpu); if (!netif_xmit_stopped(txq)) { @@ -4195,6 +4198,7 @@ int dev_direct_xmit(struct sk_buff *skb, u16 queue_id) skb_set_queue_mapping(skb, queue_id); txq = skb_get_tx_queue(dev, skb); + PRANDOM_ADD_NOISE(skb, dev, txq, jiffies); local_bh_disable(); -- cgit v1.2.3 From c6e169bc146a76d5ccbf4d3825f705414352bd03 Mon Sep 17 00:00:00 2001 From: Willy Tarreau Date: Sat, 24 Oct 2020 18:36:27 +0200 Subject: random32: add a selftest for the prandom32 code Given that this code is new, let's add a selftest for it as well. It doesn't rely on fixed sets, instead it picks 1024 numbers and verifies that they're not more correlated than desired. Link: https://lore.kernel.org/netdev/20200808152628.GA27941@SDF.ORG/ Cc: George Spelvin Cc: Amit Klein Cc: Eric Dumazet Cc: "Jason A. Donenfeld" Cc: Andy Lutomirski Cc: Kees Cook Cc: Thomas Gleixner Cc: Peter Zijlstra Cc: Linus Torvalds Cc: tytso@mit.edu Cc: Florian Westphal Cc: Marc Plumb Signed-off-by: Willy Tarreau --- lib/random32.c | 56 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 56 insertions(+) (limited to 'lib') diff --git a/lib/random32.c b/lib/random32.c index 7f047844e494..4d0e05e471d7 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -38,6 +38,7 @@ #include #include #include +#include #include #include @@ -556,6 +557,61 @@ static void prandom_timer_start(struct random_ready_callback *unused) mod_timer(&seed_timer, jiffies); } +#ifdef CONFIG_RANDOM32_SELFTEST +/* Principle: True 32-bit random numbers will all have 16 differing bits on + * average. For each 32-bit number, there are 601M numbers differing by 16 + * bits, and 89% of the numbers differ by at least 12 bits. Note that more + * than 16 differing bits also implies a correlation with inverted bits. Thus + * we take 1024 random numbers and compare each of them to the other ones, + * counting the deviation of correlated bits to 16. Constants report 32, + * counters 32-log2(TEST_SIZE), and pure randoms, around 6 or lower. With the + * u32 total, TEST_SIZE may be as large as 4096 samples. + */ +#define TEST_SIZE 1024 +static int __init prandom32_state_selftest(void) +{ + unsigned int x, y, bits, samples; + u32 xor, flip; + u32 total; + u32 *data; + + data = kmalloc(sizeof(*data) * TEST_SIZE, GFP_KERNEL); + if (!data) + return 0; + + for (samples = 0; samples < TEST_SIZE; samples++) + data[samples] = prandom_u32(); + + flip = total = 0; + for (x = 0; x < samples; x++) { + for (y = 0; y < samples; y++) { + if (x == y) + continue; + xor = data[x] ^ data[y]; + flip |= xor; + bits = hweight32(xor); + total += (bits - 16) * (bits - 16); + } + } + + /* We'll return the average deviation as 2*sqrt(corr/samples), which + * is also sqrt(4*corr/samples) which provides a better resolution. + */ + bits = int_sqrt(total / (samples * (samples - 1)) * 4); + if (bits > 6) + pr_warn("prandom32: self test failed (at least %u bits" + " correlated, fixed_mask=%#x fixed_value=%#x\n", + bits, ~flip, data[0] & ~flip); + else + pr_info("prandom32: self test passed (less than %u bits" + " correlated)\n", + bits+1); + kfree(data); + return 0; +} +core_initcall(prandom32_state_selftest); +#endif /* CONFIG_RANDOM32_SELFTEST */ + /* * Start periodic full reseeding as soon as strong * random numbers are available. -- cgit v1.2.3 From 1f41be7d4e90e36084037cecba5978b3d7f849db Mon Sep 17 00:00:00 2001 From: David Disseldorp Date: Mon, 26 Oct 2020 22:03:10 +0100 Subject: lib/scatterlist: use consistent sg_copy_buffer() return type sg_copy_buffer() returns a size_t with the number of bytes copied. Return 0 instead of false if the copy is skipped. Signed-off-by: David Disseldorp Reviewed-by: Douglas Gilbert Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- lib/scatterlist.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 0a482ef988e5..a59778946404 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -933,7 +933,7 @@ size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, sg_miter_start(&miter, sgl, nents, sg_flags); if (!sg_miter_skip(&miter, skip)) - return false; + return 0; while ((offset < buflen) && sg_miter_next(&miter)) { unsigned int len; -- cgit v1.2.3 From 75442fb0ccaacddf1654a5304401a9f556c61004 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Fri, 30 Oct 2020 08:40:45 +0100 Subject: docs: Kconfig/Makefile: add a check for broken ABI files The files under Documentation/ABI should follow the syntax as defined at Documentation/ABI/README. Allow checking if they're following the syntax by running the ABI parser script on COMPILE_TEST. With that, when there's a problem with a file under Documentation/ABI, it would produce a warning like: Warning: file ./Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats#14: What '/sys/bus/pci/devices//aer_stats/aer_rootport_total_err_cor' doesn't have a description Warning: file ./Documentation/ABI/testing/sysfs-bus-pci-devices-aer_stats#21: What '/sys/bus/pci/devices//aer_stats/aer_rootport_total_err_fatal' doesn't have a description Acked-by: Jonathan Corbet Signed-off-by: Mauro Carvalho Chehab Link: https://lore.kernel.org/r/57a38de85cb4b548857207cf1fc1bf1ee08613c9.1604042072.git.mchehab+huawei@kernel.org Signed-off-by: Greg Kroah-Hartman --- Documentation/Kconfig | 10 ++++++++++ Documentation/Makefile | 5 +++++ lib/Kconfig.debug | 2 ++ scripts/get_abi.pl | 14 +++++++++++--- 4 files changed, 28 insertions(+), 3 deletions(-) (limited to 'lib') diff --git a/Documentation/Kconfig b/Documentation/Kconfig index 66046fa1c341..e549a61f4d96 100644 --- a/Documentation/Kconfig +++ b/Documentation/Kconfig @@ -10,4 +10,14 @@ config WARN_MISSING_DOCUMENTS If unsure, select 'N'. +config WARN_ABI_ERRORS + bool "Warn if there are errors at ABI files" + depends on COMPILE_TEST + help + The files under Documentation/ABI should follow what's + described at Documentation/ABI/README. Yet, as they're manually + written, it would be possible that some of those files would + have errors that would break them for being parsed by + scripts/get_abi.pl. Add a check to verify them. + If unsure, select 'N'. diff --git a/Documentation/Makefile b/Documentation/Makefile index 6b12dd82f712..6a59a13d3c53 100644 --- a/Documentation/Makefile +++ b/Documentation/Makefile @@ -10,6 +10,11 @@ ifeq ($(CONFIG_WARN_MISSING_DOCUMENTS),y) $(shell $(srctree)/scripts/documentation-file-ref-check --warn) endif +# Check for broken ABI files +ifeq ($(CONFIG_WARN_ABI_ERRORS),y) +$(shell $(srctree)/scripts/get_abi.pl validate --dir $(srctree)/Documentation/ABI) +endif + # You can set these variables from the command line. SPHINXBUILD = sphinx-build SPHINXOPTS = diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index d7a7bc3b6098..c789b39ed527 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2446,4 +2446,6 @@ config HYPERV_TESTING endmenu # "Kernel Testing and Coverage" +source "Documentation/Kconfig" + endmenu # Kernel hacking diff --git a/scripts/get_abi.pl b/scripts/get_abi.pl index ff4f9f82ecad..f6adf4b38a2b 100755 --- a/scripts/get_abi.pl +++ b/scripts/get_abi.pl @@ -50,7 +50,15 @@ my %symbols; sub parse_error($$$$) { my ($file, $ln, $msg, $data) = @_; - print STDERR "file $file#$ln: $msg at\n\t$data"; + $data =~ s/\s+$/\n/; + + print STDERR "Warning: file $file#$ln:\n\t$msg"; + + if ($data ne "") { + print STDERR ". Line\n\t\t$data"; + } else { + print STDERR "\n"; + } } # @@ -110,7 +118,7 @@ sub parse_abi { # Invalid, but it is a common mistake if ($new_tag eq "where") { - parse_error($file, $ln, "tag 'Where' is invalid. Should be 'What:' instead", $_); + parse_error($file, $ln, "tag 'Where' is invalid. Should be 'What:' instead", ""); $new_tag = "what"; } @@ -225,7 +233,7 @@ sub parse_abi { } # Everything else is error - parse_error($file, $ln, "Unexpected line:", $_); + parse_error($file, $ln, "Unexpected content", $_); } $data{$nametag}->{description} =~ s/^\n+// if ($data{$nametag}->{description}); if ($what) { -- cgit v1.2.3 From 58b999d7a22c59313e1e84832607c7a61640f4e7 Mon Sep 17 00:00:00 2001 From: Andrey Konovalov Date: Sun, 1 Nov 2020 17:07:37 -0800 Subject: kasan: adopt KUNIT tests to SW_TAGS mode Now that we have KASAN-KUNIT tests integration, it's easy to see that some KASAN tests are not adopted to the SW_TAGS mode and are failing. Adjust the allocation size for kasan_memchr() and kasan_memcmp() by roung it up to OOB_TAG_OFF so the bad access ends up in a separate memory granule. Add a new kmalloc_uaf_16() tests that relies on UAF, and a new kasan_bitops_tags() test that is tailored to tag-based mode, as it's hard to adopt the existing kmalloc_oob_16() and kasan_bitops_generic() (renamed from kasan_bitops()) without losing the precision. Add new kmalloc_uaf_16() and kasan_bitops_uaf() tests that rely on UAFs, as it's hard to adopt the existing kmalloc_oob_16() and kasan_bitops_oob() (rename from kasan_bitops()) without losing the precision. Disable kasan_global_oob() and kasan_alloca_oob_left/right() as SW_TAGS mode doesn't instrument globals nor dynamic allocas. Signed-off-by: Andrey Konovalov Signed-off-by: Andrew Morton Tested-by: David Gow Link: https://lkml.kernel.org/r/76eee17b6531ca8b3ca92b240cb2fd23204aaff7.1603129942.git.andreyknvl@google.com Signed-off-by: Linus Torvalds --- lib/test_kasan.c | 149 +++++++++++++++++++++++++++++++++++++++---------------- 1 file changed, 107 insertions(+), 42 deletions(-) (limited to 'lib') diff --git a/lib/test_kasan.c b/lib/test_kasan.c index 63c26171a791..662f862702fc 100644 --- a/lib/test_kasan.c +++ b/lib/test_kasan.c @@ -216,6 +216,12 @@ static void kmalloc_oob_16(struct kunit *test) u64 words[2]; } *ptr1, *ptr2; + /* This test is specifically crafted for the generic mode. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required\n"); + return; + } + ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); @@ -227,6 +233,23 @@ static void kmalloc_oob_16(struct kunit *test) kfree(ptr2); } +static void kmalloc_uaf_16(struct kunit *test) +{ + struct { + u64 words[2]; + } *ptr1, *ptr2; + + ptr1 = kmalloc(sizeof(*ptr1), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr1); + + ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr2); + kfree(ptr2); + + KUNIT_EXPECT_KASAN_FAIL(test, *ptr1 = *ptr2); + kfree(ptr1); +} + static void kmalloc_oob_memset_2(struct kunit *test) { char *ptr; @@ -429,6 +452,12 @@ static void kasan_global_oob(struct kunit *test) volatile int i = 3; char *p = &global_array[ARRAY_SIZE(global_array) + i]; + /* Only generic mode instruments globals. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required"); + return; + } + KUNIT_EXPECT_KASAN_FAIL(test, *(volatile char *)p); } @@ -467,6 +496,12 @@ static void kasan_alloca_oob_left(struct kunit *test) char alloca_array[i]; char *p = alloca_array - 1; + /* Only generic mode instruments dynamic allocas. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required"); + return; + } + if (!IS_ENABLED(CONFIG_KASAN_STACK)) { kunit_info(test, "CONFIG_KASAN_STACK is not enabled"); return; @@ -481,6 +516,12 @@ static void kasan_alloca_oob_right(struct kunit *test) char alloca_array[i]; char *p = alloca_array + i; + /* Only generic mode instruments dynamic allocas. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required"); + return; + } + if (!IS_ENABLED(CONFIG_KASAN_STACK)) { kunit_info(test, "CONFIG_KASAN_STACK is not enabled"); return; @@ -551,6 +592,9 @@ static void kasan_memchr(struct kunit *test) return; } + if (OOB_TAG_OFF) + size = round_up(size, OOB_TAG_OFF); + ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); @@ -573,6 +617,9 @@ static void kasan_memcmp(struct kunit *test) return; } + if (OOB_TAG_OFF) + size = round_up(size, OOB_TAG_OFF); + ptr = kmalloc(size, GFP_KERNEL | __GFP_ZERO); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, ptr); memset(arr, 0, sizeof(arr)); @@ -619,13 +666,50 @@ static void kasan_strings(struct kunit *test) KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = strnlen(ptr, 1)); } -static void kasan_bitops(struct kunit *test) +static void kasan_bitops_modify(struct kunit *test, int nr, void *addr) +{ + KUNIT_EXPECT_KASAN_FAIL(test, set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, change_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(nr, addr)); +} + +static void kasan_bitops_test_and_modify(struct kunit *test, int nr, void *addr) +{ + KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __test_and_set_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, test_and_set_bit_lock(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, test_and_clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __test_and_clear_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, test_and_change_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, __test_and_change_bit(nr, addr)); + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = test_bit(nr, addr)); + +#if defined(clear_bit_unlock_is_negative_byte) + KUNIT_EXPECT_KASAN_FAIL(test, kasan_int_result = + clear_bit_unlock_is_negative_byte(nr, addr)); +#endif +} + +static void kasan_bitops_generic(struct kunit *test) { + long *bits; + + /* This test is specifically crafted for the generic mode. */ + if (!IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_GENERIC required\n"); + return; + } + /* * Allocate 1 more byte, which causes kzalloc to round up to 16-bytes; * this way we do not actually corrupt other memory. */ - long *bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL); + bits = kzalloc(sizeof(*bits) + 1, GFP_KERNEL); KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits); /* @@ -633,55 +717,34 @@ static void kasan_bitops(struct kunit *test) * below accesses are still out-of-bounds, since bitops are defined to * operate on the whole long the bit is in. */ - KUNIT_EXPECT_KASAN_FAIL(test, set_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, __set_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, clear_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, clear_bit_unlock(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, __clear_bit_unlock(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, change_bit(BITS_PER_LONG, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, __change_bit(BITS_PER_LONG, bits)); + kasan_bitops_modify(test, BITS_PER_LONG, bits); /* * Below calls try to access bit beyond allocated memory. */ - KUNIT_EXPECT_KASAN_FAIL(test, - test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, - __test_and_set_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); - - KUNIT_EXPECT_KASAN_FAIL(test, - test_and_set_bit_lock(BITS_PER_LONG + BITS_PER_BYTE, bits)); + kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, bits); - KUNIT_EXPECT_KASAN_FAIL(test, - test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); + kfree(bits); +} - KUNIT_EXPECT_KASAN_FAIL(test, - __test_and_clear_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); +static void kasan_bitops_tags(struct kunit *test) +{ + long *bits; - KUNIT_EXPECT_KASAN_FAIL(test, - test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); + /* This test is specifically crafted for the tag-based mode. */ + if (IS_ENABLED(CONFIG_KASAN_GENERIC)) { + kunit_info(test, "CONFIG_KASAN_SW_TAGS required\n"); + return; + } - KUNIT_EXPECT_KASAN_FAIL(test, - __test_and_change_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); + /* Allocation size will be rounded to up granule size, which is 16. */ + bits = kzalloc(sizeof(*bits), GFP_KERNEL); + KUNIT_ASSERT_NOT_ERR_OR_NULL(test, bits); - KUNIT_EXPECT_KASAN_FAIL(test, - kasan_int_result = - test_bit(BITS_PER_LONG + BITS_PER_BYTE, bits)); + /* Do the accesses past the 16 allocated bytes. */ + kasan_bitops_modify(test, BITS_PER_LONG, &bits[1]); + kasan_bitops_test_and_modify(test, BITS_PER_LONG + BITS_PER_BYTE, &bits[1]); -#if defined(clear_bit_unlock_is_negative_byte) - KUNIT_EXPECT_KASAN_FAIL(test, - kasan_int_result = clear_bit_unlock_is_negative_byte( - BITS_PER_LONG + BITS_PER_BYTE, bits)); -#endif kfree(bits); } @@ -728,6 +791,7 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kmalloc_oob_krealloc_more), KUNIT_CASE(kmalloc_oob_krealloc_less), KUNIT_CASE(kmalloc_oob_16), + KUNIT_CASE(kmalloc_uaf_16), KUNIT_CASE(kmalloc_oob_in_memset), KUNIT_CASE(kmalloc_oob_memset_2), KUNIT_CASE(kmalloc_oob_memset_4), @@ -751,7 +815,8 @@ static struct kunit_case kasan_kunit_test_cases[] = { KUNIT_CASE(kasan_memchr), KUNIT_CASE(kasan_memcmp), KUNIT_CASE(kasan_strings), - KUNIT_CASE(kasan_bitops), + KUNIT_CASE(kasan_bitops_generic), + KUNIT_CASE(kasan_bitops_tags), KUNIT_CASE(kmalloc_double_kzfree), KUNIT_CASE(vmalloc_oob), {} -- cgit v1.2.3 From aa4e460f0976351fddd2f5ac6e08b74320c277a1 Mon Sep 17 00:00:00 2001 From: Vasily Gorbik Date: Sun, 1 Nov 2020 17:07:47 -0800 Subject: lib/crc32test: remove extra local_irq_disable/enable Commit 4d004099a668 ("lockdep: Fix lockdep recursion") uncovered the following issue in lib/crc32test reported on s390: BUG: using __this_cpu_read() in preemptible [00000000] code: swapper/0/1 caller is lockdep_hardirqs_on_prepare+0x48/0x270 CPU: 6 PID: 1 Comm: swapper/0 Not tainted 5.9.0-next-20201015-15164-g03d992bd2de6 #19 Hardware name: IBM 3906 M04 704 (LPAR) Call Trace: lockdep_hardirqs_on_prepare+0x48/0x270 trace_hardirqs_on+0x9c/0x1b8 crc32_test.isra.0+0x170/0x1c0 crc32test_init+0x1c/0x40 do_one_initcall+0x40/0x130 do_initcalls+0x126/0x150 kernel_init_freeable+0x1f6/0x230 kernel_init+0x22/0x150 ret_from_fork+0x24/0x2c no locks held by swapper/0/1. Remove extra local_irq_disable/local_irq_enable helpers calls. Fixes: 5fb7f87408f1 ("lib: add module support to crc32 tests") Signed-off-by: Vasily Gorbik Signed-off-by: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Greg Kroah-Hartman Link: https://lkml.kernel.org/r/patch.git-4369da00c06e.your-ad-here.call-01602859837-ext-1679@work.hours Signed-off-by: Linus Torvalds --- lib/crc32test.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'lib') diff --git a/lib/crc32test.c b/lib/crc32test.c index 97d6a57cefcc..61ddce2cff77 100644 --- a/lib/crc32test.c +++ b/lib/crc32test.c @@ -683,7 +683,6 @@ static int __init crc32c_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -694,7 +693,6 @@ static int __init crc32c_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32c: CRC_LE_BITS = %d\n", CRC_LE_BITS); @@ -768,7 +766,6 @@ static int __init crc32_test(void) /* reduce OS noise */ local_irq_save(flags); - local_irq_disable(); nsec = ktime_get_ns(); for (i = 0; i < 100; i++) { @@ -783,7 +780,6 @@ static int __init crc32_test(void) nsec = ktime_get_ns() - nsec; local_irq_restore(flags); - local_irq_enable(); pr_info("crc32: CRC_LE_BITS = %d, CRC_BE BITS = %d\n", CRC_LE_BITS, CRC_BE_BITS); -- cgit v1.2.3 From 9522750c66c689b739e151fcdf895420dc81efc0 Mon Sep 17 00:00:00 2001 From: Lee Jones Date: Mon, 2 Nov 2020 13:32:42 -0500 Subject: Fonts: Replace discarded const qualifier Commit 6735b4632def ("Fonts: Support FONT_EXTRA_WORDS macros for built-in fonts") introduced the following error when building rpc_defconfig (only this build appears to be affected): `acorndata_8x8' referenced in section `.text' of arch/arm/boot/compressed/ll_char_wr.o: defined in discarded section `.data' of arch/arm/boot/compressed/font.o `acorndata_8x8' referenced in section `.data.rel.ro' of arch/arm/boot/compressed/font.o: defined in discarded section `.data' of arch/arm/boot/compressed/font.o make[3]: *** [/scratch/linux/arch/arm/boot/compressed/Makefile:191: arch/arm/boot/compressed/vmlinux] Error 1 make[2]: *** [/scratch/linux/arch/arm/boot/Makefile:61: arch/arm/boot/compressed/vmlinux] Error 2 make[1]: *** [/scratch/linux/arch/arm/Makefile:317: zImage] Error 2 The .data section is discarded at link time. Reinstating acorndata_8x8 as const ensures it is still available after linking. Do the same for the other 12 built-in fonts as well, for consistency purposes. Cc: Cc: Russell King Reviewed-by: Greg Kroah-Hartman Fixes: 6735b4632def ("Fonts: Support FONT_EXTRA_WORDS macros for built-in fonts") Signed-off-by: Lee Jones Co-developed-by: Peilin Ye Signed-off-by: Peilin Ye Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20201102183242.2031659-1-yepeilin.cs@gmail.com --- lib/fonts/font_10x18.c | 2 +- lib/fonts/font_6x10.c | 2 +- lib/fonts/font_6x11.c | 2 +- lib/fonts/font_6x8.c | 2 +- lib/fonts/font_7x14.c | 2 +- lib/fonts/font_8x16.c | 2 +- lib/fonts/font_8x8.c | 2 +- lib/fonts/font_acorn_8x8.c | 2 +- lib/fonts/font_mini_4x6.c | 2 +- lib/fonts/font_pearl_8x8.c | 2 +- lib/fonts/font_sun12x22.c | 2 +- lib/fonts/font_sun8x16.c | 2 +- lib/fonts/font_ter16x32.c | 2 +- 13 files changed, 13 insertions(+), 13 deletions(-) (limited to 'lib') diff --git a/lib/fonts/font_10x18.c b/lib/fonts/font_10x18.c index 0e2deac97da0..e02f9df24d1e 100644 --- a/lib/fonts/font_10x18.c +++ b/lib/fonts/font_10x18.c @@ -8,7 +8,7 @@ #define FONTDATAMAX 9216 -static struct font_data fontdata_10x18 = { +static const struct font_data fontdata_10x18 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 0000000000 */ diff --git a/lib/fonts/font_6x10.c b/lib/fonts/font_6x10.c index 87da8acd07db..6e3c4b7691c8 100644 --- a/lib/fonts/font_6x10.c +++ b/lib/fonts/font_6x10.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 2560 -static struct font_data fontdata_6x10 = { +static const struct font_data fontdata_6x10 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_6x11.c b/lib/fonts/font_6x11.c index 5e975dfa10a5..2d22a24e816f 100644 --- a/lib/fonts/font_6x11.c +++ b/lib/fonts/font_6x11.c @@ -9,7 +9,7 @@ #define FONTDATAMAX (11*256) -static struct font_data fontdata_6x11 = { +static const struct font_data fontdata_6x11 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_6x8.c b/lib/fonts/font_6x8.c index 700039a9ceae..e7442a0d183d 100644 --- a/lib/fonts/font_6x8.c +++ b/lib/fonts/font_6x8.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 2048 -static struct font_data fontdata_6x8 = { +static const struct font_data fontdata_6x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 000000 */ diff --git a/lib/fonts/font_7x14.c b/lib/fonts/font_7x14.c index 86d298f38505..9cc7ae2e03f7 100644 --- a/lib/fonts/font_7x14.c +++ b/lib/fonts/font_7x14.c @@ -8,7 +8,7 @@ #define FONTDATAMAX 3584 -static struct font_data fontdata_7x14 = { +static const struct font_data fontdata_7x14 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 0000000 */ diff --git a/lib/fonts/font_8x16.c b/lib/fonts/font_8x16.c index 37cedd36ca5e..bab25dc59e8d 100644 --- a/lib/fonts/font_8x16.c +++ b/lib/fonts/font_8x16.c @@ -10,7 +10,7 @@ #define FONTDATAMAX 4096 -static struct font_data fontdata_8x16 = { +static const struct font_data fontdata_8x16 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_8x8.c b/lib/fonts/font_8x8.c index 8ab695538395..109d0572368f 100644 --- a/lib/fonts/font_8x8.c +++ b/lib/fonts/font_8x8.c @@ -9,7 +9,7 @@ #define FONTDATAMAX 2048 -static struct font_data fontdata_8x8 = { +static const struct font_data fontdata_8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_acorn_8x8.c b/lib/fonts/font_acorn_8x8.c index 069b3e80c434..fb395f0d4031 100644 --- a/lib/fonts/font_acorn_8x8.c +++ b/lib/fonts/font_acorn_8x8.c @@ -5,7 +5,7 @@ #define FONTDATAMAX 2048 -static struct font_data acorndata_8x8 = { +static const struct font_data acorndata_8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 00 */ 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ^@ */ /* 01 */ 0x7e, 0x81, 0xa5, 0x81, 0xbd, 0x99, 0x81, 0x7e, /* ^A */ diff --git a/lib/fonts/font_mini_4x6.c b/lib/fonts/font_mini_4x6.c index 1449876c6a27..592774a90917 100644 --- a/lib/fonts/font_mini_4x6.c +++ b/lib/fonts/font_mini_4x6.c @@ -43,7 +43,7 @@ __END__; #define FONTDATAMAX 1536 -static struct font_data fontdata_mini_4x6 = { +static const struct font_data fontdata_mini_4x6 = { { 0, 0, FONTDATAMAX, 0 }, { /*{*/ /* Char 0: ' ' */ diff --git a/lib/fonts/font_pearl_8x8.c b/lib/fonts/font_pearl_8x8.c index 32d65551e7ed..a6f95ebce950 100644 --- a/lib/fonts/font_pearl_8x8.c +++ b/lib/fonts/font_pearl_8x8.c @@ -14,7 +14,7 @@ #define FONTDATAMAX 2048 -static struct font_data fontdata_pearl8x8 = { +static const struct font_data fontdata_pearl8x8 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, /* 00000000 */ diff --git a/lib/fonts/font_sun12x22.c b/lib/fonts/font_sun12x22.c index 641a6b4dca42..a5b65bd49604 100644 --- a/lib/fonts/font_sun12x22.c +++ b/lib/fonts/font_sun12x22.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 11264 -static struct font_data fontdata_sun12x22 = { +static const struct font_data fontdata_sun12x22 = { { 0, 0, FONTDATAMAX, 0 }, { /* 0 0x00 '^@' */ 0x00, 0x00, /* 000000000000 */ diff --git a/lib/fonts/font_sun8x16.c b/lib/fonts/font_sun8x16.c index 193fe6d988e0..e577e76a6a7c 100644 --- a/lib/fonts/font_sun8x16.c +++ b/lib/fonts/font_sun8x16.c @@ -3,7 +3,7 @@ #define FONTDATAMAX 4096 -static struct font_data fontdata_sun8x16 = { +static const struct font_data fontdata_sun8x16 = { { 0, 0, FONTDATAMAX, 0 }, { /* */ 0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* */ 0x00,0x00,0x7e,0x81,0xa5,0x81,0x81,0xbd,0x99,0x81,0x81,0x7e,0x00,0x00,0x00,0x00, diff --git a/lib/fonts/font_ter16x32.c b/lib/fonts/font_ter16x32.c index 91b9c283bd9c..f7c3abb6b99e 100644 --- a/lib/fonts/font_ter16x32.c +++ b/lib/fonts/font_ter16x32.c @@ -4,7 +4,7 @@ #define FONTDATAMAX 16384 -static struct font_data fontdata_ter16x32 = { +static const struct font_data fontdata_ter16x32 = { { 0, 0, FONTDATAMAX, 0 }, { 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x7f, 0xfc, 0x7f, 0xfc, -- cgit v1.2.3 From 6fa6d28051e9fcaa1570e69648ea13a353a5d218 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Tue, 17 Nov 2020 12:05:45 -0800 Subject: lib/strncpy_from_user.c: Mask out bytes after NUL terminator. do_strncpy_from_user() may copy some extra bytes after the NUL terminator into the destination buffer. This usually does not matter for normal string operations. However, when BPF programs key BPF maps with strings, this matters a lot. A BPF program may read strings from user memory by calling the bpf_probe_read_user_str() helper which eventually calls do_strncpy_from_user(). The program can then key a map with the destination buffer. BPF map keys are fixed-width and string-agnostic, meaning that map keys are treated as a set of bytes. The issue is when do_strncpy_from_user() overcopies bytes after the NUL terminator, it can result in seemingly identical strings occupying multiple slots in a BPF map. This behavior is subtle and totally unexpected by the user. This commit masks out the bytes following the NUL while preserving long-sized stride in the fast path. Fixes: 6ae08ae3dea2 ("bpf: Add probe_read_{user, kernel} and probe_read_{user, kernel}_str helpers") Signed-off-by: Daniel Xu Signed-off-by: Alexei Starovoitov Link: https://lore.kernel.org/bpf/21efc982b3e9f2f7b0379eed642294caaa0c27a7.1605642949.git.dxu@dxuuu.xyz --- kernel/trace/bpf_trace.c | 10 ++++++++++ lib/strncpy_from_user.c | 19 +++++++++++++++++-- 2 files changed, 27 insertions(+), 2 deletions(-) (limited to 'lib') diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c index 5113fd423cdf..048c655315f1 100644 --- a/kernel/trace/bpf_trace.c +++ b/kernel/trace/bpf_trace.c @@ -181,6 +181,16 @@ bpf_probe_read_user_str_common(void *dst, u32 size, { int ret; + /* + * NB: We rely on strncpy_from_user() not copying junk past the NUL + * terminator into `dst`. + * + * strncpy_from_user() does long-sized strides in the fast path. If the + * strncpy does not mask out the bytes after the NUL in `unsafe_ptr`, + * then there could be junk after the NUL in `dst`. If user takes `dst` + * and keys a hash map with it, then semantically identical strings can + * occupy multiple entries in the map. + */ ret = strncpy_from_user_nofault(dst, unsafe_ptr, size); if (unlikely(ret < 0)) memset(dst, 0, size); diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index e6d5fcc2cdf3..122d8d0e253c 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -35,17 +35,32 @@ static inline long do_strncpy_from_user(char *dst, const char __user *src, goto byte_at_a_time; while (max >= sizeof(unsigned long)) { - unsigned long c, data; + unsigned long c, data, mask; /* Fall back to byte-at-a-time if we get a page fault */ unsafe_get_user(c, (unsigned long __user *)(src+res), byte_at_a_time); - *(unsigned long *)(dst+res) = c; + /* + * Note that we mask out the bytes following the NUL. This is + * important to do because string oblivious code may read past + * the NUL. For those routines, we don't want to give them + * potentially random bytes after the NUL in `src`. + * + * One example of such code is BPF map keys. BPF treats map keys + * as an opaque set of bytes. Without the post-NUL mask, any BPF + * maps keyed by strings returned from strncpy_from_user() may + * have multiple entries for semantically identical strings. + */ if (has_zero(c, &data, &constants)) { data = prep_zero_mask(c, data, &constants); data = create_zero_mask(data); + mask = zero_bytemask(data); + *(unsigned long *)(dst+res) = c & mask; return res + find_zero(data); } + + *(unsigned long *)(dst+res) = c; + res += sizeof(unsigned long); max -= sizeof(unsigned long); } -- cgit v1.2.3