From 560af5dc839eef08a273908f390cfefefb82aa04 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 9 Oct 2024 17:45:03 +0200 Subject: lockdep: Enable PROVE_RAW_LOCK_NESTING with PROVE_LOCKING. With the printk issues solved, the last known splat created by PROVE_RAW_LOCK_NESTING is gone. Enable PROVE_RAW_LOCK_NESTING by default as part of PROVE_LOCKING. Keep the defines around in case something serious pops up and it needs to be disabled. Signed-off-by: Sebastian Andrzej Siewior Acked-by: Waiman Long Signed-off-by: Boqun Feng Link: https://lore.kernel.org/r/20241009161041.1018375-2-bigeasy@linutronix.de --- lib/Kconfig.debug | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7315f643817a..5b67816f4a62 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1409,22 +1409,14 @@ config PROVE_LOCKING For more details, see Documentation/locking/lockdep-design.rst. config PROVE_RAW_LOCK_NESTING - bool "Enable raw_spinlock - spinlock nesting checks" + bool depends on PROVE_LOCKING - default n + default y help Enable the raw_spinlock vs. spinlock nesting checks which ensure that the lock nesting rules for PREEMPT_RT enabled kernels are not violated. - NOTE: There are known nesting problems. So if you enable this - option expect lockdep splats until these problems have been fully - addressed which is work in progress. This config switch allows to - identify and analyze these problems. It will be removed and the - check permanently enabled once the main issues have been fixed. - - If unsure, select N. - config LOCK_STAT bool "Lock usage statistics" depends on DEBUG_KERNEL && LOCK_DEBUGGING_SUPPORT -- cgit v1.2.3 From 84b4a51fce4ccc6605113ed8af41a3d91609a756 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Mon, 21 Oct 2024 12:11:44 -0700 Subject: selftests: add new kallsyms selftests We lack find_symbol() selftests, so add one. This let's us stress test improvements easily on find_symbol() or optimizations. It also inherently allows us to test the limits of kallsyms on Linux today. We test a pathalogical use case for kallsyms by introducing modules which are automatically written for us with a larger number of symbols. We have 4 kallsyms test modules: A: has KALLSYSMS_NUMSYMS exported symbols B: uses one of A's symbols C: adds KALLSYMS_SCALE_FACTOR * KALLSYSMS_NUMSYMS exported D: adds 2 * the symbols than C By using anything much larger than KALLSYSMS_NUMSYMS as 10,000 and KALLSYMS_SCALE_FACTOR of 8 we segfault today. So we're capped at around 160000 symbols somehow today. We can inpsect that issue at our leasure later, but for now the real value to this test is that this will easily allow us to test improvements on find_symbol(). We want to enable this test on allyesmodconfig builds so we can't use this combination, so instead just use a safe value for now and be informative on the Kconfig symbol documentation about where our thresholds are for testers. We default then to KALLSYSMS_NUMSYMS of just 100 and KALLSYMS_SCALE_FACTOR of 8. On x86_64 we can use perf, for other architectures we just use 'time' and allow for customizations. For example a future enhancements could be done for parisc to check for unaligned accesses which triggers a special special exception handler assembler code inside the kernel. The negative impact on performance is so large on parisc that it keeps track of its accesses on /proc/cpuinfo as UAH: IRQ: CPU0 CPU1 3: 1332 0 SuperIO ttyS0 7: 1270013 0 SuperIO pata_ns87415 64: 320023012 320021431 CPU timer 65: 17080507 20624423 CPU IPI UAH: 10948640 58104 Unaligned access handler traps While at it, this tidies up lib/ test modules to allow us to have a new directory for them. The amount of test modules under lib/ is insane. This should also hopefully showcase how to start doing basic self module writing code, which may be more useful for more complex cases later in the future. Signed-off-by: Luis Chamberlain --- lib/Kconfig.debug | 105 +++++++++++++++++++++ lib/Makefile | 1 + lib/tests/Makefile | 1 + lib/tests/module/.gitignore | 4 + lib/tests/module/Makefile | 15 +++ lib/tests/module/gen_test_kallsyms.sh | 128 ++++++++++++++++++++++++++ tools/testing/selftests/module/Makefile | 12 +++ tools/testing/selftests/module/config | 3 + tools/testing/selftests/module/find_symbol.sh | 81 ++++++++++++++++ 9 files changed, 350 insertions(+) create mode 100644 lib/tests/Makefile create mode 100644 lib/tests/module/.gitignore create mode 100644 lib/tests/module/Makefile create mode 100755 lib/tests/module/gen_test_kallsyms.sh create mode 100644 tools/testing/selftests/module/Makefile create mode 100644 tools/testing/selftests/module/config create mode 100755 tools/testing/selftests/module/find_symbol.sh (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7315f643817a..b5929721fc63 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2903,6 +2903,111 @@ config TEST_KMOD If unsure, say N. +config TEST_RUNTIME + bool + +config TEST_RUNTIME_MODULE + bool + +config TEST_KALLSYMS + tristate "module kallsyms find_symbol() test" + depends on m + select TEST_RUNTIME + select TEST_RUNTIME_MODULE + select TEST_KALLSYMS_A + select TEST_KALLSYMS_B + select TEST_KALLSYMS_C + select TEST_KALLSYMS_D + help + This allows us to stress test find_symbol() through the kallsyms + used to place symbols on the kernel ELF kallsyms and modules kallsyms + where we place kernel symbols such as exported symbols. + + We have four test modules: + + A: has KALLSYSMS_NUMSYMS exported symbols + B: uses one of A's symbols + C: adds KALLSYMS_SCALE_FACTOR * KALLSYSMS_NUMSYMS exported + D: adds 2 * the symbols than C + + We stress test find_symbol() through two means: + + 1) Upon load of B it will trigger simplify_symbols() to look for the + one symbol it uses from the module A with tons of symbols. This is an + indirect way for us to have B call resolve_symbol_wait() upon module + load. This will eventually call find_symbol() which will eventually + try to find the symbols used with find_exported_symbol_in_section(). + find_exported_symbol_in_section() uses bsearch() so a binary search + for each symbol. Binary search will at worst be O(log(n)) so the + larger TEST_MODULE_KALLSYSMS the worse the search. + + 2) The selftests should load C first, before B. Upon B's load towards + the end right before we call module B's init routine we get + complete_formation() called on the module. That will first check + for duplicate symbols with the call to verify_exported_symbols(). + That is when we'll force iteration on module C's insane symbol list. + Since it has 10 * KALLSYMS_NUMSYMS it means we can first test + just loading B without C. The amount of time it takes to load C Vs + B can give us an idea of the impact growth of the symbol space and + give us projection. Module A only uses one symbol from B so to allow + this scaling in module C to be proportional, if it used more symbols + then the first test would be doing more and increasing just the + search space would be slightly different. The last module, module D + will just increase the search space by twice the number of symbols in + C so to allow for full projects. + + tools/testing/selftests/module/find_symbol.sh + + The current defaults will incur a build delay of about 7 minutes + on an x86_64 with only 8 cores. Enable this only if you want to + stress test find_symbol() with thousands of symbols. At the same + time this is also useful to test building modules with thousands of + symbols, and if BTF is enabled this also stress tests adding BTF + information for each module. Currently enabling many more symbols + will segfault the build system. + + If unsure, say N. + +if TEST_KALLSYMS + +config TEST_KALLSYMS_A + tristate + depends on m + +config TEST_KALLSYMS_B + tristate + depends on m + +config TEST_KALLSYMS_C + tristate + depends on m + +config TEST_KALLSYMS_D + tristate + depends on m + +config TEST_KALLSYMS_NUMSYMS + int "test kallsyms number of symbols" + default 100 + help + The number of symbols to create on TEST_KALLSYMS_A, only one of which + module TEST_KALLSYMS_B will use. This also will be used + for how many symbols TEST_KALLSYMS_C will have, scaled up by + TEST_KALLSYMS_SCALE_FACTOR. Note that setting this to 10,000 will + trigger a segfault today, don't use anything close to it unless + you are aware that this should not be used for automated build tests. + +config TEST_KALLSYMS_SCALE_FACTOR + int "test kallsyms scale factor" + default 8 + help + How many more unusued symbols will TEST_KALLSYSMS_C have than + TEST_KALLSYMS_A. If 8, then module C will have 8 * syms + than module A. Then TEST_KALLSYMS_D will have double the amount + of symbols than C so to allow projections. + +endif # TEST_KALLSYMS + config TEST_DEBUG_VIRTUAL tristate "Test CONFIG_DEBUG_VIRTUAL feature" depends on DEBUG_VIRTUAL diff --git a/lib/Makefile b/lib/Makefile index 773adf88af41..ae720c7eb996 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -96,6 +96,7 @@ obj-$(CONFIG_TEST_XARRAY) += test_xarray.o obj-$(CONFIG_TEST_MAPLE_TREE) += test_maple_tree.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o +obj-$(CONFIG_TEST_RUNTIME) += tests/ obj-$(CONFIG_TEST_DEBUG_VIRTUAL) += test_debug_virtual.o obj-$(CONFIG_TEST_MEMCAT_P) += test_memcat_p.o obj-$(CONFIG_TEST_OBJAGG) += test_objagg.o diff --git a/lib/tests/Makefile b/lib/tests/Makefile new file mode 100644 index 000000000000..8e4f42cb9c54 --- /dev/null +++ b/lib/tests/Makefile @@ -0,0 +1 @@ +obj-$(CONFIG_TEST_RUNTIME_MODULE) += module/ diff --git a/lib/tests/module/.gitignore b/lib/tests/module/.gitignore new file mode 100644 index 000000000000..8be7891b250f --- /dev/null +++ b/lib/tests/module/.gitignore @@ -0,0 +1,4 @@ +test_kallsyms_a.c +test_kallsyms_b.c +test_kallsyms_c.c +test_kallsyms_d.c diff --git a/lib/tests/module/Makefile b/lib/tests/module/Makefile new file mode 100644 index 000000000000..af5c27b996cb --- /dev/null +++ b/lib/tests/module/Makefile @@ -0,0 +1,15 @@ +obj-$(CONFIG_TEST_KALLSYMS_A) += test_kallsyms_a.o +obj-$(CONFIG_TEST_KALLSYMS_B) += test_kallsyms_b.o +obj-$(CONFIG_TEST_KALLSYMS_C) += test_kallsyms_c.o +obj-$(CONFIG_TEST_KALLSYMS_D) += test_kallsyms_d.o + +$(obj)/%.c: FORCE + @$(kecho) " GEN $@" + $(Q)$(srctree)/lib/tests/module/gen_test_kallsyms.sh $@\ + $(CONFIG_TEST_KALLSYMS_NUMSYMS) \ + $(CONFIG_TEST_KALLSYMS_SCALE_FACTOR) + +clean-files += test_kallsyms_a.c +clean-files += test_kallsyms_b.c +clean-files += test_kallsyms_c.c +clean-files += test_kallsyms_d.c diff --git a/lib/tests/module/gen_test_kallsyms.sh b/lib/tests/module/gen_test_kallsyms.sh new file mode 100755 index 000000000000..e85f10dc11bd --- /dev/null +++ b/lib/tests/module/gen_test_kallsyms.sh @@ -0,0 +1,128 @@ +#!/bin/bash + +TARGET=$(basename $1) +DIR=lib/tests/module +TARGET="$DIR/$TARGET" +NUM_SYMS=$2 +SCALE_FACTOR=$3 +TEST_TYPE=$(echo $TARGET | sed -e 's|lib/tests/module/test_kallsyms_||g') +TEST_TYPE=$(echo $TEST_TYPE | sed -e 's|.c||g') + +gen_template_module_header() +{ + cat <<____END_MODULE +// SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 +/* + * Copyright (C) 2023 Luis Chamberlain + * + * Automatically generated code for testing, do not edit manually. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include +#include + +____END_MODULE +} + +gen_num_syms() +{ + PREFIX=$1 + NUM=$2 + for i in $(seq 1 $NUM); do + printf "int auto_test_%s_%010d = 0xff;\n" $PREFIX $i + printf "EXPORT_SYMBOL_GPL(auto_test_%s_%010d);\n" $PREFIX $i + done + echo +} + +gen_template_module_data_a() +{ + gen_num_syms a $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_data_b() +{ + printf "\nextern int auto_test_a_%010d;\n\n" 28 + echo "static int auto_runtime_test(void)" + echo "{" + printf "\nreturn auto_test_a_%010d;\n" 28 + echo "}" +} + +gen_template_module_data_c() +{ + gen_num_syms c $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_data_d() +{ + gen_num_syms d $1 + cat <<____END_MODULE +static int auto_runtime_test(void) +{ + return 0; +} + +____END_MODULE +} + +gen_template_module_exit() +{ + cat <<____END_MODULE +static int __init auto_test_module_init(void) +{ + return auto_runtime_test(); +} +module_init(auto_test_module_init); + +static void __exit auto_test_module_exit(void) +{ +} +module_exit(auto_test_module_exit); + +MODULE_AUTHOR("Luis Chamberlain "); +MODULE_LICENSE("GPL"); +____END_MODULE +} + +case $TEST_TYPE in + a) + gen_template_module_header > $TARGET + gen_template_module_data_a $NUM_SYMS >> $TARGET + gen_template_module_exit >> $TARGET + ;; + b) + gen_template_module_header > $TARGET + gen_template_module_data_b >> $TARGET + gen_template_module_exit >> $TARGET + ;; + c) + gen_template_module_header > $TARGET + gen_template_module_data_c $((NUM_SYMS * SCALE_FACTOR)) >> $TARGET + gen_template_module_exit >> $TARGET + ;; + d) + gen_template_module_header > $TARGET + gen_template_module_data_d $((NUM_SYMS * SCALE_FACTOR * 2)) >> $TARGET + gen_template_module_exit >> $TARGET + ;; + *) + ;; +esac diff --git a/tools/testing/selftests/module/Makefile b/tools/testing/selftests/module/Makefile new file mode 100644 index 000000000000..6132d7ddb08b --- /dev/null +++ b/tools/testing/selftests/module/Makefile @@ -0,0 +1,12 @@ +# SPDX-License-Identifier: GPL-2.0-only +# Makefile for module loading selftests + +# No binaries, but make sure arg-less "make" doesn't trigger "run_tests" +all: + +TEST_PROGS := find_symbol.sh + +include ../lib.mk + +# Nothing to clean up. +clean: diff --git a/tools/testing/selftests/module/config b/tools/testing/selftests/module/config new file mode 100644 index 000000000000..b0c206b1ad47 --- /dev/null +++ b/tools/testing/selftests/module/config @@ -0,0 +1,3 @@ +CONFIG_TEST_RUNTIME=y +CONFIG_TEST_RUNTIME_MODULE=y +CONFIG_TEST_KALLSYMS=m diff --git a/tools/testing/selftests/module/find_symbol.sh b/tools/testing/selftests/module/find_symbol.sh new file mode 100755 index 000000000000..140364d3c49f --- /dev/null +++ b/tools/testing/selftests/module/find_symbol.sh @@ -0,0 +1,81 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later OR copyleft-next-0.3.1 +# Copyright (C) 2023 Luis Chamberlain +# +# This is a stress test script for kallsyms through find_symbol() + +set -e + +# Kselftest framework requirement - SKIP code is 4. +ksft_skip=4 + +test_reqs() +{ + if ! which modprobe 2> /dev/null > /dev/null; then + echo "$0: You need modprobe installed" >&2 + exit $ksft_skip + fi + + if ! which kmod 2> /dev/null > /dev/null; then + echo "$0: You need kmod installed" >&2 + exit $ksft_skip + fi + + if ! which perf 2> /dev/null > /dev/null; then + echo "$0: You need perf installed" >&2 + exit $ksft_skip + fi + + uid=$(id -u) + if [ $uid -ne 0 ]; then + echo $msg must be run as root >&2 + exit $ksft_skip + fi +} + +load_mod() +{ + local STATS="-e duration_time" + STATS="$STATS -e user_time" + STATS="$STATS -e system_time" + STATS="$STATS -e page-faults" + local MOD=$1 + + local ARCH="$(uname -m)" + case "${ARCH}" in + x86_64) + perf stat $STATS $MODPROBE test_kallsyms_b + ;; + *) + time $MODPROBE test_kallsyms_b + exit 1 + ;; + esac +} + +remove_all() +{ + $MODPROBE -r test_kallsyms_b + for i in a b c d; do + $MODPROBE -r test_kallsyms_$i + done +} +test_reqs + +MODPROBE=$( Date: Thu, 31 Oct 2024 13:04:07 +0100 Subject: timekeeping: Remove CONFIG_DEBUG_TIMEKEEPING Since 135225a363ae timekeeping_cycles_to_ns() handles large offsets which would lead to 64bit multiplication overflows correctly. It's also protected against negative motion of the clocksource unconditionally, which was exclusive to x86 before. timekeeping_advance() handles large offsets already correctly. That means the value of CONFIG_DEBUG_TIMEKEEPING which analyzed these cases is very close to zero. Remove all of it. Signed-off-by: Thomas Gleixner Acked-by: John Stultz Link: https://lore.kernel.org/all/20241031120328.536010148@linutronix.de --- arch/riscv/configs/defconfig | 1 - include/linux/timekeeper_internal.h | 16 --- kernel/time/timekeeping.c | 108 +-------------------- lib/Kconfig.debug | 13 --- .../testing/selftests/wireguard/qemu/debug.config | 1 - 5 files changed, 3 insertions(+), 136 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/arch/riscv/configs/defconfig b/arch/riscv/configs/defconfig index 2341393cfac1..26c01b9e3434 100644 --- a/arch/riscv/configs/defconfig +++ b/arch/riscv/configs/defconfig @@ -301,7 +301,6 @@ CONFIG_DEBUG_MEMORY_INIT=y CONFIG_DEBUG_PER_CPU_MAPS=y CONFIG_SOFTLOCKUP_DETECTOR=y CONFIG_WQ_WATCHDOG=y -CONFIG_DEBUG_TIMEKEEPING=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y CONFIG_DEBUG_MUTEXES=y diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index a3b6380a7777..e39d4d563b19 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -76,9 +76,6 @@ struct tk_read_base { * ntp shifted nano seconds. * @ntp_err_mult: Multiplication factor for scaled math conversion * @skip_second_overflow: Flag used to avoid updating NTP twice with same second - * @last_warning: Warning ratelimiter (DEBUG_TIMEKEEPING) - * @underflow_seen: Underflow warning flag (DEBUG_TIMEKEEPING) - * @overflow_seen: Overflow warning flag (DEBUG_TIMEKEEPING) * * Note: For timespec(64) based interfaces wall_to_monotonic is what * we need to add to xtime (or xtime corrected for sub jiffy times) @@ -147,19 +144,6 @@ struct timekeeper { u32 ntp_error_shift; u32 ntp_err_mult; u32 skip_second_overflow; - -#ifdef CONFIG_DEBUG_TIMEKEEPING - long last_warning; - /* - * These simple flag variables are managed - * without locks, which is racy, but they are - * ok since we don't really care about being - * super precise about how many events were - * seen, just that a problem was observed. - */ - int underflow_seen; - int overflow_seen; -#endif }; #ifdef CONFIG_GENERIC_TIME_VSYSCALL diff --git a/kernel/time/timekeeping.c b/kernel/time/timekeeping.c index 17cae886ca82..d115adebc418 100644 --- a/kernel/time/timekeeping.c +++ b/kernel/time/timekeeping.c @@ -226,97 +226,6 @@ static inline u64 tk_clock_read(const struct tk_read_base *tkr) return clock->read(clock); } -#ifdef CONFIG_DEBUG_TIMEKEEPING -#define WARNING_FREQ (HZ*300) /* 5 minute rate-limiting */ - -static void timekeeping_check_update(struct timekeeper *tk, u64 offset) -{ - - u64 max_cycles = tk->tkr_mono.clock->max_cycles; - const char *name = tk->tkr_mono.clock->name; - - if (offset > max_cycles) { - printk_deferred("WARNING: timekeeping: Cycle offset (%lld) is larger than allowed by the '%s' clock's max_cycles value (%lld): time overflow danger\n", - offset, name, max_cycles); - printk_deferred(" timekeeping: Your kernel is sick, but tries to cope by capping time updates\n"); - } else { - if (offset > (max_cycles >> 1)) { - printk_deferred("INFO: timekeeping: Cycle offset (%lld) is larger than the '%s' clock's 50%% safety margin (%lld)\n", - offset, name, max_cycles >> 1); - printk_deferred(" timekeeping: Your kernel is still fine, but is feeling a bit nervous\n"); - } - } - - if (tk->underflow_seen) { - if (jiffies - tk->last_warning > WARNING_FREQ) { - printk_deferred("WARNING: Underflow in clocksource '%s' observed, time update ignored.\n", name); - printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); - printk_deferred(" Your kernel is probably still fine.\n"); - tk->last_warning = jiffies; - } - tk->underflow_seen = 0; - } - - if (tk->overflow_seen) { - if (jiffies - tk->last_warning > WARNING_FREQ) { - printk_deferred("WARNING: Overflow in clocksource '%s' observed, time update capped.\n", name); - printk_deferred(" Please report this, consider using a different clocksource, if possible.\n"); - printk_deferred(" Your kernel is probably still fine.\n"); - tk->last_warning = jiffies; - } - tk->overflow_seen = 0; - } -} - -static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 cycles); - -static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) -{ - struct timekeeper *tk = &tk_core.timekeeper; - u64 now, last, mask, max, delta; - unsigned int seq; - - /* - * Since we're called holding a seqcount, the data may shift - * under us while we're doing the calculation. This can cause - * false positives, since we'd note a problem but throw the - * results away. So nest another seqcount here to atomically - * grab the points we are checking with. - */ - do { - seq = read_seqcount_begin(&tk_core.seq); - now = tk_clock_read(tkr); - last = tkr->cycle_last; - mask = tkr->mask; - max = tkr->clock->max_cycles; - } while (read_seqcount_retry(&tk_core.seq, seq)); - - delta = clocksource_delta(now, last, mask); - - /* - * Try to catch underflows by checking if we are seeing small - * mask-relative negative values. - */ - if (unlikely((~delta & mask) < (mask >> 3))) - tk->underflow_seen = 1; - - /* Check for multiplication overflows */ - if (unlikely(delta > max)) - tk->overflow_seen = 1; - - /* timekeeping_cycles_to_ns() handles both under and overflow */ - return timekeeping_cycles_to_ns(tkr, now); -} -#else -static inline void timekeeping_check_update(struct timekeeper *tk, u64 offset) -{ -} -static inline u64 timekeeping_debug_get_ns(const struct tk_read_base *tkr) -{ - BUG(); -} -#endif - /** * tk_setup_internals - Set up internals to use clocksource clock. * @@ -421,19 +330,11 @@ static inline u64 timekeeping_cycles_to_ns(const struct tk_read_base *tkr, u64 c return ((delta * tkr->mult) + tkr->xtime_nsec) >> tkr->shift; } -static __always_inline u64 __timekeeping_get_ns(const struct tk_read_base *tkr) +static __always_inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) { return timekeeping_cycles_to_ns(tkr, tk_clock_read(tkr)); } -static inline u64 timekeeping_get_ns(const struct tk_read_base *tkr) -{ - if (IS_ENABLED(CONFIG_DEBUG_TIMEKEEPING)) - return timekeeping_debug_get_ns(tkr); - - return __timekeeping_get_ns(tkr); -} - /** * update_fast_timekeeper - Update the fast and NMI safe monotonic timekeeper. * @tkr: Timekeeping readout base from which we take the update @@ -477,7 +378,7 @@ static __always_inline u64 __ktime_get_fast_ns(struct tk_fast *tkf) seq = raw_read_seqcount_latch(&tkf->seq); tkr = tkf->base + (seq & 0x01); now = ktime_to_ns(tkr->base); - now += __timekeeping_get_ns(tkr); + now += timekeeping_get_ns(tkr); } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); return now; @@ -593,7 +494,7 @@ static __always_inline u64 __ktime_get_real_fast(struct tk_fast *tkf, u64 *mono) tkr = tkf->base + (seq & 0x01); basem = ktime_to_ns(tkr->base); baser = ktime_to_ns(tkr->base_real); - delta = __timekeeping_get_ns(tkr); + delta = timekeeping_get_ns(tkr); } while (raw_read_seqcount_latch_retry(&tkf->seq, seq)); if (mono) @@ -2333,9 +2234,6 @@ static bool timekeeping_advance(enum timekeeping_adv_mode mode) if (offset < real_tk->cycle_interval && mode == TK_ADV_TICK) return false; - /* Do some additional sanity checking */ - timekeeping_check_update(tk, offset); - /* * With NO_HZ we may have to accumulate many cycle_intervals * (think "ticks") worth of time at once. To do this efficiently, diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7315f643817a..14977b9fc254 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1328,19 +1328,6 @@ config SCHEDSTATS endmenu -config DEBUG_TIMEKEEPING - bool "Enable extra timekeeping sanity checking" - help - This option will enable additional timekeeping sanity checks - which may be helpful when diagnosing issues where timekeeping - problems are suspected. - - This may include checks in the timekeeping hotpaths, so this - option may have a (very small) performance impact to some - workloads. - - If unsure, say N. - config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPTION && TRACE_IRQFLAGS_SUPPORT diff --git a/tools/testing/selftests/wireguard/qemu/debug.config b/tools/testing/selftests/wireguard/qemu/debug.config index 9d172210e2c6..139fd9aa8b12 100644 --- a/tools/testing/selftests/wireguard/qemu/debug.config +++ b/tools/testing/selftests/wireguard/qemu/debug.config @@ -31,7 +31,6 @@ CONFIG_SCHED_DEBUG=y CONFIG_SCHED_INFO=y CONFIG_SCHEDSTATS=y CONFIG_SCHED_STACK_END_CHECK=y -CONFIG_DEBUG_TIMEKEEPING=y CONFIG_DEBUG_PREEMPT=y CONFIG_DEBUG_RT_MUTEXES=y CONFIG_DEBUG_SPINLOCK=y -- cgit v1.2.3 From b42166427b46af0d963242283fc99d429623d303 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 6 Oct 2024 06:22:21 +0800 Subject: lib/Kconfig.debug: move int_pow test option to runtime testing section When executing 'make menuconfig' with KUNIT enabled, the int_pow test option appears on the first page of the main menu instead of under the runtime testing section. Relocate the int_pow test configuration to the appropriate runtime testing submenu, ensuring a more organized and logical structure in the menu configuration. Link: https://lkml.kernel.org/r/20241005222221.2154393-1-visitorckw@gmail.com Fixes: 7fcc9b53216c ("lib/math: Add int_pow test suite") Signed-off-by: Kuan-Wei Chiu Cc: Ching-Chun (Jim) Huang Cc: David Gow Cc: Luis Felipe Hernandez Cc: Shuah Khan Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7312ae7c3cc5..409dd193c09b 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2993,6 +2993,22 @@ config TEST_OBJPOOL If unsure, say N. +config INT_POW_TEST + tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + This option enables the KUnit test suite for the int_pow function, + which performs integer exponentiation. The test suite is designed to + verify that the implementation of int_pow correctly computes the power + of a given base raised to a given exponent. + + Enabling this option will include tests that check various scenarios + and edge cases to ensure the accuracy and reliability of the exponentiation + function. + + If unsure, say N + endif # RUNTIME_TESTING_MENU config ARCH_USE_MEMTEST @@ -3088,19 +3104,3 @@ config RUST_KERNEL_DOCTESTS endmenu # "Rust" endmenu # Kernel hacking - -config INT_POW_TEST - tristate "Integer exponentiation (int_pow) test" if !KUNIT_ALL_TESTS - depends on KUNIT - default KUNIT_ALL_TESTS - help - This option enables the KUnit test suite for the int_pow function, - which performs integer exponentiation. The test suite is designed to - verify that the implementation of int_pow correctly computes the power - of a given base raised to a given exponent. - - Enabling this option will include tests that check various scenarios - and edge cases to ensure the accuracy and reliability of the exponentiation - function. - - If unsure, say N -- cgit v1.2.3 From 5d042707089f0d0c49473d05250e4f319a71e1df Mon Sep 17 00:00:00 2001 From: Vinicius Peixoto Date: Sat, 12 Oct 2024 04:43:49 -0300 Subject: lib/crc16_kunit.c: add KUnit tests for crc16 Add Kunit tests for the kernel's implementation of the standard CRC-16 algorithm (). The test data consists of 100 randomly-generated test cases, validated against a naive CRC-16 implementation. This test follows roughly the same logic as lib/crc32test.c, but without the performance measurements. Link: https://lkml.kernel.org/r/20241012-crc16-kunit-v3-1-0ca75cb58ca9@lkcamp.dev Signed-off-by: Vinicius Peixoto Co-developed-by: Enzo Bertoloti Signed-off-by: Enzo Bertoloti Co-developed-by: Fabricio Gasperin Signed-off-by: Fabricio Gasperin Suggested-by: David Laight Cc: Brendan Higgins Cc: David Gow Cc: Rae Moar Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 9 ++++ lib/Makefile | 1 + lib/crc16_kunit.c | 155 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 165 insertions(+) create mode 100644 lib/crc16_kunit.c (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 409dd193c09b..eda319e9d569 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2850,6 +2850,15 @@ config USERCOPY_KUNIT_TEST on the copy_to/from_user infrastructure, making sure basic user/kernel boundary testing is working. +config CRC16_KUNIT_TEST + tristate "KUnit tests for CRC16" + depends on KUNIT + default KUNIT_ALL_TESTS + select CRC16 + help + Enable this option to run unit tests for the kernel's CRC16 + implementation (). + config TEST_UDELAY tristate "udelay test driver" help diff --git a/lib/Makefile b/lib/Makefile index 773adf88af41..1faed6414a85 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -389,6 +389,7 @@ CFLAGS_fortify_kunit.o += $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_FORTIFY_KUNIT_TEST) += fortify_kunit.o obj-$(CONFIG_SIPHASH_KUNIT_TEST) += siphash_kunit.o obj-$(CONFIG_USERCOPY_KUNIT_TEST) += usercopy_kunit.o +obj-$(CONFIG_CRC16_KUNIT_TEST) += crc16_kunit.o obj-$(CONFIG_GENERIC_LIB_DEVMEM_IS_ALLOWED) += devmem_is_allowed.o diff --git a/lib/crc16_kunit.c b/lib/crc16_kunit.c new file mode 100644 index 000000000000..0918c98a96d2 --- /dev/null +++ b/lib/crc16_kunit.c @@ -0,0 +1,155 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * KUnits tests for CRC16. + * + * Copyright (C) 2024, LKCAMP + * Author: Vinicius Peixoto + * Author: Fabricio Gasperin + * Author: Enzo Bertoloti + */ +#include +#include +#include + +#define CRC16_KUNIT_DATA_SIZE 4096 +#define CRC16_KUNIT_TEST_SIZE 100 +#define CRC16_KUNIT_SEED 0x12345678 + +/** + * struct crc16_test - CRC16 test data + * @crc: initial input value to CRC16 + * @start: Start index within the data buffer + * @length: Length of the data + */ +static struct crc16_test { + u16 crc; + u16 start; + u16 length; +} tests[CRC16_KUNIT_TEST_SIZE]; + +u8 data[CRC16_KUNIT_DATA_SIZE]; + + +/* Naive implementation of CRC16 for validation purposes */ +static inline u16 _crc16_naive_byte(u16 crc, u8 data) +{ + u8 i = 0; + + crc ^= (u16) data; + for (i = 0; i < 8; i++) { + if (crc & 0x01) + crc = (crc >> 1) ^ 0xa001; + else + crc = crc >> 1; + } + + return crc; +} + + +static inline u16 _crc16_naive(u16 crc, u8 *buffer, size_t len) +{ + while (len--) + crc = _crc16_naive_byte(crc, *buffer++); + return crc; +} + + +/* Small helper for generating pseudorandom 16-bit data */ +static inline u16 _rand16(void) +{ + static u32 rand = CRC16_KUNIT_SEED; + + rand = next_pseudo_random32(rand); + return rand & 0xFFFF; +} + + +static int crc16_init_test_data(struct kunit_suite *suite) +{ + size_t i; + + /* Fill the data buffer with random bytes */ + for (i = 0; i < CRC16_KUNIT_DATA_SIZE; i++) + data[i] = _rand16() & 0xFF; + + /* Generate random test data while ensuring the random + * start + length values won't overflow the 4096-byte + * buffer (0x7FF * 2 = 0xFFE < 0x1000) + */ + for (size_t i = 0; i < CRC16_KUNIT_TEST_SIZE; i++) { + tests[i].crc = _rand16(); + tests[i].start = _rand16() & 0x7FF; + tests[i].length = _rand16() & 0x7FF; + } + + return 0; +} + +static void crc16_test_empty(struct kunit *test) +{ + u16 crc; + + /* The result for empty data should be the same as the + * initial crc + */ + crc = crc16(0x00, data, 0); + KUNIT_EXPECT_EQ(test, crc, 0); + crc = crc16(0xFF, data, 0); + KUNIT_EXPECT_EQ(test, crc, 0xFF); +} + +static void crc16_test_correctness(struct kunit *test) +{ + size_t i; + u16 crc, crc_naive; + + for (i = 0; i < CRC16_KUNIT_TEST_SIZE; i++) { + /* Compare results with the naive crc16 implementation */ + crc = crc16(tests[i].crc, data + tests[i].start, + tests[i].length); + crc_naive = _crc16_naive(tests[i].crc, data + tests[i].start, + tests[i].length); + KUNIT_EXPECT_EQ(test, crc, crc_naive); + } +} + + +static void crc16_test_combine(struct kunit *test) +{ + size_t i, j; + u16 crc, crc_naive; + + /* Make sure that combining two consecutive crc16 calculations + * yields the same result as calculating the crc16 for the whole thing + */ + for (i = 0; i < CRC16_KUNIT_TEST_SIZE; i++) { + crc_naive = crc16(tests[i].crc, data + tests[i].start, tests[i].length); + for (j = 0; j < tests[i].length; j++) { + crc = crc16(tests[i].crc, data + tests[i].start, j); + crc = crc16(crc, data + tests[i].start + j, tests[i].length - j); + KUNIT_EXPECT_EQ(test, crc, crc_naive); + } + } +} + + +static struct kunit_case crc16_test_cases[] = { + KUNIT_CASE(crc16_test_empty), + KUNIT_CASE(crc16_test_combine), + KUNIT_CASE(crc16_test_correctness), + {}, +}; + +static struct kunit_suite crc16_test_suite = { + .name = "crc16", + .test_cases = crc16_test_cases, + .suite_init = crc16_init_test_data, +}; +kunit_test_suite(crc16_test_suite); + +MODULE_AUTHOR("Fabricio Gasperin "); +MODULE_AUTHOR("Vinicius Peixoto "); +MODULE_AUTHOR("Enzo Bertoloti "); +MODULE_DESCRIPTION("Unit tests for crc16"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 92a8b224b833e82d286d2100432adbac8cf8a2a1 Mon Sep 17 00:00:00 2001 From: Kuan-Wei Chiu Date: Sun, 20 Oct 2024 12:01:51 +0800 Subject: lib/min_heap: introduce non-inline versions of min heap API functions Patch series "Enhance min heap API with non-inline functions and optimizations", v2. Add non-inline versions of the min heap API functions in lib/min_heap.c and updates all users outside of kernel/events/core.c to use these non-inline versions. To mitigate the performance impact of indirect function calls caused by the non-inline versions of the swap and compare functions, a builtin swap has been introduced that swaps elements based on their size. Additionally, it micro-optimizes the efficiency of the min heap by pre-scaling the counter, following the same approach as in lib/sort.c. Documentation for the min heap API has also been added to the core-api section. This patch (of 10): All current min heap API functions are marked with '__always_inline'. However, as the number of users increases, inlining these functions everywhere leads to a increase in kernel size. In performance-critical paths, such as when perf events are enabled and min heap functions are called on every context switch, it is important to retain the inline versions for optimal performance. To balance this, the original inline functions are kept, and additional non-inline versions of the functions have been added in lib/min_heap.c. Link: https://lkml.kernel.org/r/20241020040200.939973-1-visitorckw@gmail.com Link: https://lore.kernel.org/20240522161048.8d8bbc7b153b4ecd92c50666@linux-foundation.org Link: https://lkml.kernel.org/r/20241020040200.939973-2-visitorckw@gmail.com Signed-off-by: Kuan-Wei Chiu Suggested-by: Andrew Morton Cc: Adrian Hunter Cc: Arnaldo Carvalho de Melo Cc: Ching-Chun (Jim) Huang Cc: Coly Li Cc: Ian Rogers Cc: Ingo Molnar Cc: Jiri Olsa Cc: Jonathan Corbet Cc: Kent Overstreet Cc: Kuan-Wei Chiu Cc: "Liang, Kan" Cc: Mark Rutland Cc: Matthew Sakai Cc: Matthew Wilcox (Oracle) Cc: Namhyung Kim Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- drivers/md/bcache/Kconfig | 1 + drivers/md/dm-vdo/Kconfig | 1 + fs/bcachefs/Kconfig | 1 + include/linux/min_heap.h | 129 ++++++++++++++++++++++++++++++---------------- kernel/events/core.c | 6 +-- lib/Kconfig | 3 ++ lib/Kconfig.debug | 1 + lib/Makefile | 1 + lib/min_heap.c | 70 +++++++++++++++++++++++++ 9 files changed, 167 insertions(+), 46 deletions(-) create mode 100644 lib/min_heap.c (limited to 'lib/Kconfig.debug') diff --git a/drivers/md/bcache/Kconfig b/drivers/md/bcache/Kconfig index b2d10063d35f..d4697e79d5a3 100644 --- a/drivers/md/bcache/Kconfig +++ b/drivers/md/bcache/Kconfig @@ -5,6 +5,7 @@ config BCACHE select BLOCK_HOLDER_DEPRECATED if SYSFS select CRC64 select CLOSURES + select MIN_HEAP help Allows a block device to be used as cache for other devices; uses a btree for indexing and the layout is optimized for SSDs. diff --git a/drivers/md/dm-vdo/Kconfig b/drivers/md/dm-vdo/Kconfig index 111ecd2c2a24..2400b2bc4bc7 100644 --- a/drivers/md/dm-vdo/Kconfig +++ b/drivers/md/dm-vdo/Kconfig @@ -7,6 +7,7 @@ config DM_VDO select DM_BUFIO select LZ4_COMPRESS select LZ4_DECOMPRESS + select MIN_HEAP help This device mapper target presents a block device with deduplication, compression and thin-provisioning. diff --git a/fs/bcachefs/Kconfig b/fs/bcachefs/Kconfig index 5bac803ea367..ab6c95b895b3 100644 --- a/fs/bcachefs/Kconfig +++ b/fs/bcachefs/Kconfig @@ -24,6 +24,7 @@ config BCACHEFS_FS select XXHASH select SRCU select SYMBOLIC_ERRNAME + select MIN_HEAP help The bcachefs filesystem - a modern, copy on write filesystem, with support for multiple devices, compression, checksumming, etc. diff --git a/include/linux/min_heap.h b/include/linux/min_heap.h index 43a7b9dcf15e..0abb21173979 100644 --- a/include/linux/min_heap.h +++ b/include/linux/min_heap.h @@ -40,7 +40,7 @@ struct min_heap_callbacks { /* Initialize a min-heap. */ static __always_inline -void __min_heap_init(min_heap_char *heap, void *data, int size) +void __min_heap_init_inline(min_heap_char *heap, void *data, int size) { heap->nr = 0; heap->size = size; @@ -50,33 +50,33 @@ void __min_heap_init(min_heap_char *heap, void *data, int size) heap->data = heap->preallocated; } -#define min_heap_init(_heap, _data, _size) \ - __min_heap_init((min_heap_char *)_heap, _data, _size) +#define min_heap_init_inline(_heap, _data, _size) \ + __min_heap_init_inline((min_heap_char *)_heap, _data, _size) /* Get the minimum element from the heap. */ static __always_inline -void *__min_heap_peek(struct min_heap_char *heap) +void *__min_heap_peek_inline(struct min_heap_char *heap) { return heap->nr ? heap->data : NULL; } -#define min_heap_peek(_heap) \ - (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +#define min_heap_peek_inline(_heap) \ + (__minheap_cast(_heap) __min_heap_peek_inline((min_heap_char *)_heap)) /* Check if the heap is full. */ static __always_inline -bool __min_heap_full(min_heap_char *heap) +bool __min_heap_full_inline(min_heap_char *heap) { return heap->nr == heap->size; } -#define min_heap_full(_heap) \ - __min_heap_full((min_heap_char *)_heap) +#define min_heap_full_inline(_heap) \ + __min_heap_full_inline((min_heap_char *)_heap) /* Sift the element at pos down the heap. */ static __always_inline -void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +void __min_heap_sift_down_inline(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *left, *right; void *data = heap->data; @@ -108,13 +108,14 @@ void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, } } -#define min_heap_sift_down(_heap, _pos, _func, _args) \ - __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_down_inline(_heap, _pos, _func, _args) \ + __min_heap_sift_down_inline((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), \ + _func, _args) /* Sift up ith element from the heap, O(log2(nr)). */ static __always_inline -void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) +void __min_heap_sift_up_inline(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; size_t parent; @@ -128,27 +129,28 @@ void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, } } -#define min_heap_sift_up(_heap, _idx, _func, _args) \ - __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) +#define min_heap_sift_up_inline(_heap, _idx, _func, _args) \ + __min_heap_sift_up_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ + _func, _args) /* Floyd's approach to heapification that is O(nr). */ static __always_inline -void __min_heapify_all(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +void __min_heapify_all_inline(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { int i; for (i = heap->nr / 2 - 1; i >= 0; i--) - __min_heap_sift_down(heap, i, elem_size, func, args); + __min_heap_sift_down_inline(heap, i, elem_size, func, args); } -#define min_heapify_all(_heap, _func, _args) \ - __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heapify_all_inline(_heap, _func, _args) \ + __min_heapify_all_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* Remove minimum element from the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_pop(min_heap_char *heap, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_pop_inline(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -158,13 +160,13 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, /* Place last element at the root (position 0) and then sift down. */ heap->nr--; memcpy(data, data + (heap->nr * elem_size), elem_size); - __min_heap_sift_down(heap, 0, elem_size, func, args); + __min_heap_sift_down_inline(heap, 0, elem_size, func, args); return true; } -#define min_heap_pop(_heap, _func, _args) \ - __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_inline(_heap, _func, _args) \ + __min_heap_pop_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) /* * Remove the minimum element and then push the given element. The @@ -172,22 +174,21 @@ bool __min_heap_pop(min_heap_char *heap, size_t elem_size, * efficient than a pop followed by a push that does 2. */ static __always_inline -void __min_heap_pop_push(min_heap_char *heap, - const void *element, size_t elem_size, - const struct min_heap_callbacks *func, - void *args) +void __min_heap_pop_push_inline(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { memcpy(heap->data, element, elem_size); - __min_heap_sift_down(heap, 0, elem_size, func, args); + __min_heap_sift_down_inline(heap, 0, elem_size, func, args); } -#define min_heap_pop_push(_heap, _element, _func, _args) \ - __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_push_inline(_heap, _element, _func, _args) \ + __min_heap_pop_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) /* Push an element on to the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_push_inline(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; int pos; @@ -201,18 +202,19 @@ bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, heap->nr++; /* Sift child at pos up. */ - __min_heap_sift_up(heap, elem_size, pos, func, args); + __min_heap_sift_up_inline(heap, elem_size, pos, func, args); return true; } -#define min_heap_push(_heap, _element, _func, _args) \ - __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) +#define min_heap_push_inline(_heap, _element, _func, _args) \ + __min_heap_push_inline((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) /* Remove ith element from the heap, O(log2(nr)). */ static __always_inline -bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, - const struct min_heap_callbacks *func, void *args) +bool __min_heap_del_inline(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) { void *data = heap->data; @@ -224,12 +226,53 @@ bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, if (idx == heap->nr) return true; func->swp(data + (idx * elem_size), data + (heap->nr * elem_size), args); - __min_heap_sift_up(heap, elem_size, idx, func, args); - __min_heap_sift_down(heap, idx, elem_size, func, args); + __min_heap_sift_up_inline(heap, elem_size, idx, func, args); + __min_heap_sift_down_inline(heap, idx, elem_size, func, args); return true; } +#define min_heap_del_inline(_heap, _idx, _func, _args) \ + __min_heap_del_inline((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, \ + _func, _args) + +void __min_heap_init(min_heap_char *heap, void *data, int size); +void *__min_heap_peek(struct min_heap_char *heap); +bool __min_heap_full(min_heap_char *heap); +void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args); +void __min_heapify_all(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args); +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args); + +#define min_heap_init(_heap, _data, _size) \ + __min_heap_init((min_heap_char *)_heap, _data, _size) +#define min_heap_peek(_heap) \ + (__minheap_cast(_heap) __min_heap_peek((min_heap_char *)_heap)) +#define min_heap_full(_heap) \ + __min_heap_full((min_heap_char *)_heap) +#define min_heap_sift_down(_heap, _pos, _func, _args) \ + __min_heap_sift_down((min_heap_char *)_heap, _pos, __minheap_obj_size(_heap), _func, _args) +#define min_heap_sift_up(_heap, _idx, _func, _args) \ + __min_heap_sift_up((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) +#define min_heapify_all(_heap, _func, _args) \ + __min_heapify_all((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop(_heap, _func, _args) \ + __min_heap_pop((min_heap_char *)_heap, __minheap_obj_size(_heap), _func, _args) +#define min_heap_pop_push(_heap, _element, _func, _args) \ + __min_heap_pop_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), \ + _func, _args) +#define min_heap_push(_heap, _element, _func, _args) \ + __min_heap_push((min_heap_char *)_heap, _element, __minheap_obj_size(_heap), _func, _args) #define min_heap_del(_heap, _idx, _func, _args) \ __min_heap_del((min_heap_char *)_heap, __minheap_obj_size(_heap), _idx, _func, _args) diff --git a/kernel/events/core.c b/kernel/events/core.c index df27d08a7232..1b3c1198b2af 100644 --- a/kernel/events/core.c +++ b/kernel/events/core.c @@ -3870,7 +3870,7 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, perf_assert_pmu_disabled((*evt)->pmu_ctx->pmu); } - min_heapify_all(&event_heap, &perf_min_heap, NULL); + min_heapify_all_inline(&event_heap, &perf_min_heap, NULL); while (event_heap.nr) { ret = func(*evt, data); @@ -3879,9 +3879,9 @@ static noinline int visit_groups_merge(struct perf_event_context *ctx, *evt = perf_event_groups_next(*evt, pmu); if (*evt) - min_heap_sift_down(&event_heap, 0, &perf_min_heap, NULL); + min_heap_sift_down_inline(&event_heap, 0, &perf_min_heap, NULL); else - min_heap_pop(&event_heap, &perf_min_heap, NULL); + min_heap_pop_inline(&event_heap, &perf_min_heap, NULL); } return 0; diff --git a/lib/Kconfig b/lib/Kconfig index cf303bd91dda..f5a2781669ea 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -780,3 +780,6 @@ config FIRMWARE_TABLE config UNION_FIND bool + +config MIN_HEAP + bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index eda319e9d569..2549b64b2280 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2279,6 +2279,7 @@ config TEST_LIST_SORT config TEST_MIN_HEAP tristate "Min heap test" depends on DEBUG_KERNEL || m + select MIN_HEAP help Enable this to turn on min heap function tests. This test is executed only once during system boot (so affects only boot time), diff --git a/lib/Makefile b/lib/Makefile index feebed74fc7a..1eb89962daef 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -40,6 +40,7 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ lib-$(CONFIG_UNION_FIND) += union_find.o lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_SMP) += cpumask.o +lib-$(CONFIG_MIN_HEAP) += min_heap.o lib-y += kobject.o klist.o obj-y += lockref.o diff --git a/lib/min_heap.c b/lib/min_heap.c new file mode 100644 index 000000000000..4485372ff3b1 --- /dev/null +++ b/lib/min_heap.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include + +void __min_heap_init(min_heap_char *heap, void *data, int size) +{ + __min_heap_init_inline(heap, data, size); +} +EXPORT_SYMBOL(__min_heap_init); + +void *__min_heap_peek(struct min_heap_char *heap) +{ + return __min_heap_peek_inline(heap); +} +EXPORT_SYMBOL(__min_heap_peek); + +bool __min_heap_full(min_heap_char *heap) +{ + return __min_heap_full_inline(heap); +} +EXPORT_SYMBOL(__min_heap_full); + +void __min_heap_sift_down(min_heap_char *heap, int pos, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_down_inline(heap, pos, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_down); + +void __min_heap_sift_up(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_sift_up_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_sift_up); + +void __min_heapify_all(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heapify_all_inline(heap, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heapify_all); + +bool __min_heap_pop(min_heap_char *heap, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_pop_inline(heap, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_pop); + +void __min_heap_pop_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + __min_heap_pop_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_pop_push); + +bool __min_heap_push(min_heap_char *heap, const void *element, size_t elem_size, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_push_inline(heap, element, elem_size, func, args); +} +EXPORT_SYMBOL(__min_heap_push); + +bool __min_heap_del(min_heap_char *heap, size_t elem_size, size_t idx, + const struct min_heap_callbacks *func, void *args) +{ + return __min_heap_del_inline(heap, elem_size, idx, func, args); +} +EXPORT_SYMBOL(__min_heap_del); -- cgit v1.2.3 From 2b37c814aab74130bcf2573a9dc1551301283cea Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 25 Oct 2024 16:14:50 +0200 Subject: lib/Kconfig.debug: Default STRICT_DEVMEM to "y" on s390 virtio-mem currently depends on !DEVMEM | STRICT_DEVMEM. Let's default STRICT_DEVMEM to "y" just like we do for arm64 and x86. There could be ways in the future to filter access to virtio-mem device memory even without STRICT_DEVMEM, but for now let's just keep it simple. Tested-by: Mario Casquero Acked-by: Heiko Carstens Signed-off-by: David Hildenbrand Tested-by: Sumanth Korikkar Acked-by: Christian Borntraeger Link: https://lore.kernel.org/r/20241025141453.1210600-6-david@redhat.com Signed-off-by: Heiko Carstens --- lib/Kconfig.debug | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7315f643817a..e7a917540e2a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1905,7 +1905,7 @@ config STRICT_DEVMEM bool "Filter access to /dev/mem" depends on MMU && DEVMEM depends on ARCH_HAS_DEVMEM_IS_ALLOWED || GENERIC_LIB_DEVMEM_IS_ALLOWED - default y if PPC || X86 || ARM64 + default y if PPC || X86 || ARM64 || S390 help If this option is disabled, you allow userspace (root) access to all of memory, including kernel and userspace memory. Accidental -- cgit v1.2.3 From 0f9b685626daa2f8e19a9788625c9b624c223e45 Mon Sep 17 00:00:00 2001 From: Suren Baghdasaryan Date: Wed, 23 Oct 2024 10:07:57 -0700 Subject: alloc_tag: populate memory for module tags as needed The memory reserved for module tags does not need to be backed by physical pages until there are tags to store there. Change the way we reserve this memory to allocate only virtual area for the tags and populate it with physical pages as needed when we load a module. [surenb@google.com: avoid execmem_vmap() when !MMU] Link: https://lkml.kernel.org/r/20241031233611.3833002-1-surenb@google.com Link: https://lkml.kernel.org/r/20241023170759.999909-5-surenb@google.com Signed-off-by: Suren Baghdasaryan Reviewed-by: Pasha Tatashin Cc: Ard Biesheuvel Cc: Arnd Bergmann Cc: Borislav Petkov (AMD) Cc: Christoph Hellwig Cc: Daniel Gomez Cc: David Hildenbrand Cc: Davidlohr Bueso Cc: David Rientjes Cc: Dennis Zhou Cc: Johannes Weiner Cc: John Hubbard Cc: Jonathan Corbet Cc: Joonsoo Kim Cc: Kalesh Singh Cc: Kees Cook Cc: Kent Overstreet Cc: Liam R. Howlett Cc: Luis Chamberlain Cc: Matthew Wilcox Cc: Michal Hocko Cc: Mike Rapoport (Microsoft) Cc: Minchan Kim Cc: Paul E. McKenney Cc: Petr Pavlu Cc: Roman Gushchin Cc: Sami Tolvanen Cc: Sourav Panda Cc: Steven Rostedt (Google) Cc: Thomas Gleixner Cc: Thomas Huth Cc: Uladzislau Rezki (Sony) Cc: Vlastimil Babka Cc: Xiongwei Song Cc: Yu Zhao Signed-off-by: Andrew Morton --- include/linux/execmem.h | 12 ++++++++ include/linux/vmalloc.h | 3 ++ lib/Kconfig.debug | 1 + lib/alloc_tag.c | 73 +++++++++++++++++++++++++++++++++++++++++++------ mm/execmem.c | 16 +++++++++++ mm/internal.h | 6 ++++ mm/vmalloc.c | 4 +-- 7 files changed, 104 insertions(+), 11 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/include/linux/execmem.h b/include/linux/execmem.h index 1517fa196bf7..64130ae19690 100644 --- a/include/linux/execmem.h +++ b/include/linux/execmem.h @@ -139,6 +139,18 @@ void *execmem_alloc(enum execmem_type type, size_t size); */ void execmem_free(void *ptr); +#ifdef CONFIG_MMU +/** + * execmem_vmap - create virtual mapping for EXECMEM_MODULE_DATA memory + * @size: size of the virtual mapping in bytes + * + * Maps virtually contiguous area in the range suitable for EXECMEM_MODULE_DATA. + * + * Return: the area descriptor on success or %NULL on failure. + */ +struct vm_struct *execmem_vmap(size_t size); +#endif + /** * execmem_update_copy - copy an update to executable memory * @dst: destination address to update diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 27408f21e501..31e9ffd936e3 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -202,6 +202,9 @@ extern int remap_vmalloc_range_partial(struct vm_area_struct *vma, extern int remap_vmalloc_range(struct vm_area_struct *vma, void *addr, unsigned long pgoff); +int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot, + struct page **pages, unsigned int page_shift); + /* * Architectures can set this mask to a combination of PGTBL_P?D_MODIFIED values * and let generic vmalloc and ioremap code know when arch_sync_kernel_mappings() diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7312ae7c3cc5..6798bbbcbd32 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -993,6 +993,7 @@ config CODE_TAGGING config MEM_ALLOC_PROFILING bool "Enable memory allocation profiling" default n + depends on MMU depends on PROC_FS depends on !DEBUG_FORCE_WEAK_PER_CPU select CODE_TAGGING diff --git a/lib/alloc_tag.c b/lib/alloc_tag.c index 5f9cd1642d58..4a7fc081b789 100644 --- a/lib/alloc_tag.c +++ b/lib/alloc_tag.c @@ -8,14 +8,15 @@ #include #include #include +#include #define ALLOCINFO_FILE_NAME "allocinfo" #define MODULE_ALLOC_TAG_VMAP_SIZE (100000UL * sizeof(struct alloc_tag)) #ifdef CONFIG_MEM_ALLOC_PROFILING_ENABLED_BY_DEFAULT -static bool mem_profiling_support __meminitdata = true; +static bool mem_profiling_support = true; #else -static bool mem_profiling_support __meminitdata; +static bool mem_profiling_support; #endif static struct codetag_type *alloc_tag_cttype; @@ -154,7 +155,7 @@ size_t alloc_tag_top_users(struct codetag_bytes *tags, size_t count, bool can_sl return nr; } -static void __init shutdown_mem_profiling(void) +static void shutdown_mem_profiling(void) { if (mem_alloc_profiling_enabled()) static_branch_disable(&mem_alloc_profiling_key); @@ -179,6 +180,7 @@ static void __init procfs_init(void) #ifdef CONFIG_MODULES static struct maple_tree mod_area_mt = MTREE_INIT(mod_area_mt, MT_FLAGS_ALLOC_RANGE); +static struct vm_struct *vm_module_tags; /* A dummy object used to indicate an unloaded module */ static struct module unloaded_mod; /* A dummy object used to indicate a module prepended area */ @@ -252,6 +254,33 @@ repeat: return false; } +static int vm_module_tags_populate(void) +{ + unsigned long phys_size = vm_module_tags->nr_pages << PAGE_SHIFT; + + if (phys_size < module_tags.size) { + struct page **next_page = vm_module_tags->pages + vm_module_tags->nr_pages; + unsigned long addr = module_tags.start_addr + phys_size; + unsigned long more_pages; + unsigned long nr; + + more_pages = ALIGN(module_tags.size - phys_size, PAGE_SIZE) >> PAGE_SHIFT; + nr = alloc_pages_bulk_array_node(GFP_KERNEL | __GFP_NOWARN, + NUMA_NO_NODE, more_pages, next_page); + if (nr < more_pages || + vmap_pages_range(addr, addr + (nr << PAGE_SHIFT), PAGE_KERNEL, + next_page, PAGE_SHIFT) < 0) { + /* Clean up and error out */ + for (int i = 0; i < nr; i++) + __free_page(next_page[i]); + return -ENOMEM; + } + vm_module_tags->nr_pages += nr; + } + + return 0; +} + static void *reserve_module_tags(struct module *mod, unsigned long size, unsigned int prepend, unsigned long align) { @@ -310,8 +339,18 @@ unlock: if (IS_ERR(ret)) return ret; - if (module_tags.size < offset + size) + if (module_tags.size < offset + size) { + int grow_res; + module_tags.size = offset + size; + grow_res = vm_module_tags_populate(); + if (grow_res) { + shutdown_mem_profiling(); + pr_err("Failed to allocate memory for allocation tags in the module %s. Memory allocation profiling is disabled!\n", + mod->name); + return ERR_PTR(grow_res); + } + } return (struct alloc_tag *)(module_tags.start_addr + offset); } @@ -372,12 +411,23 @@ static void replace_module(struct module *mod, struct module *new_mod) static int __init alloc_mod_tags_mem(void) { - /* Allocate space to copy allocation tags */ - module_tags.start_addr = (unsigned long)execmem_alloc(EXECMEM_MODULE_DATA, - MODULE_ALLOC_TAG_VMAP_SIZE); - if (!module_tags.start_addr) + /* Map space to copy allocation tags */ + vm_module_tags = execmem_vmap(MODULE_ALLOC_TAG_VMAP_SIZE); + if (!vm_module_tags) { + pr_err("Failed to map %lu bytes for module allocation tags\n", + MODULE_ALLOC_TAG_VMAP_SIZE); + module_tags.start_addr = 0; return -ENOMEM; + } + vm_module_tags->pages = kmalloc_array(get_vm_area_size(vm_module_tags) >> PAGE_SHIFT, + sizeof(struct page *), GFP_KERNEL | __GFP_ZERO); + if (!vm_module_tags->pages) { + free_vm_area(vm_module_tags); + return -ENOMEM; + } + + module_tags.start_addr = (unsigned long)vm_module_tags->addr; module_tags.end_addr = module_tags.start_addr + MODULE_ALLOC_TAG_VMAP_SIZE; return 0; @@ -385,8 +435,13 @@ static int __init alloc_mod_tags_mem(void) static void __init free_mod_tags_mem(void) { - execmem_free((void *)module_tags.start_addr); + int i; + module_tags.start_addr = 0; + for (i = 0; i < vm_module_tags->nr_pages; i++) + __free_page(vm_module_tags->pages[i]); + kfree(vm_module_tags->pages); + free_vm_area(vm_module_tags); } #else /* CONFIG_MODULES */ diff --git a/mm/execmem.c b/mm/execmem.c index 576a57e2161f..317b6a8d35be 100644 --- a/mm/execmem.c +++ b/mm/execmem.c @@ -64,6 +64,22 @@ static void *execmem_vmalloc(struct execmem_range *range, size_t size, return p; } + +struct vm_struct *execmem_vmap(size_t size) +{ + struct execmem_range *range = &execmem_info->ranges[EXECMEM_MODULE_DATA]; + struct vm_struct *area; + + area = __get_vm_area_node(size, range->alignment, PAGE_SHIFT, VM_ALLOC, + range->start, range->end, NUMA_NO_NODE, + GFP_KERNEL, __builtin_return_address(0)); + if (!area && range->fallback_start) + area = __get_vm_area_node(size, range->alignment, PAGE_SHIFT, VM_ALLOC, + range->fallback_start, range->fallback_end, + NUMA_NO_NODE, GFP_KERNEL, __builtin_return_address(0)); + + return area; +} #else static void *execmem_vmalloc(struct execmem_range *range, size_t size, pgprot_t pgprot, unsigned long vm_flags) diff --git a/mm/internal.h b/mm/internal.h index 3dc745ba76dd..cd96848be245 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -1263,6 +1263,12 @@ int numa_migrate_check(struct folio *folio, struct vm_fault *vmf, void free_zone_device_folio(struct folio *folio); int migrate_device_coherent_folio(struct folio *folio); +struct vm_struct *__get_vm_area_node(unsigned long size, + unsigned long align, unsigned long shift, + unsigned long flags, unsigned long start, + unsigned long end, int node, gfp_t gfp_mask, + const void *caller); + /* * mm/gup.c */ diff --git a/mm/vmalloc.c b/mm/vmalloc.c index 74c0a5eae210..7ed39d104201 100644 --- a/mm/vmalloc.c +++ b/mm/vmalloc.c @@ -653,7 +653,7 @@ int vmap_pages_range_noflush(unsigned long addr, unsigned long end, * RETURNS: * 0 on success, -errno on failure. */ -static int vmap_pages_range(unsigned long addr, unsigned long end, +int vmap_pages_range(unsigned long addr, unsigned long end, pgprot_t prot, struct page **pages, unsigned int page_shift) { int err; @@ -3106,7 +3106,7 @@ static void clear_vm_uninitialized_flag(struct vm_struct *vm) vm->flags &= ~VM_UNINITIALIZED; } -static struct vm_struct *__get_vm_area_node(unsigned long size, +struct vm_struct *__get_vm_area_node(unsigned long size, unsigned long align, unsigned long shift, unsigned long flags, unsigned long start, unsigned long end, int node, gfp_t gfp_mask, const void *caller) -- cgit v1.2.3 From 111314157f7891da7a51a8f95df42eeb22f4268a Mon Sep 17 00:00:00 2001 From: Alexandru Ardelean Date: Tue, 5 Nov 2024 16:54:06 +0200 Subject: lib: util_macros_kunit: add kunit test for util_macros.h A bug was found in the find_closest() (find_closest_descending() is also affected after some testing), where for certain values with small progressions of 1, 2 & 3, the rounding (done by averaging 2 values) causes an incorrect index to be returned. The bug is described in more detail in the commit which fixes the bug. This commit adds a kunit test to validate that the fix works correctly. This kunit test adds some of the arrays (from the driver-sphere) that seem to produce issues with the 'find_closest()' macro. Specifically the one from ad7606 driver (with which the bug was found) and from the ina2xx drivers, which shows the quirk with 'find_closest()' with elements in a array that have an interval of 3. For the find_closest_descending() tests, the same arrays are used as for the find_closest(), but in reverse; the idea is that 'find_closest_descending()' should return the sames indices as 'find_closest()' but in reverse. For testing both macros, there are 4 special arrays created, one for testing find_closest{_descending}() for arrays of progressions 1, 2, 3 and 4. The idea is to show that (for progressions of 1, 2 & 3) the fix works as expected. When removing the fix, the issues should start to show up. Then an extra array of negative and positive values is added. There are currently no such arrays within drivers, but one could expect that these macros behave correctly even for such arrays. To run this kunit: ./tools/testing/kunit/kunit.py run "*util_macros*" Link: https://lkml.kernel.org/r/20241105145406.554365-2-aardelean@baylibre.com Signed-off-by: Alexandru Ardelean Cc: Bartosz Golaszewski Cc: Greg Kroah-Hartman Signed-off-by: Andrew Morton --- lib/Kconfig.debug | 17 ++++ lib/Makefile | 1 + lib/util_macros_kunit.c | 240 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 258 insertions(+) create mode 100644 lib/util_macros_kunit.c (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 2549b64b2280..d7dd38f14333 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2630,6 +2630,23 @@ config CHECKSUM_KUNIT If unsure, say N. +config UTIL_MACROS_KUNIT + tristate "KUnit test util_macros.h functions at runtime" if !KUNIT_ALL_TESTS + depends on KUNIT + default KUNIT_ALL_TESTS + help + Enable this option to test the util_macros.h function at boot. + + KUnit tests run during boot and output the results to the debug log + in TAP format (http://testanything.org/). Only useful for kernel devs + running the KUnit test harness, and not intended for inclusion into a + production build. + + For more information on KUnit and unit tests in general please refer + to the KUnit documentation in Documentation/dev-tools/kunit/. + + If unsure, say N. + config HASH_KUNIT_TEST tristate "KUnit Test for integer hash functions" if !KUNIT_ALL_TESTS depends on KUNIT diff --git a/lib/Makefile b/lib/Makefile index 1eb89962daef..cc26f81722a5 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -372,6 +372,7 @@ obj-$(CONFIG_PLDMFW) += pldmfw/ CFLAGS_bitfield_kunit.o := $(DISABLE_STRUCTLEAK_PLUGIN) obj-$(CONFIG_BITFIELD_KUNIT) += bitfield_kunit.o obj-$(CONFIG_CHECKSUM_KUNIT) += checksum_kunit.o +obj-$(CONFIG_UTIL_MACROS_KUNIT) += util_macros_kunit.o obj-$(CONFIG_LIST_KUNIT_TEST) += list-test.o obj-$(CONFIG_HASHTABLE_KUNIT_TEST) += hashtable_test.o obj-$(CONFIG_LINEAR_RANGES_TEST) += test_linear_ranges.o diff --git a/lib/util_macros_kunit.c b/lib/util_macros_kunit.c new file mode 100644 index 000000000000..94cc9f0de50a --- /dev/null +++ b/lib/util_macros_kunit.c @@ -0,0 +1,240 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for bitfield helpers. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include +#include + +#define FIND_CLOSEST_RANGE_CHECK(from, to, array, exp_idx) \ +{ \ + int i; \ + for (i = from; i <= to; i++) { \ + int found = find_closest(i, array, ARRAY_SIZE(array)); \ + KUNIT_ASSERT_EQ(ctx, exp_idx, found); \ + } \ +} + +static void test_find_closest(struct kunit *ctx) +{ + /* This will test a few arrays that are found in drivers */ + static const int ina226_avg_tab[] = { 1, 4, 16, 64, 128, 256, 512, 1024 }; + static const unsigned int ad7616_oversampling_avail[] = { + 1, 2, 4, 8, 16, 32, 64, 128, + }; + static u32 wd_timeout_table[] = { 2, 4, 6, 8, 16, 32, 48, 64 }; + static int array_prog1a[] = { 1, 2, 3, 4, 5 }; + static u32 array_prog1b[] = { 2, 3, 4, 5, 6 }; + static int array_prog1mix[] = { -2, -1, 0, 1, 2 }; + static int array_prog2a[] = { 1, 3, 5, 7 }; + static u32 array_prog2b[] = { 2, 4, 6, 8 }; + static int array_prog3a[] = { 1, 4, 7, 10 }; + static u32 array_prog3b[] = { 2, 5, 8, 11 }; + static int array_prog4a[] = { 1, 5, 9, 13 }; + static u32 array_prog4b[] = { 2, 6, 10, 14 }; + + FIND_CLOSEST_RANGE_CHECK(-3, 2, ina226_avg_tab, 0); + FIND_CLOSEST_RANGE_CHECK(3, 10, ina226_avg_tab, 1); + FIND_CLOSEST_RANGE_CHECK(11, 40, ina226_avg_tab, 2); + FIND_CLOSEST_RANGE_CHECK(41, 96, ina226_avg_tab, 3); + FIND_CLOSEST_RANGE_CHECK(97, 192, ina226_avg_tab, 4); + FIND_CLOSEST_RANGE_CHECK(193, 384, ina226_avg_tab, 5); + FIND_CLOSEST_RANGE_CHECK(385, 768, ina226_avg_tab, 6); + FIND_CLOSEST_RANGE_CHECK(769, 2048, ina226_avg_tab, 7); + + /* The array that found the bug that caused this kunit to exist */ + FIND_CLOSEST_RANGE_CHECK(-3, 1, ad7616_oversampling_avail, 0); + FIND_CLOSEST_RANGE_CHECK(2, 3, ad7616_oversampling_avail, 1); + FIND_CLOSEST_RANGE_CHECK(4, 6, ad7616_oversampling_avail, 2); + FIND_CLOSEST_RANGE_CHECK(7, 12, ad7616_oversampling_avail, 3); + FIND_CLOSEST_RANGE_CHECK(13, 24, ad7616_oversampling_avail, 4); + FIND_CLOSEST_RANGE_CHECK(25, 48, ad7616_oversampling_avail, 5); + FIND_CLOSEST_RANGE_CHECK(49, 96, ad7616_oversampling_avail, 6); + FIND_CLOSEST_RANGE_CHECK(97, 256, ad7616_oversampling_avail, 7); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, wd_timeout_table, 0); + FIND_CLOSEST_RANGE_CHECK(4, 5, wd_timeout_table, 1); + FIND_CLOSEST_RANGE_CHECK(6, 7, wd_timeout_table, 2); + FIND_CLOSEST_RANGE_CHECK(8, 12, wd_timeout_table, 3); + FIND_CLOSEST_RANGE_CHECK(13, 24, wd_timeout_table, 4); + FIND_CLOSEST_RANGE_CHECK(25, 40, wd_timeout_table, 5); + FIND_CLOSEST_RANGE_CHECK(41, 56, wd_timeout_table, 6); + FIND_CLOSEST_RANGE_CHECK(57, 128, wd_timeout_table, 7); + + /* One could argue that find_closest() should not be used for monotonic + * arrays (like 1,2,3,4,5), but even so, it should work as long as the + * array is sorted ascending. */ + FIND_CLOSEST_RANGE_CHECK(-3, 1, array_prog1a, 0); + FIND_CLOSEST_RANGE_CHECK(2, 2, array_prog1a, 1); + FIND_CLOSEST_RANGE_CHECK(3, 3, array_prog1a, 2); + FIND_CLOSEST_RANGE_CHECK(4, 4, array_prog1a, 3); + FIND_CLOSEST_RANGE_CHECK(5, 8, array_prog1a, 4); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog1b, 0); + FIND_CLOSEST_RANGE_CHECK(3, 3, array_prog1b, 1); + FIND_CLOSEST_RANGE_CHECK(4, 4, array_prog1b, 2); + FIND_CLOSEST_RANGE_CHECK(5, 5, array_prog1b, 3); + FIND_CLOSEST_RANGE_CHECK(6, 8, array_prog1b, 4); + + FIND_CLOSEST_RANGE_CHECK(-4, -2, array_prog1mix, 0); + FIND_CLOSEST_RANGE_CHECK(-1, -1, array_prog1mix, 1); + FIND_CLOSEST_RANGE_CHECK(0, 0, array_prog1mix, 2); + FIND_CLOSEST_RANGE_CHECK(1, 1, array_prog1mix, 3); + FIND_CLOSEST_RANGE_CHECK(2, 5, array_prog1mix, 4); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog2a, 0); + FIND_CLOSEST_RANGE_CHECK(3, 4, array_prog2a, 1); + FIND_CLOSEST_RANGE_CHECK(5, 6, array_prog2a, 2); + FIND_CLOSEST_RANGE_CHECK(7, 10, array_prog2a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog2b, 0); + FIND_CLOSEST_RANGE_CHECK(4, 5, array_prog2b, 1); + FIND_CLOSEST_RANGE_CHECK(6, 7, array_prog2b, 2); + FIND_CLOSEST_RANGE_CHECK(8, 10, array_prog2b, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 2, array_prog3a, 0); + FIND_CLOSEST_RANGE_CHECK(3, 5, array_prog3a, 1); + FIND_CLOSEST_RANGE_CHECK(6, 8, array_prog3a, 2); + FIND_CLOSEST_RANGE_CHECK(9, 20, array_prog3a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog3b, 0); + FIND_CLOSEST_RANGE_CHECK(4, 6, array_prog3b, 1); + FIND_CLOSEST_RANGE_CHECK(7, 9, array_prog3b, 2); + FIND_CLOSEST_RANGE_CHECK(10, 20, array_prog3b, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 3, array_prog4a, 0); + FIND_CLOSEST_RANGE_CHECK(4, 7, array_prog4a, 1); + FIND_CLOSEST_RANGE_CHECK(8, 11, array_prog4a, 2); + FIND_CLOSEST_RANGE_CHECK(12, 20, array_prog4a, 3); + + FIND_CLOSEST_RANGE_CHECK(-3, 4, array_prog4b, 0); + FIND_CLOSEST_RANGE_CHECK(5, 8, array_prog4b, 1); + FIND_CLOSEST_RANGE_CHECK(9, 12, array_prog4b, 2); + FIND_CLOSEST_RANGE_CHECK(13, 20, array_prog4b, 3); +} + +#define FIND_CLOSEST_DESC_RANGE_CHECK(from, to, array, exp_idx) \ +{ \ + int i; \ + for (i = from; i <= to; i++) { \ + int found = find_closest_descending(i, array, \ + ARRAY_SIZE(array)); \ + KUNIT_ASSERT_EQ(ctx, exp_idx, found); \ + } \ +} + +static void test_find_closest_descending(struct kunit *ctx) +{ + /* Same arrays as 'test_find_closest' but reversed */ + static const int ina226_avg_tab[] = { 1024, 512, 256, 128, 64, 16, 4, 1 }; + static const unsigned int ad7616_oversampling_avail[] = { + 128, 64, 32, 16, 8, 4, 2, 1 + }; + static u32 wd_timeout_table[] = { 64, 48, 32, 16, 8, 6, 4, 2 }; + static int array_prog1a[] = { 5, 4, 3, 2, 1 }; + static u32 array_prog1b[] = { 6, 5, 4, 3, 2 }; + static int array_prog1mix[] = { 2, 1, 0, -1, -2 }; + static int array_prog2a[] = { 7, 5, 3, 1 }; + static u32 array_prog2b[] = { 8, 6, 4, 2 }; + static int array_prog3a[] = { 10, 7, 4, 1 }; + static u32 array_prog3b[] = { 11, 8, 5, 2 }; + static int array_prog4a[] = { 13, 9, 5, 1 }; + static u32 array_prog4b[] = { 14, 10, 6, 2 }; + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, ina226_avg_tab, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 10, ina226_avg_tab, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(11, 40, ina226_avg_tab, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(41, 96, ina226_avg_tab, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(97, 192, ina226_avg_tab, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(193, 384, ina226_avg_tab, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(385, 768, ina226_avg_tab, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(769, 2048, ina226_avg_tab, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 1, ad7616_oversampling_avail, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 3, ad7616_oversampling_avail, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 6, ad7616_oversampling_avail, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 12, ad7616_oversampling_avail, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 24, ad7616_oversampling_avail, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(25, 48, ad7616_oversampling_avail, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(49, 96, ad7616_oversampling_avail, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(97, 256, ad7616_oversampling_avail, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, wd_timeout_table, 7); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 5, wd_timeout_table, 6); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 7, wd_timeout_table, 5); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 12, wd_timeout_table, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 24, wd_timeout_table, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(25, 40, wd_timeout_table, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(41, 56, wd_timeout_table, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(57, 128, wd_timeout_table, 0); + + /* One could argue that find_closest_descending() should not be used + * for monotonic arrays (like 5,4,3,2,1), but even so, it should still + * it should work as long as the array is sorted descending. */ + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 1, array_prog1a, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 2, array_prog1a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 3, array_prog1a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 4, array_prog1a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 8, array_prog1a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog1b, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 3, array_prog1b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 4, array_prog1b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 5, array_prog1b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 8, array_prog1b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-4, -2, array_prog1mix, 4); + FIND_CLOSEST_DESC_RANGE_CHECK(-1, -1, array_prog1mix, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(0, 0, array_prog1mix, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(1, 1, array_prog1mix, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(2, 5, array_prog1mix, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog2a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 4, array_prog2a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 6, array_prog2a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 10, array_prog2a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog2b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 5, array_prog2b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 7, array_prog2b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 10, array_prog2b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 2, array_prog3a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(3, 5, array_prog3a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(6, 8, array_prog3a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(9, 20, array_prog3a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog3b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 6, array_prog3b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(7, 9, array_prog3b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(10, 20, array_prog3b, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 3, array_prog4a, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(4, 7, array_prog4a, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(8, 11, array_prog4a, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(12, 20, array_prog4a, 0); + + FIND_CLOSEST_DESC_RANGE_CHECK(-3, 4, array_prog4b, 3); + FIND_CLOSEST_DESC_RANGE_CHECK(5, 8, array_prog4b, 2); + FIND_CLOSEST_DESC_RANGE_CHECK(9, 12, array_prog4b, 1); + FIND_CLOSEST_DESC_RANGE_CHECK(13, 20, array_prog4b, 0); +} + +static struct kunit_case __refdata util_macros_test_cases[] = { + KUNIT_CASE(test_find_closest), + KUNIT_CASE(test_find_closest_descending), + {} +}; + +static struct kunit_suite util_macros_test_suite = { + .name = "util_macros.h", + .test_cases = util_macros_test_cases, +}; + +kunit_test_suites(&util_macros_test_suite); + +MODULE_AUTHOR("Alexandru Ardelean "); +MODULE_DESCRIPTION("Test cases for util_macros.h helpers"); +MODULE_LICENSE("GPL"); -- cgit v1.2.3 From 12079a59ce52e72a342c49cfacf0281213fd6f32 Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Thu, 7 Nov 2024 08:11:44 -0800 Subject: net: Implement fault injection forcing skb reallocation Introduce a fault injection mechanism to force skb reallocation. The primary goal is to catch bugs related to pointer invalidation after potential skb reallocation. The fault injection mechanism aims to identify scenarios where callers retain pointers to various headers in the skb but fail to reload these pointers after calling a function that may reallocate the data. This type of bug can lead to memory corruption or crashes if the old, now-invalid pointers are used. By forcing reallocation through fault injection, we can stress-test code paths and ensure proper pointer management after potential skb reallocations. Add a hook for fault injection in the following functions: * pskb_trim_rcsum() * pskb_may_pull_reason() * pskb_trim() As the other fault injection mechanism, protect it under a debug Kconfig called CONFIG_FAIL_SKB_REALLOC. This patch was *heavily* inspired by Jakub's proposal from: https://lore.kernel.org/all/20240719174140.47a868e6@kernel.org/ CC: Akinobu Mita Suggested-by: Jakub Kicinski Signed-off-by: Breno Leitao Reviewed-by: Akinobu Mita Acked-by: Paolo Abeni Acked-by: Guillaume Nault Link: https://patch.msgid.link/20241107-fault_v6-v6-1-1b82cb6ecacd@debian.org Signed-off-by: Paolo Abeni --- Documentation/admin-guide/kernel-parameters.txt | 1 + Documentation/fault-injection/fault-injection.rst | 40 ++++++++ include/linux/skbuff.h | 9 ++ lib/Kconfig.debug | 10 ++ net/core/Makefile | 1 + net/core/skb_fault_injection.c | 106 ++++++++++++++++++++++ 6 files changed, 167 insertions(+) create mode 100644 net/core/skb_fault_injection.c (limited to 'lib/Kconfig.debug') diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt index 1518343bbe22..2fb830453dcc 100644 --- a/Documentation/admin-guide/kernel-parameters.txt +++ b/Documentation/admin-guide/kernel-parameters.txt @@ -1546,6 +1546,7 @@ failslab= fail_usercopy= fail_page_alloc= + fail_skb_realloc= fail_make_request=[KNL] General fault injection mechanism. Format: ,,, diff --git a/Documentation/fault-injection/fault-injection.rst b/Documentation/fault-injection/fault-injection.rst index 8b8aeea71c68..1c14ba08fbfc 100644 --- a/Documentation/fault-injection/fault-injection.rst +++ b/Documentation/fault-injection/fault-injection.rst @@ -45,6 +45,32 @@ Available fault injection capabilities ALLOW_ERROR_INJECTION() macro, by setting debugfs entries under /sys/kernel/debug/fail_function. No boot option supported. +- fail_skb_realloc + + inject skb (socket buffer) reallocation events into the network path. The + primary goal is to identify and prevent issues related to pointer + mismanagement in the network subsystem. By forcing skb reallocation at + strategic points, this feature creates scenarios where existing pointers to + skb headers become invalid. + + When the fault is injected and the reallocation is triggered, cached pointers + to skb headers and data no longer reference valid memory locations. This + deliberate invalidation helps expose code paths where proper pointer updating + is neglected after a reallocation event. + + By creating these controlled fault scenarios, the system can catch instances + where stale pointers are used, potentially leading to memory corruption or + system instability. + + To select the interface to act on, write the network name to + /sys/kernel/debug/fail_skb_realloc/devname. + If this field is left empty (which is the default value), skb reallocation + will be forced on all network interfaces. + + The effectiveness of this fault detection is enhanced when KASAN is + enabled, as it helps identify invalid memory references and use-after-free + (UAF) issues. + - NVMe fault injection inject NVMe status code and retry flag on devices permitted by setting @@ -216,6 +242,19 @@ configuration of fault-injection capabilities. use a negative errno, you better use 'printf' instead of 'echo', e.g.: $ printf %#x -12 > retval +- /sys/kernel/debug/fail_skb_realloc/devname: + + Specifies the network interface on which to force SKB reallocation. If + left empty, SKB reallocation will be applied to all network interfaces. + + Example usage:: + + # Force skb reallocation on eth0 + echo "eth0" > /sys/kernel/debug/fail_skb_realloc/devname + + # Clear the selection and force skb reallocation on all interfaces + echo "" > /sys/kernel/debug/fail_skb_realloc/devname + Boot option ^^^^^^^^^^^ @@ -227,6 +266,7 @@ use the boot option:: fail_usercopy= fail_make_request= fail_futex= + fail_skb_realloc= mmc_core.fail_request=,,, proc entries diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 60535c706851..58009fa66102 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2682,6 +2682,12 @@ static inline void skb_assert_len(struct sk_buff *skb) #endif /* CONFIG_DEBUG_NET */ } +#if defined(CONFIG_FAIL_SKB_REALLOC) +void skb_might_realloc(struct sk_buff *skb); +#else +static inline void skb_might_realloc(struct sk_buff *skb) {} +#endif + /* * Add data to an sk_buff */ @@ -2782,6 +2788,7 @@ static inline enum skb_drop_reason pskb_may_pull_reason(struct sk_buff *skb, unsigned int len) { DEBUG_NET_WARN_ON_ONCE(len > INT_MAX); + skb_might_realloc(skb); if (likely(len <= skb_headlen(skb))) return SKB_NOT_DROPPED_YET; @@ -3240,6 +3247,7 @@ static inline int __pskb_trim(struct sk_buff *skb, unsigned int len) static inline int pskb_trim(struct sk_buff *skb, unsigned int len) { + skb_might_realloc(skb); return (len < skb->len) ? __pskb_trim(skb, len) : 0; } @@ -3994,6 +4002,7 @@ int pskb_trim_rcsum_slow(struct sk_buff *skb, unsigned int len); static inline int pskb_trim_rcsum(struct sk_buff *skb, unsigned int len) { + skb_might_realloc(skb); if (likely(len >= skb->len)) return 0; return pskb_trim_rcsum_slow(skb, len); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 7312ae7c3cc5..67b669d2e70e 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -2115,6 +2115,16 @@ config FAIL_SUNRPC Provide fault-injection capability for SunRPC and its consumers. +config FAIL_SKB_REALLOC + bool "Fault-injection capability forcing skb to reallocate" + depends on FAULT_INJECTION_DEBUG_FS + help + Provide fault-injection capability that forces the skb to be + reallocated, catching possible invalid pointers to the skb. + + For more information, check + Documentation/dev-tools/fault-injection/fault-injection.rst + config FAULT_INJECTION_CONFIGFS bool "Configfs interface for fault-injection capabilities" depends on FAULT_INJECTION diff --git a/net/core/Makefile b/net/core/Makefile index 5a72a87ee0f1..d9326600e289 100644 --- a/net/core/Makefile +++ b/net/core/Makefile @@ -46,3 +46,4 @@ obj-$(CONFIG_OF) += of_net.o obj-$(CONFIG_NET_TEST) += net_test.o obj-$(CONFIG_NET_DEVMEM) += devmem.o obj-$(CONFIG_DEBUG_NET_SMALL_RTNL) += rtnl_net_debug.o +obj-$(CONFIG_FAIL_SKB_REALLOC) += skb_fault_injection.o diff --git a/net/core/skb_fault_injection.c b/net/core/skb_fault_injection.c new file mode 100644 index 000000000000..4235db6bdfad --- /dev/null +++ b/net/core/skb_fault_injection.c @@ -0,0 +1,106 @@ +// SPDX-License-Identifier: GPL-2.0-only + +#include +#include +#include +#include + +static struct { + struct fault_attr attr; + char devname[IFNAMSIZ]; + bool filtered; +} skb_realloc = { + .attr = FAULT_ATTR_INITIALIZER, + .filtered = false, +}; + +static bool should_fail_net_realloc_skb(struct sk_buff *skb) +{ + struct net_device *net = skb->dev; + + if (skb_realloc.filtered && + strncmp(net->name, skb_realloc.devname, IFNAMSIZ)) + /* device name filter set, but names do not match */ + return false; + + if (!should_fail(&skb_realloc.attr, 1)) + return false; + + return true; +} +ALLOW_ERROR_INJECTION(should_fail_net_realloc_skb, TRUE); + +void skb_might_realloc(struct sk_buff *skb) +{ + if (!should_fail_net_realloc_skb(skb)) + return; + + pskb_expand_head(skb, 0, 0, GFP_ATOMIC); +} +EXPORT_SYMBOL(skb_might_realloc); + +static int __init fail_skb_realloc_setup(char *str) +{ + return setup_fault_attr(&skb_realloc.attr, str); +} +__setup("fail_skb_realloc=", fail_skb_realloc_setup); + +static void reset_settings(void) +{ + skb_realloc.filtered = false; + memset(&skb_realloc.devname, 0, IFNAMSIZ); +} + +static ssize_t devname_write(struct file *file, const char __user *buffer, + size_t count, loff_t *ppos) +{ + ssize_t ret; + + reset_settings(); + ret = simple_write_to_buffer(&skb_realloc.devname, IFNAMSIZ, + ppos, buffer, count); + if (ret < 0) + return ret; + + skb_realloc.devname[IFNAMSIZ - 1] = '\0'; + /* Remove a possible \n at the end of devname */ + strim(skb_realloc.devname); + + if (strnlen(skb_realloc.devname, IFNAMSIZ)) + skb_realloc.filtered = true; + + return count; +} + +static ssize_t devname_read(struct file *file, + char __user *buffer, + size_t size, loff_t *ppos) +{ + if (!skb_realloc.filtered) + return 0; + + return simple_read_from_buffer(buffer, size, ppos, &skb_realloc.devname, + strlen(skb_realloc.devname)); +} + +static const struct file_operations devname_ops = { + .write = devname_write, + .read = devname_read, +}; + +static int __init fail_skb_realloc_debugfs(void) +{ + umode_t mode = S_IFREG | 0600; + struct dentry *dir; + + dir = fault_create_debugfs_attr("fail_skb_realloc", NULL, + &skb_realloc.attr); + if (IS_ERR(dir)) + return PTR_ERR(dir); + + debugfs_create_file("devname", mode, dir, NULL, &devname_ops); + + return 0; +} + +late_initcall(fail_skb_realloc_debugfs); -- cgit v1.2.3 From 3e1d95b63c97506d0d98c75fc72a60662981a3c6 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 27 Nov 2024 19:06:03 -0800 Subject: selftests: kallsyms: fix and clarify current test boundaries Provide and clarify the existing ranges and what you should expect. Fix the gen_test_kallsyms.sh script to accept different ranges. Fixes: 84b4a51fce4ccc66 ("selftests: add new kallsyms selftests") Signed-off-by: Luis Chamberlain --- lib/Kconfig.debug | 32 +++++++++++++++++++++++++++++++- lib/tests/module/gen_test_kallsyms.sh | 9 +++++++-- 2 files changed, 38 insertions(+), 3 deletions(-) (limited to 'lib/Kconfig.debug') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index f340017585c5..f3d723705879 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -3003,9 +3003,39 @@ config TEST_KALLSYMS_D tristate depends on m +choice + prompt "Kallsym test range" + default TEST_KALLSYMS_LARGE + help + Selecting something other than "Fast" will enable tests which slow + down the build and may crash your build. + +config TEST_KALLSYMS_FAST + bool "Fast builds" + help + You won't really be testing kallsysms, so this just helps fast builds + when allmodconfig is used.. + +config TEST_KALLSYMS_LARGE + bool "Enable testing kallsyms with large exports" + help + This will enable larger number of symbols. This will slow down + your build considerably. + +config TEST_KALLSYMS_MAX + bool "Known kallsysms limits" + help + This will enable exports to the point we know we'll start crashing + builds. + +endchoice + config TEST_KALLSYMS_NUMSYMS int "test kallsyms number of symbols" - default 100 + range 2 10000 + default 2 if TEST_KALLSYMS_FAST + default 100 if TEST_KALLSYMS_LARGE + default 10000 if TEST_KALLSYMS_MAX help The number of symbols to create on TEST_KALLSYMS_A, only one of which module TEST_KALLSYMS_B will use. This also will be used diff --git a/lib/tests/module/gen_test_kallsyms.sh b/lib/tests/module/gen_test_kallsyms.sh index 3f2c626350ad..561dcac0f359 100755 --- a/lib/tests/module/gen_test_kallsyms.sh +++ b/lib/tests/module/gen_test_kallsyms.sh @@ -7,6 +7,11 @@ NUM_SYMS=$2 SCALE_FACTOR=$3 TEST_TYPE=$(echo $TARGET | sed -e 's|lib/tests/module/test_kallsyms_||g') TEST_TYPE=$(echo $TEST_TYPE | sed -e 's|.c||g') +FIRST_B_LOOKUP=1 + +if [[ $NUM_SYMS -gt 2 ]]; then + FIRST_B_LOOKUP=$((NUM_SYMS/2)) +fi gen_template_module_header() { @@ -52,10 +57,10 @@ ____END_MODULE gen_template_module_data_b() { - printf "\nextern int auto_test_a_%010d;\n\n" 28 + printf "\nextern int auto_test_a_%010d;\n\n" $FIRST_B_LOOKUP echo "static int auto_runtime_test(void)" echo "{" - printf "\nreturn auto_test_a_%010d;\n" 28 + printf "\nreturn auto_test_a_%010d;\n" $FIRST_B_LOOKUP echo "}" } -- cgit v1.2.3