Diffstat (limited to 'lib')
39 files changed, 2640 insertions, 464 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 0c8b78a9ae2e..6762529ad9e4 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -158,6 +158,14 @@ config CRC32_BIT endchoice +config CRC4 + tristate "CRC4 functions" + help + This option is provided for the case where no in-kernel-tree + modules require CRC4 functions, but a module built outside + the kernel tree does. Such modules that use library CRC4 + functions require M here. + config CRC7 tristate "CRC7 functions" help @@ -548,6 +556,9 @@ config ARCH_HAS_SG_CHAIN config ARCH_HAS_PMEM_API bool +config ARCH_HAS_UACCESS_FLUSHCACHE + bool + config ARCH_HAS_MMIO_FLUSH bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index e4587ebe52c7..98fe715522e8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -286,7 +286,7 @@ config DEBUG_FS write to these files. For detailed documentation on the debugfs API, see - Documentation/DocBook/filesystems. + Documentation/filesystems/. If unsure, say N. @@ -778,34 +778,45 @@ config DEBUG_SHIRQ menu "Debug Lockups and Hangs" config LOCKUP_DETECTOR - bool "Detect Hard and Soft Lockups" + bool + +config SOFTLOCKUP_DETECTOR + bool "Detect Soft Lockups" depends on DEBUG_KERNEL && !S390 + select LOCKUP_DETECTOR help Say Y here to enable the kernel to act as a watchdog to detect - hard and soft lockups. + soft lockups. Softlockups are bugs that cause the kernel to loop in kernel mode for more than 20 seconds, without giving other tasks a chance to run. The current stack trace is displayed upon detection and the system will stay locked up. +config HARDLOCKUP_DETECTOR_PERF + bool + select SOFTLOCKUP_DETECTOR + +# +# arch/ can define HAVE_HARDLOCKUP_DETECTOR_ARCH to provide their own hard +# lockup detector rather than the perf based detector. +# +config HARDLOCKUP_DETECTOR + bool "Detect Hard Lockups" + depends on DEBUG_KERNEL && !S390 + depends on HAVE_HARDLOCKUP_DETECTOR_PERF || HAVE_HARDLOCKUP_DETECTOR_ARCH + select LOCKUP_DETECTOR + select HARDLOCKUP_DETECTOR_PERF if HAVE_HARDLOCKUP_DETECTOR_PERF + select HARDLOCKUP_DETECTOR_ARCH if HAVE_HARDLOCKUP_DETECTOR_ARCH + help + Say Y here to enable the kernel to act as a watchdog to detect + hard lockups. + Hardlockups are bugs that cause the CPU to loop in kernel mode for more than 10 seconds, without letting other interrupts have a chance to run. The current stack trace is displayed upon detection and the system will stay locked up. - The overhead should be minimal. A periodic hrtimer runs to - generate interrupts and kick the watchdog task every 4 seconds. - An NMI is generated every 10 seconds or so to check for hardlockups. - - The frequency of hrtimer and NMI events and the soft and hard lockup - thresholds can be controlled through the sysctl watchdog_thresh. 
- -config HARDLOCKUP_DETECTOR - def_bool y - depends on LOCKUP_DETECTOR && !HAVE_NMI_WATCHDOG - depends on PERF_EVENTS && HAVE_PERF_EVENTS_NMI - config BOOTPARAM_HARDLOCKUP_PANIC bool "Panic (Reboot) On Hard Lockups" depends on HARDLOCKUP_DETECTOR @@ -826,7 +837,7 @@ config BOOTPARAM_HARDLOCKUP_PANIC_VALUE config BOOTPARAM_SOFTLOCKUP_PANIC bool "Panic (Reboot) On Soft Lockups" - depends on LOCKUP_DETECTOR + depends on SOFTLOCKUP_DETECTOR help Say Y here to enable the kernel to panic on "soft lockups", which are bugs that cause the kernel to loop in kernel @@ -843,7 +854,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE int - depends on LOCKUP_DETECTOR + depends on SOFTLOCKUP_DETECTOR range 0 1 default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC @@ -851,7 +862,7 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE config DETECT_HUNG_TASK bool "Detect Hung Tasks" depends on DEBUG_KERNEL - default LOCKUP_DETECTOR + default SOFTLOCKUP_DETECTOR help Say Y here to enable the kernel to detect "hung tasks", which are bugs that cause the task to be stuck in @@ -1052,6 +1063,7 @@ config DEBUG_LOCK_ALLOC depends on DEBUG_KERNEL && TRACE_IRQFLAGS_SUPPORT && STACKTRACE_SUPPORT && LOCKDEP_SUPPORT select DEBUG_SPINLOCK select DEBUG_MUTEXES + select DEBUG_RT_MUTEXES if RT_MUTEXES select LOCKDEP help This feature will check whether any held lock (spinlock, rwlock, @@ -1067,6 +1079,7 @@ config PROVE_LOCKING select LOCKDEP select DEBUG_SPINLOCK select DEBUG_MUTEXES + select DEBUG_RT_MUTEXES if RT_MUTEXES select DEBUG_LOCK_ALLOC select TRACE_IRQFLAGS default n @@ -1121,6 +1134,7 @@ config LOCK_STAT select LOCKDEP select DEBUG_SPINLOCK select DEBUG_MUTEXES + select DEBUG_RT_MUTEXES if RT_MUTEXES select DEBUG_LOCK_ALLOC default n help @@ -1209,6 +1223,34 @@ config STACKTRACE It is also used by various kernel debugging features that require stack trace generation. +config WARN_ALL_UNSEEDED_RANDOM + bool "Warn for all uses of unseeded randomness" + default n + help + Some parts of the kernel contain bugs relating to their use of + cryptographically secure random numbers before it's actually possible + to generate those numbers securely. This setting ensures that these + flaws don't go unnoticed, by enabling a message, should this ever + occur. This will allow people with obscure setups to know when things + are going wrong, so that they might contact developers about fixing + it. + + Unfortunately, on some models of some architectures getting + a fully seeded CRNG is extremely difficult, and so this can + result in dmesg getting spammed for a surprisingly long + time. This is really bad from a security perspective, and + so architecture maintainers really need to do what they can + to get the CRNG seeded sooner after the system is booted. + However, since users can not do anything actionable to + address this, by default the kernel will issue only a single + warning for the first use of unseeded randomness. + + Say Y here if you want to receive warnings for all uses of + unseeded randomness. This will be of use primarily for + those developers interested in improving the security of + Linux kernels running on their architecture (or + subarchitecture). + config DEBUG_KOBJECT bool "kobject debugging" depends on DEBUG_KERNEL @@ -1301,189 +1343,7 @@ config DEBUG_CREDENTIALS If unsure, say N.
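An aside on the WARN_ALL_UNSEEDED_RANDOM help text above: a kernel consumer that must not draw unseeded randomness can simply block until the CRNG is ready, which also keeps the new warning quiet. The sketch below is not part of this patch; the helper name example_generate_key() is invented, and it assumes the wait_for_random_bytes() interface from <linux/random.h> is available in this tree.

#include <linux/types.h>
#include <linux/random.h>

/*
 * Hypothetical helper: generate key material only once the CRNG is seeded,
 * so the warning controlled by WARN_ALL_UNSEEDED_RANDOM cannot fire for it.
 */
static int example_generate_key(u8 *key, size_t len)
{
	int ret;

	/* Sleeps (interruptibly) until the CRNG has been initialized. */
	ret = wait_for_random_bytes();
	if (ret)
		return ret;

	get_random_bytes(key, len);
	return 0;
}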
-menu "RCU Debugging" - -config PROVE_RCU - def_bool PROVE_LOCKING - -config PROVE_RCU_REPEATEDLY - bool "RCU debugging: don't disable PROVE_RCU on first splat" - depends on PROVE_RCU - default n - help - By itself, PROVE_RCU will disable checking upon issuing the - first warning (or "splat"). This feature prevents such - disabling, allowing multiple RCU-lockdep warnings to be printed - on a single reboot. - - Say Y to allow multiple RCU-lockdep warnings per boot. - - Say N if you are unsure. - -config SPARSE_RCU_POINTER - bool "RCU debugging: sparse-based checks for pointer usage" - default n - help - This feature enables the __rcu sparse annotation for - RCU-protected pointers. This annotation will cause sparse - to flag any non-RCU used of annotated pointers. This can be - helpful when debugging RCU usage. Please note that this feature - is not intended to enforce code cleanliness; it is instead merely - a debugging aid. - - Say Y to make sparse flag questionable use of RCU-protected pointers - - Say N if you are unsure. - -config TORTURE_TEST - tristate - default n - -config RCU_PERF_TEST - tristate "performance tests for RCU" - depends on DEBUG_KERNEL - select TORTURE_TEST - select SRCU - select TASKS_RCU - default n - help - This option provides a kernel module that runs performance - tests on the RCU infrastructure. The kernel module may be built - after the fact on the running kernel to be tested, if desired. - - Say Y here if you want RCU performance tests to be built into - the kernel. - Say M if you want the RCU performance tests to build as a module. - Say N if you are unsure. - -config RCU_TORTURE_TEST - tristate "torture tests for RCU" - depends on DEBUG_KERNEL - select TORTURE_TEST - select SRCU - select TASKS_RCU - default n - help - This option provides a kernel module that runs torture tests - on the RCU infrastructure. The kernel module may be built - after the fact on the running kernel to be tested, if desired. - - Say Y here if you want RCU torture tests to be built into - the kernel. - Say M if you want the RCU torture tests to build as a module. - Say N if you are unsure. - -config RCU_TORTURE_TEST_SLOW_PREINIT - bool "Slow down RCU grace-period pre-initialization to expose races" - depends on RCU_TORTURE_TEST - help - This option delays grace-period pre-initialization (the - propagation of CPU-hotplug changes up the rcu_node combining - tree) for a few jiffies between initializing each pair of - consecutive rcu_node structures. This helps to expose races - involving grace-period pre-initialization, in other words, it - makes your kernel less stable. It can also greatly increase - grace-period latency, especially on systems with large numbers - of CPUs. This is useful when torture-testing RCU, but in - almost no other circumstance. - - Say Y here if you want your system to crash and hang more often. - Say N if you want a sane system. - -config RCU_TORTURE_TEST_SLOW_PREINIT_DELAY - int "How much to slow down RCU grace-period pre-initialization" - range 0 5 - default 3 - depends on RCU_TORTURE_TEST_SLOW_PREINIT - help - This option specifies the number of jiffies to wait between - each rcu_node structure pre-initialization step. - -config RCU_TORTURE_TEST_SLOW_INIT - bool "Slow down RCU grace-period initialization to expose races" - depends on RCU_TORTURE_TEST - help - This option delays grace-period initialization for a few - jiffies between initializing each pair of consecutive - rcu_node structures. 
This helps to expose races involving - grace-period initialization, in other words, it makes your - kernel less stable. It can also greatly increase grace-period - latency, especially on systems with large numbers of CPUs. - This is useful when torture-testing RCU, but in almost no - other circumstance. - - Say Y here if you want your system to crash and hang more often. - Say N if you want a sane system. - -config RCU_TORTURE_TEST_SLOW_INIT_DELAY - int "How much to slow down RCU grace-period initialization" - range 0 5 - default 3 - depends on RCU_TORTURE_TEST_SLOW_INIT - help - This option specifies the number of jiffies to wait between - each rcu_node structure initialization. - -config RCU_TORTURE_TEST_SLOW_CLEANUP - bool "Slow down RCU grace-period cleanup to expose races" - depends on RCU_TORTURE_TEST - help - This option delays grace-period cleanup for a few jiffies - between cleaning up each pair of consecutive rcu_node - structures. This helps to expose races involving grace-period - cleanup, in other words, it makes your kernel less stable. - It can also greatly increase grace-period latency, especially - on systems with large numbers of CPUs. This is useful when - torture-testing RCU, but in almost no other circumstance. - - Say Y here if you want your system to crash and hang more often. - Say N if you want a sane system. - -config RCU_TORTURE_TEST_SLOW_CLEANUP_DELAY - int "How much to slow down RCU grace-period cleanup" - range 0 5 - default 3 - depends on RCU_TORTURE_TEST_SLOW_CLEANUP - help - This option specifies the number of jiffies to wait between - each rcu_node structure cleanup operation. - -config RCU_CPU_STALL_TIMEOUT - int "RCU CPU stall timeout in seconds" - depends on RCU_STALL_COMMON - range 3 300 - default 21 - help - If a given RCU grace period extends more than the specified - number of seconds, a CPU stall warning is printed. If the - RCU grace period persists, additional CPU stall warnings are - printed at more widely spaced intervals. - -config RCU_TRACE - bool "Enable tracing for RCU" - depends on DEBUG_KERNEL - default y if TREE_RCU - select TRACE_CLOCK - help - This option provides tracing in RCU which presents stats - in debugfs for debugging RCU implementation. It also enables - additional tracepoints for ftrace-style event tracing. - - Say Y here if you want to enable RCU tracing - Say N if you are unsure. - -config RCU_EQS_DEBUG - bool "Provide debugging asserts for adding NO_HZ support to an arch" - depends on DEBUG_KERNEL - help - This option provides consistency checks in RCU's handling of - NO_HZ. These checks have proven quite helpful in detecting - bugs in arch-specific NO_HZ code. - - Say N here if you need ultimate kernel/user switch latencies - Say Y if you are unsure - -endmenu # "RCU Debugging" +source "kernel/rcu/Kconfig.debug" config DEBUG_WQ_FORCE_RR_CPU bool "Force round-robin CPU selection for unbound work items" @@ -1773,7 +1633,7 @@ config RBTREE_TEST config INTERVAL_TREE_TEST tristate "Interval tree test" - depends on m && DEBUG_KERNEL + depends on DEBUG_KERNEL select INTERVAL_TREE help A benchmark measuring the performance of the interval tree library @@ -1964,6 +1824,17 @@ config TEST_FIRMWARE If unsure, say N. +config TEST_SYSCTL + tristate "sysctl test driver" + default n + depends on PROC_SYSCTL + help + This builds the "test_sysctl" module. This driver enables to test the + proc sysctl interfaces available to drivers safely without affecting + production knobs which might alter system functionality. + + If unsure, say N. 
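An aside on the TEST_SYSCTL entry above: the proc sysctl interface it exercises is the same one an ordinary driver would use to publish a tunable. A minimal sketch follows; it is not part of this patch, the "debug/example" path and all identifiers are invented for illustration, and it assumes the register_sysctl()/unregister_sysctl_table() helpers from <linux/sysctl.h>.

#include <linux/module.h>
#include <linux/sysctl.h>

/* Hypothetical driver knob: one integer under /proc/sys/debug/example/. */
static int example_value;

static struct ctl_table example_table[] = {
	{
		.procname	= "value",
		.data		= &example_value,
		.maxlen		= sizeof(int),
		.mode		= 0644,
		.proc_handler	= proc_dointvec,
	},
	{ }	/* sentinel entry terminates the table */
};

static struct ctl_table_header *example_header;

static int __init example_init(void)
{
	/* Creates the /proc/sys/debug/example/ directory and hooks the handler. */
	example_header = register_sysctl("debug/example", example_table);
	return example_header ? 0 : -ENOMEM;
}

static void __exit example_exit(void)
{
	unregister_sysctl_table(example_header);
}

module_init(example_init);
module_exit(example_exit);
MODULE_LICENSE("GPL");

Once such a module is loaded, the knob should appear as /proc/sys/debug/example/value and can be read and written like any other sysctl, which is exactly the surface the test_sysctl driver pokes at.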
+ config TEST_UDELAY tristate "udelay test driver" default n @@ -2004,6 +1875,33 @@ config BUG_ON_DATA_CORRUPTION If unsure, say N. +config TEST_KMOD + tristate "kmod stress tester" + default n + depends on m + depends on BLOCK && (64BIT || LBDAF) # for XFS, BTRFS + depends on NETDEVICES && NET_CORE && INET # for TUN + select TEST_LKM + select XFS_FS + select TUN + select BTRFS_FS + help + Test the kernel's module loading mechanism: kmod. kmod implements + support to load modules using the Linux kernel's usermode helper. + This test provides a series of tests against kmod. + + Although technically you can either build test_kmod as a module or + into the kernel we disallow building it into the kernel since + it stress tests request_module() and this will very likely cause + some issues by taking over precious threads available from other + module load requests, ultimately this could be fatal. + + To run tests run: + + tools/testing/selftests/kmod/kmod.sh --help + + If unsure, say N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Kconfig.kgdb b/lib/Kconfig.kgdb index 533f912638ed..ab4ff0eea776 100644 --- a/lib/Kconfig.kgdb +++ b/lib/Kconfig.kgdb @@ -13,7 +13,7 @@ menuconfig KGDB CONFIG_FRAME_POINTER to aid in producing more reliable stack backtraces in the external debugger. Documentation of kernel debugger is available at http://kgdb.sourceforge.net - as well as in DocBook form in Documentation/DocBook/. If + as well as in Documentation/dev-tools/kgdb.rst. If unsure, say N. if KGDB diff --git a/lib/Makefile b/lib/Makefile index 0166fbc0fa81..40c18372b301 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -25,9 +25,6 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ earlycpio.o seq_buf.o siphash.o \ nmi_backtrace.o nodemask.o win_minmax.o -CFLAGS_radix-tree.o += -DCONFIG_SPARSE_RCU_POINTER -CFLAGS_idr.o += -DCONFIG_SPARSE_RCU_POINTER - lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o lib-$(CONFIG_DMA_NOOP_OPS) += dma-noop.o @@ -41,7 +38,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ - once.o refcount.o usercopy.o + once.o refcount.o usercopy.o errseq.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += hexdump.o @@ -49,6 +46,7 @@ obj-$(CONFIG_TEST_HEXDUMP) += test_hexdump.o obj-y += kstrtox.o obj-$(CONFIG_TEST_BPF) += test_bpf.o obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o +obj-$(CONFIG_TEST_SYSCTL) += test_sysctl.o obj-$(CONFIG_TEST_HASH) += test_hash.o test_siphash.o obj-$(CONFIG_TEST_KASAN) += test_kasan.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o @@ -63,6 +61,7 @@ obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o +obj-$(CONFIG_TEST_KMOD) += test_kmod.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -99,6 +98,7 @@ obj-$(CONFIG_CRC_T10DIF)+= crc-t10dif.o obj-$(CONFIG_CRC_ITU_T) += crc-itu-t.o obj-$(CONFIG_CRC32) += crc32.o obj-$(CONFIG_CRC32_SELFTEST) += crc32test.o +obj-$(CONFIG_CRC4) += crc4.o obj-$(CONFIG_CRC7) += crc7.o obj-$(CONFIG_LIBCRC32C) += libcrc32c.o obj-$(CONFIG_CRC8) += crc8.o diff --git a/lib/atomic64_test.c b/lib/atomic64_test.c index fd70c0e0e673..62ab629f51ca 100644 --- a/lib/atomic64_test.c +++ b/lib/atomic64_test.c @@ -153,8 +153,10 @@ static __init void 
test_atomic64(void) long long v0 = 0xaaa31337c001d00dLL; long long v1 = 0xdeadbeefdeafcafeLL; long long v2 = 0xfaceabadf00df001LL; + long long v3 = 0x8000000000000000LL; long long onestwos = 0x1111111122222222LL; long long one = 1LL; + int r_int; atomic64_t v = ATOMIC64_INIT(v0); long long r = v0; @@ -240,6 +242,11 @@ static __init void test_atomic64(void) BUG_ON(!atomic64_inc_not_zero(&v)); r += one; BUG_ON(v.counter != r); + + /* Confirm the return value fits in an int, even if the value doesn't */ + INIT(v3); + r_int = atomic64_inc_not_zero(&v); + BUG_ON(!r_int); } static __init int test_atomics_init(void) diff --git a/lib/bitmap.c b/lib/bitmap.c index 08c6ef3a2b6f..9a532805364b 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -251,7 +251,7 @@ int __bitmap_weight(const unsigned long *bitmap, unsigned int bits) } EXPORT_SYMBOL(__bitmap_weight); -void bitmap_set(unsigned long *map, unsigned int start, int len) +void __bitmap_set(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); const unsigned int size = start + len; @@ -270,9 +270,9 @@ void bitmap_set(unsigned long *map, unsigned int start, int len) *p |= mask_to_set; } } -EXPORT_SYMBOL(bitmap_set); +EXPORT_SYMBOL(__bitmap_set); -void bitmap_clear(unsigned long *map, unsigned int start, int len) +void __bitmap_clear(unsigned long *map, unsigned int start, int len) { unsigned long *p = map + BIT_WORD(start); const unsigned int size = start + len; @@ -291,7 +291,7 @@ void bitmap_clear(unsigned long *map, unsigned int start, int len) *p &= ~mask_to_clear; } } -EXPORT_SYMBOL(bitmap_clear); +EXPORT_SYMBOL(__bitmap_clear); /** * bitmap_find_next_zero_area_off - find a contiguous aligned zero area diff --git a/lib/bsearch.c b/lib/bsearch.c index e33c179089db..18b445b010c3 100644 --- a/lib/bsearch.c +++ b/lib/bsearch.c @@ -33,19 +33,21 @@ void *bsearch(const void *key, const void *base, size_t num, size_t size, int (*cmp)(const void *key, const void *elt)) { - size_t start = 0, end = num; + const char *pivot; int result; - while (start < end) { - size_t mid = start + (end - start) / 2; + while (num > 0) { + pivot = base + (num >> 1) * size; + result = cmp(key, pivot); - result = cmp(key, base + mid * size); - if (result < 0) - end = mid; - else if (result > 0) - start = mid + 1; - else - return (void *)base + mid * size; + if (result == 0) + return (void *)pivot; + + if (result > 0) { + base = pivot + size; + num--; + } + num >>= 1; } return NULL; diff --git a/lib/cpumask.c b/lib/cpumask.c index 81dedaab36cc..4731a0895760 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -43,6 +43,38 @@ int cpumask_any_but(const struct cpumask *mask, unsigned int cpu) } EXPORT_SYMBOL(cpumask_any_but); +/** + * cpumask_next_wrap - helper to implement for_each_cpu_wrap + * @n: the cpu prior to the place to search + * @mask: the cpumask pointer + * @start: the start point of the iteration + * @wrap: assume @n crossing @start terminates the iteration + * + * Returns >= nr_cpu_ids on completion + * + * Note: the @wrap argument is required for the start condition when + * we cannot assume @start is set in @mask. + */ +int cpumask_next_wrap(int n, const struct cpumask *mask, int start, bool wrap) +{ + int next; + +again: + next = cpumask_next(n, mask); + + if (wrap && n < start && next >= start) { + return nr_cpumask_bits; + + } else if (next >= nr_cpumask_bits) { + wrap = true; + n = -1; + goto again; + } + + return next; +} +EXPORT_SYMBOL(cpumask_next_wrap); + /* These are not inline because of header tangles. 
*/ #ifdef CONFIG_CPUMASK_OFFSTACK /** diff --git a/lib/crc4.c b/lib/crc4.c new file mode 100644 index 000000000000..cf6db46661be --- /dev/null +++ b/lib/crc4.c @@ -0,0 +1,46 @@ +/* + * crc4.c - simple crc-4 calculations. + * + * This source code is licensed under the GNU General Public License, Version + * 2. See the file COPYING for more details. + */ + +#include <linux/crc4.h> +#include <linux/module.h> + +static const uint8_t crc4_tab[] = { + 0x0, 0x7, 0xe, 0x9, 0xb, 0xc, 0x5, 0x2, + 0x1, 0x6, 0xf, 0x8, 0xa, 0xd, 0x4, 0x3, +}; + +/** + * crc4 - calculate the 4-bit crc of a value. + * @crc: starting crc4 + * @x: value to checksum + * @bits: number of bits in @x to checksum + * + * Returns the crc4 value of @x, using polynomial 0b10111. + * + * The @x value is treated as left-aligned, and bits above @bits are ignored + * in the crc calculations. + */ +uint8_t crc4(uint8_t c, uint64_t x, int bits) +{ + int i; + + /* mask off anything above the top bit */ + x &= (1ull << bits) - 1; + + /* Align to 4-bits */ + bits = (bits + 3) & ~0x3; + + /* Calculate crc4 over four-bit nibbles, starting at the MSbit */ + for (i = bits - 4; i >= 0; i -= 4) + c = crc4_tab[c ^ ((x >> i) & 0xf)]; + + return c; +} +EXPORT_SYMBOL_GPL(crc4); + +MODULE_DESCRIPTION("CRC4 calculations"); +MODULE_LICENSE("GPL"); diff --git a/lib/dma-noop.c b/lib/dma-noop.c index de26c8b68f34..acc4190e2731 100644 --- a/lib/dma-noop.c +++ b/lib/dma-noop.c @@ -7,6 +7,7 @@ #include <linux/mm.h> #include <linux/dma-mapping.h> #include <linux/scatterlist.h> +#include <linux/pfn.h> static void *dma_noop_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, gfp_t gfp, @@ -16,7 +17,8 @@ static void *dma_noop_alloc(struct device *dev, size_t size, ret = (void *)__get_free_pages(gfp, get_order(size)); if (ret) - *dma_handle = virt_to_phys(ret); + *dma_handle = virt_to_phys(ret) - PFN_PHYS(dev->dma_pfn_offset); + return ret; } @@ -32,7 +34,7 @@ static dma_addr_t dma_noop_map_page(struct device *dev, struct page *page, enum dma_data_direction dir, unsigned long attrs) { - return page_to_phys(page) + offset; + return page_to_phys(page) + offset - PFN_PHYS(dev->dma_pfn_offset); } static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nents, @@ -43,34 +45,23 @@ static int dma_noop_map_sg(struct device *dev, struct scatterlist *sgl, int nent struct scatterlist *sg; for_each_sg(sgl, sg, nents, i) { + dma_addr_t offset = PFN_PHYS(dev->dma_pfn_offset); void *va; BUG_ON(!sg_page(sg)); va = sg_virt(sg); - sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va); + sg_dma_address(sg) = (dma_addr_t)virt_to_phys(va) - offset; sg_dma_len(sg) = sg->length; } return nents; } -static int dma_noop_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return 0; -} - -static int dma_noop_supported(struct device *dev, u64 mask) -{ - return 1; -} - const struct dma_map_ops dma_noop_ops = { .alloc = dma_noop_alloc, .free = dma_noop_free, .map_page = dma_noop_map_page, .map_sg = dma_noop_map_sg, - .mapping_error = dma_noop_mapping_error, - .dma_supported = dma_noop_supported, }; EXPORT_SYMBOL(dma_noop_ops); diff --git a/lib/dma-virt.c b/lib/dma-virt.c index dcd4df1f7174..5c4f11329721 100644 --- a/lib/dma-virt.c +++ b/lib/dma-virt.c @@ -51,22 +51,10 @@ static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, return nents; } -static int dma_virt_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return false; -} - -static int dma_virt_supported(struct device *dev, u64 mask) -{ - return true; -} - const struct 
dma_map_ops dma_virt_ops = { .alloc = dma_virt_alloc, .free = dma_virt_free, .map_page = dma_virt_map_page, .map_sg = dma_virt_map_sg, - .mapping_error = dma_virt_mapping_error, - .dma_supported = dma_virt_supported, }; EXPORT_SYMBOL(dma_virt_ops); diff --git a/lib/errseq.c b/lib/errseq.c new file mode 100644 index 000000000000..841fa24e6e00 --- /dev/null +++ b/lib/errseq.c @@ -0,0 +1,208 @@ +#include <linux/err.h> +#include <linux/bug.h> +#include <linux/atomic.h> +#include <linux/errseq.h> + +/* + * An errseq_t is a way of recording errors in one place, and allowing any + * number of "subscribers" to tell whether it has changed since a previous + * point where it was sampled. + * + * It's implemented as an unsigned 32-bit value. The low order bits are + * designated to hold an error code (between 0 and -MAX_ERRNO). The upper bits + * are used as a counter. This is done with atomics instead of locking so that + * these functions can be called from any context. + * + * The general idea is for consumers to sample an errseq_t value. That value + * can later be used to tell whether any new errors have occurred since that + * sampling was done. + * + * Note that there is a risk of collisions if new errors are being recorded + * frequently, since we have so few bits to use as a counter. + * + * To mitigate this, one bit is used as a flag to tell whether the value has + * been sampled since a new value was recorded. That allows us to avoid bumping + * the counter if no one has sampled it since the last time an error was + * recorded. + * + * A new errseq_t should always be zeroed out. A errseq_t value of all zeroes + * is the special (but common) case where there has never been an error. An all + * zero value thus serves as the "epoch" if one wishes to know whether there + * has ever been an error set since it was first initialized. + */ + +/* The low bits are designated for error code (max of MAX_ERRNO) */ +#define ERRSEQ_SHIFT ilog2(MAX_ERRNO + 1) + +/* This bit is used as a flag to indicate whether the value has been seen */ +#define ERRSEQ_SEEN (1 << ERRSEQ_SHIFT) + +/* The lowest bit of the counter */ +#define ERRSEQ_CTR_INC (1 << (ERRSEQ_SHIFT + 1)) + +/** + * __errseq_set - set a errseq_t for later reporting + * @eseq: errseq_t field that should be set + * @err: error to set + * + * This function sets the error in *eseq, and increments the sequence counter + * if the last sequence was sampled at some point in the past. + * + * Any error set will always overwrite an existing error. + * + * Most callers will want to use the errseq_set inline wrapper to efficiently + * handle the common case where err is 0. + * + * We do return an errseq_t here, primarily for debugging purposes. The return + * value should not be used as a previously sampled value in later calls as it + * will not have the SEEN flag set. + */ +errseq_t __errseq_set(errseq_t *eseq, int err) +{ + errseq_t cur, old; + + /* MAX_ERRNO must be able to serve as a mask */ + BUILD_BUG_ON_NOT_POWER_OF_2(MAX_ERRNO + 1); + + /* + * Ensure the error code actually fits where we want it to go. If it + * doesn't then just throw a warning and don't record anything. We + * also don't accept zero here as that would effectively clear a + * previous error. 
+ */ + old = READ_ONCE(*eseq); + + if (WARN(unlikely(err == 0 || (unsigned int)-err > MAX_ERRNO), + "err = %d\n", err)) + return old; + + for (;;) { + errseq_t new; + + /* Clear out error bits and set new error */ + new = (old & ~(MAX_ERRNO|ERRSEQ_SEEN)) | -err; + + /* Only increment if someone has looked at it */ + if (old & ERRSEQ_SEEN) + new += ERRSEQ_CTR_INC; + + /* If there would be no change, then call it done */ + if (new == old) { + cur = new; + break; + } + + /* Try to swap the new value into place */ + cur = cmpxchg(eseq, old, new); + + /* + * Call it success if we did the swap or someone else beat us + * to it for the same value. + */ + if (likely(cur == old || cur == new)) + break; + + /* Raced with an update, try again */ + old = cur; + } + return cur; +} +EXPORT_SYMBOL(__errseq_set); + +/** + * errseq_sample - grab current errseq_t value + * @eseq: pointer to errseq_t to be sampled + * + * This function allows callers to sample an errseq_t value, marking it as + * "seen" if required. + */ +errseq_t errseq_sample(errseq_t *eseq) +{ + errseq_t old = READ_ONCE(*eseq); + errseq_t new = old; + + /* + * For the common case of no errors ever having been set, we can skip + * marking the SEEN bit. Once an error has been set, the value will + * never go back to zero. + */ + if (old != 0) { + new |= ERRSEQ_SEEN; + if (old != new) + cmpxchg(eseq, old, new); + } + return new; +} +EXPORT_SYMBOL(errseq_sample); + +/** + * errseq_check - has an error occurred since a particular sample point? + * @eseq: pointer to errseq_t value to be checked + * @since: previously-sampled errseq_t from which to check + * + * Grab the value that eseq points to, and see if it has changed "since" + * the given value was sampled. The "since" value is not advanced, so there + * is no need to mark the value as seen. + * + * Returns the latest error set in the errseq_t or 0 if it hasn't changed. + */ +int errseq_check(errseq_t *eseq, errseq_t since) +{ + errseq_t cur = READ_ONCE(*eseq); + + if (likely(cur == since)) + return 0; + return -(cur & MAX_ERRNO); +} +EXPORT_SYMBOL(errseq_check); + +/** + * errseq_check_and_advance - check an errseq_t and advance to current value + * @eseq: pointer to value being checked and reported + * @since: pointer to previously-sampled errseq_t to check against and advance + * + * Grab the eseq value, and see whether it matches the value that "since" + * points to. If it does, then just return 0. + * + * If it doesn't, then the value has changed. Set the "seen" flag, and try to + * swap it into place as the new eseq value. Then, set that value as the new + * "since" value, and return whatever the error portion is set to. + * + * Note that no locking is provided here for concurrent updates to the "since" + * value. The caller must provide that if necessary. Because of this, callers + * may want to do a lockless errseq_check before taking the lock and calling + * this. + */ +int errseq_check_and_advance(errseq_t *eseq, errseq_t *since) +{ + int err = 0; + errseq_t old, new; + + /* + * Most callers will want to use the inline wrapper to check this, + * so that the common case of no error is handled without needing + * to take the lock that protects the "since" value. + */ + old = READ_ONCE(*eseq); + if (old != *since) { + /* + * Set the flag and try to swap it into place if it has + * changed. + * + * We don't care about the outcome of the swap here. 
If the + * swap doesn't occur, then it has either been updated by a + * writer who is altering the value in some way (updating + * counter or resetting the error), or another reader who is + * just setting the "seen" flag. Either outcome is OK, and we + * can advance "since" and return an error based on what we + * have. + */ + new = old | ERRSEQ_SEEN; + if (new != old) + cmpxchg(eseq, old, new); + *since = new; + err = -(new & MAX_ERRNO); + } + return err; +} +EXPORT_SYMBOL(errseq_check_and_advance); diff --git a/lib/extable.c b/lib/extable.c index 62968daa66a9..f54996fdd0b8 100644 --- a/lib/extable.c +++ b/lib/extable.c @@ -9,6 +9,7 @@ * 2 of the License, or (at your option) any later version. */ +#include <linux/bsearch.h> #include <linux/module.h> #include <linux/init.h> #include <linux/sort.h> @@ -51,7 +52,7 @@ static void swap_ex(void *a, void *b, int size) * This is used both for the kernel exception table and for * the exception tables of modules that get loaded. */ -static int cmp_ex(const void *a, const void *b) +static int cmp_ex_sort(const void *a, const void *b) { const struct exception_table_entry *x = a, *y = b; @@ -67,7 +68,7 @@ void sort_extable(struct exception_table_entry *start, struct exception_table_entry *finish) { sort(start, finish - start, sizeof(struct exception_table_entry), - cmp_ex, swap_ex); + cmp_ex_sort, swap_ex); } #ifdef CONFIG_MODULES @@ -93,6 +94,20 @@ void trim_init_extable(struct module *m) #endif /* !ARCH_HAS_SORT_EXTABLE */ #ifndef ARCH_HAS_SEARCH_EXTABLE + +static int cmp_ex_search(const void *key, const void *elt) +{ + const struct exception_table_entry *_elt = elt; + unsigned long _key = *(unsigned long *)key; + + /* avoid overflow */ + if (_key > ex_to_insn(_elt)) + return 1; + if (_key < ex_to_insn(_elt)) + return -1; + return 0; +} + /* * Search one exception table for an entry corresponding to the * given instruction address, and return the address of the entry, @@ -101,25 +116,11 @@ void trim_init_extable(struct module *m) * already sorted. 
*/ const struct exception_table_entry * -search_extable(const struct exception_table_entry *first, - const struct exception_table_entry *last, +search_extable(const struct exception_table_entry *base, + const size_t num, unsigned long value) { - while (first <= last) { - const struct exception_table_entry *mid; - - mid = ((last - first) >> 1) + first; - /* - * careful, the distance between value and insn - * can be larger than MAX_LONG: - */ - if (ex_to_insn(mid) < value) - first = mid + 1; - else if (ex_to_insn(mid) > value) - last = mid - 1; - else - return mid; - } - return NULL; + return bsearch(&value, base, num, + sizeof(struct exception_table_entry), cmp_ex_search); } #endif diff --git a/lib/fault-inject.c b/lib/fault-inject.c index 4ff157159a0d..7d315fdb9f13 100644 --- a/lib/fault-inject.c +++ b/lib/fault-inject.c @@ -107,6 +107,15 @@ static inline bool fail_stacktrace(struct fault_attr *attr) bool should_fail(struct fault_attr *attr, ssize_t size) { + if (in_task()) { + unsigned int fail_nth = READ_ONCE(current->fail_nth); + + if (fail_nth && !WRITE_ONCE(current->fail_nth, fail_nth - 1)) + goto fail; + + return false; + } + /* No need to check any other properties if the probability is 0 */ if (attr->probability == 0) return false; @@ -134,6 +143,7 @@ bool should_fail(struct fault_attr *attr, ssize_t size) if (!fail_stacktrace(attr)) return false; +fail: fail_dump(attr); if (atomic_read(&attr->times) != -1) diff --git a/lib/flex_proportions.c b/lib/flex_proportions.c index a71cf1bdd4c9..2cc1f94e03a1 100644 --- a/lib/flex_proportions.c +++ b/lib/flex_proportions.c @@ -207,7 +207,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p, if (val < (nr_cpu_ids * PROP_BATCH)) val = percpu_counter_sum(&pl->events); - __percpu_counter_add(&pl->events, + percpu_counter_add_batch(&pl->events, -val + (val >> (period-pl->period)), PROP_BATCH); } else percpu_counter_set(&pl->events, 0); @@ -219,7 +219,7 @@ static void fprop_reflect_period_percpu(struct fprop_global *p, void __fprop_inc_percpu(struct fprop_global *p, struct fprop_local_percpu *pl) { fprop_reflect_period_percpu(p, pl); - __percpu_counter_add(&pl->events, 1, PROP_BATCH); + percpu_counter_add_batch(&pl->events, 1, PROP_BATCH); percpu_counter_add(&p->events, 1); } @@ -267,6 +267,6 @@ void __fprop_inc_percpu_max(struct fprop_global *p, return; } else fprop_reflect_period_percpu(p, pl); - __percpu_counter_add(&pl->events, 1, PROP_BATCH); + percpu_counter_add_batch(&pl->events, 1, PROP_BATCH); percpu_counter_add(&p->events, 1); } diff --git a/lib/interval_tree_test.c b/lib/interval_tree_test.c index 245900b98c8e..df495fe81421 100644 --- a/lib/interval_tree_test.c +++ b/lib/interval_tree_test.c @@ -1,27 +1,38 @@ #include <linux/module.h> +#include <linux/moduleparam.h> #include <linux/interval_tree.h> #include <linux/random.h> +#include <linux/slab.h> #include <asm/timex.h> -#define NODES 100 -#define PERF_LOOPS 100000 -#define SEARCHES 100 -#define SEARCH_LOOPS 10000 +#define __param(type, name, init, msg) \ + static type name = init; \ + module_param(name, type, 0444); \ + MODULE_PARM_DESC(name, msg); + +__param(int, nnodes, 100, "Number of nodes in the interval tree"); +__param(int, perf_loops, 100000, "Number of iterations modifying the tree"); + +__param(int, nsearches, 100, "Number of searches to the interval tree"); +__param(int, search_loops, 10000, "Number of iterations searching the tree"); +__param(bool, search_all, false, "Searches will iterate all nodes in the tree"); + +__param(uint, max_endpoint, ~0, "Largest 
value for the interval's endpoint"); static struct rb_root root = RB_ROOT; -static struct interval_tree_node nodes[NODES]; -static u32 queries[SEARCHES]; +static struct interval_tree_node *nodes = NULL; +static u32 *queries = NULL; static struct rnd_state rnd; static inline unsigned long -search(unsigned long query, struct rb_root *root) +search(struct rb_root *root, unsigned long start, unsigned long last) { struct interval_tree_node *node; unsigned long results = 0; - for (node = interval_tree_iter_first(root, query, query); node; - node = interval_tree_iter_next(node, query, query)) + for (node = interval_tree_iter_first(root, start, last); node; + node = interval_tree_iter_next(node, start, last)) results++; return results; } @@ -29,19 +40,22 @@ search(unsigned long query, struct rb_root *root) static void init(void) { int i; - for (i = 0; i < NODES; i++) { - u32 a = prandom_u32_state(&rnd); - u32 b = prandom_u32_state(&rnd); - if (a <= b) { - nodes[i].start = a; - nodes[i].last = b; - } else { - nodes[i].start = b; - nodes[i].last = a; - } + + for (i = 0; i < nnodes; i++) { + u32 b = (prandom_u32_state(&rnd) >> 4) % max_endpoint; + u32 a = (prandom_u32_state(&rnd) >> 4) % b; + + nodes[i].start = a; + nodes[i].last = b; } - for (i = 0; i < SEARCHES; i++) - queries[i] = prandom_u32_state(&rnd); + + /* + * Limit the search scope to what the user defined. + * Otherwise we are merely measuring empty walks, + * which is pointless. + */ + for (i = 0; i < nsearches; i++) + queries[i] = (prandom_u32_state(&rnd) >> 4) % max_endpoint; } static int interval_tree_test_init(void) @@ -50,6 +64,16 @@ static int interval_tree_test_init(void) unsigned long results; cycles_t time1, time2, time; + nodes = kmalloc(nnodes * sizeof(struct interval_tree_node), GFP_KERNEL); + if (!nodes) + return -ENOMEM; + + queries = kmalloc(nsearches * sizeof(int), GFP_KERNEL); + if (!queries) { + kfree(nodes); + return -ENOMEM; + } + printk(KERN_ALERT "interval tree insert/remove"); prandom_seed_state(&rnd, 3141592653589793238ULL); @@ -57,39 +81,46 @@ static int interval_tree_test_init(void) time1 = get_cycles(); - for (i = 0; i < PERF_LOOPS; i++) { - for (j = 0; j < NODES; j++) + for (i = 0; i < perf_loops; i++) { + for (j = 0; j < nnodes; j++) interval_tree_insert(nodes + j, &root); - for (j = 0; j < NODES; j++) + for (j = 0; j < nnodes; j++) interval_tree_remove(nodes + j, &root); } time2 = get_cycles(); time = time2 - time1; - time = div_u64(time, PERF_LOOPS); + time = div_u64(time, perf_loops); printk(" -> %llu cycles\n", (unsigned long long)time); printk(KERN_ALERT "interval tree search"); - for (j = 0; j < NODES; j++) + for (j = 0; j < nnodes; j++) interval_tree_insert(nodes + j, &root); time1 = get_cycles(); results = 0; - for (i = 0; i < SEARCH_LOOPS; i++) - for (j = 0; j < SEARCHES; j++) - results += search(queries[j], &root); + for (i = 0; i < search_loops; i++) + for (j = 0; j < nsearches; j++) { + unsigned long start = search_all ? 0 : queries[j]; + unsigned long last = search_all ? 
max_endpoint : queries[j]; + + results += search(&root, start, last); + } time2 = get_cycles(); time = time2 - time1; - time = div_u64(time, SEARCH_LOOPS); - results = div_u64(results, SEARCH_LOOPS); + time = div_u64(time, search_loops); + results = div_u64(results, search_loops); printk(" -> %llu cycles (%lu results)\n", (unsigned long long)time, results); + kfree(queries); + kfree(nodes); + return -EAGAIN; /* Fail will directly unload the module */ } diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f835964c9485..52c8dd6d8e82 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -130,6 +130,24 @@ } \ } +static int copyout(void __user *to, const void *from, size_t n) +{ + if (access_ok(VERIFY_WRITE, to, n)) { + kasan_check_read(from, n); + n = raw_copy_to_user(to, from, n); + } + return n; +} + +static int copyin(void *to, const void __user *from, size_t n) +{ + if (access_ok(VERIFY_READ, from, n)) { + kasan_check_write(to, n); + n = raw_copy_from_user(to, from, n); + } + return n; +} + static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { @@ -144,6 +162,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b if (unlikely(!bytes)) return 0; + might_fault(); wanted = bytes; iov = i->iov; skip = i->iov_offset; @@ -155,7 +174,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b from = kaddr + offset; /* first chunk, usually the only one */ - left = __copy_to_user_inatomic(buf, from, copy); + left = copyout(buf, from, copy); copy -= left; skip += copy; from += copy; @@ -165,7 +184,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b iov++; buf = iov->iov_base; copy = min(bytes, iov->iov_len); - left = __copy_to_user_inatomic(buf, from, copy); + left = copyout(buf, from, copy); copy -= left; skip = copy; from += copy; @@ -184,7 +203,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b kaddr = kmap(page); from = kaddr + offset; - left = __copy_to_user(buf, from, copy); + left = copyout(buf, from, copy); copy -= left; skip += copy; from += copy; @@ -193,7 +212,7 @@ static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t b iov++; buf = iov->iov_base; copy = min(bytes, iov->iov_len); - left = __copy_to_user(buf, from, copy); + left = copyout(buf, from, copy); copy -= left; skip = copy; from += copy; @@ -227,6 +246,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t if (unlikely(!bytes)) return 0; + might_fault(); wanted = bytes; iov = i->iov; skip = i->iov_offset; @@ -238,7 +258,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t to = kaddr + offset; /* first chunk, usually the only one */ - left = __copy_from_user_inatomic(to, buf, copy); + left = copyin(to, buf, copy); copy -= left; skip += copy; to += copy; @@ -248,7 +268,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t iov++; buf = iov->iov_base; copy = min(bytes, iov->iov_len); - left = __copy_from_user_inatomic(to, buf, copy); + left = copyin(to, buf, copy); copy -= left; skip = copy; to += copy; @@ -267,7 +287,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t kaddr = kmap(page); to = kaddr + offset; - left = __copy_from_user(to, buf, copy); + left = copyin(to, buf, copy); copy -= left; skip += copy; to += copy; @@ -276,7 +296,7 @@ static size_t copy_page_from_iter_iovec(struct page *page, size_t 
offset, size_t iov++; buf = iov->iov_base; copy = min(bytes, iov->iov_len); - left = __copy_from_user(to, buf, copy); + left = copyin(to, buf, copy); copy -= left; skip = copy; to += copy; @@ -535,14 +555,15 @@ static size_t copy_pipe_to_iter(const void *addr, size_t bytes, return bytes; } -size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) +size_t _copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; if (unlikely(i->type & ITER_PIPE)) return copy_pipe_to_iter(addr, bytes, i); + if (iter_is_iovec(i)) + might_fault(); iterate_and_advance(i, bytes, v, - __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, - v.iov_len), + copyout(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len), memcpy_to_page(v.bv_page, v.bv_offset, (from += v.bv_len) - v.bv_len, v.bv_len), memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) @@ -550,18 +571,19 @@ size_t copy_to_iter(const void *addr, size_t bytes, struct iov_iter *i) return bytes; } -EXPORT_SYMBOL(copy_to_iter); +EXPORT_SYMBOL(_copy_to_iter); -size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) +size_t _copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; if (unlikely(i->type & ITER_PIPE)) { WARN_ON(1); return 0; } + if (iter_is_iovec(i)) + might_fault(); iterate_and_advance(i, bytes, v, - __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, - v.iov_len), + copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) @@ -569,9 +591,9 @@ size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) return bytes; } -EXPORT_SYMBOL(copy_from_iter); +EXPORT_SYMBOL(_copy_from_iter); -bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) +bool _copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; if (unlikely(i->type & ITER_PIPE)) { @@ -581,8 +603,10 @@ bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) if (unlikely(i->count < bytes)) return false; + if (iter_is_iovec(i)) + might_fault(); iterate_all_kinds(i, bytes, v, ({ - if (__copy_from_user((to += v.iov_len) - v.iov_len, + if (copyin((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len)) return false; 0;}), @@ -594,9 +618,9 @@ bool copy_from_iter_full(void *addr, size_t bytes, struct iov_iter *i) iov_iter_advance(i, bytes); return true; } -EXPORT_SYMBOL(copy_from_iter_full); +EXPORT_SYMBOL(_copy_from_iter_full); -size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) +size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; if (unlikely(i->type & ITER_PIPE)) { @@ -613,9 +637,31 @@ size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) return bytes; } -EXPORT_SYMBOL(copy_from_iter_nocache); +EXPORT_SYMBOL(_copy_from_iter_nocache); -bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) +#ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) +{ + char *to = addr; + if (unlikely(i->type & ITER_PIPE)) { + WARN_ON(1); + return 0; + } + iterate_and_advance(i, bytes, v, + __copy_from_user_flushcache((to += v.iov_len) - v.iov_len, + v.iov_base, v.iov_len), + memcpy_page_flushcache((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy_flushcache((to += v.iov_len) - v.iov_len, 
v.iov_base, + v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL_GPL(_copy_from_iter_flushcache); +#endif + +bool _copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; if (unlikely(i->type & ITER_PIPE)) { @@ -637,11 +683,22 @@ bool copy_from_iter_full_nocache(void *addr, size_t bytes, struct iov_iter *i) iov_iter_advance(i, bytes); return true; } -EXPORT_SYMBOL(copy_from_iter_full_nocache); +EXPORT_SYMBOL(_copy_from_iter_full_nocache); + +static inline bool page_copy_sane(struct page *page, size_t offset, size_t n) +{ + size_t v = n + offset; + if (likely(n <= v && v <= (PAGE_SIZE << compound_order(page)))) + return true; + WARN_ON(1); + return false; +} size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { + if (unlikely(!page_copy_sane(page, offset, bytes))) + return 0; if (i->type & (ITER_BVEC|ITER_KVEC)) { void *kaddr = kmap_atomic(page); size_t wanted = copy_to_iter(kaddr + offset, bytes, i); @@ -657,13 +714,15 @@ EXPORT_SYMBOL(copy_page_to_iter); size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, struct iov_iter *i) { + if (unlikely(!page_copy_sane(page, offset, bytes))) + return 0; if (unlikely(i->type & ITER_PIPE)) { WARN_ON(1); return 0; } if (i->type & (ITER_BVEC|ITER_KVEC)) { void *kaddr = kmap_atomic(page); - size_t wanted = copy_from_iter(kaddr + offset, bytes, i); + size_t wanted = _copy_from_iter(kaddr + offset, bytes, i); kunmap_atomic(kaddr); return wanted; } else @@ -700,7 +759,7 @@ size_t iov_iter_zero(size_t bytes, struct iov_iter *i) if (unlikely(i->type & ITER_PIPE)) return pipe_zero(bytes, i); iterate_and_advance(i, bytes, v, - __clear_user(v.iov_base, v.iov_len), + clear_user(v.iov_base, v.iov_len), memzero_page(v.bv_page, v.bv_offset, v.bv_len), memset(v.iov_base, 0, v.iov_len) ) @@ -713,14 +772,17 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes) { char *kaddr = kmap_atomic(page), *p = kaddr + offset; + if (unlikely(!page_copy_sane(page, offset, bytes))) { + kunmap_atomic(kaddr); + return 0; + } if (unlikely(i->type & ITER_PIPE)) { kunmap_atomic(kaddr); WARN_ON(1); return 0; } iterate_all_kinds(i, bytes, v, - __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, - v.iov_base, v.iov_len), + copyin((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len), memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, v.bv_offset, v.bv_len), memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 9a2b811966eb..719c155fce20 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -23,6 +23,8 @@ #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> +#include <linux/uuid.h> +#include <linux/ctype.h> #include <net/sock.h> #include <net/net_namespace.h> @@ -52,19 +54,13 @@ static const char *kobject_actions[] = { [KOBJ_OFFLINE] = "offline", }; -/** - * kobject_action_type - translate action string to numeric type - * - * @buf: buffer containing the action string, newline is ignored - * @count: length of buffer - * @type: pointer to the location to store the action type - * - * Returns 0 if the action string was recognized. 
- */ -int kobject_action_type(const char *buf, size_t count, - enum kobject_action *type) +static int kobject_action_type(const char *buf, size_t count, + enum kobject_action *type, + const char **args) { enum kobject_action action; + size_t count_first; + const char *args_start; int ret = -EINVAL; if (count && (buf[count-1] == '\n' || buf[count-1] == '\0')) @@ -73,11 +69,20 @@ int kobject_action_type(const char *buf, size_t count, if (!count) goto out; + args_start = strnchr(buf, count, ' '); + if (args_start) { + count_first = args_start - buf; + args_start = args_start + 1; + } else + count_first = count; + for (action = 0; action < ARRAY_SIZE(kobject_actions); action++) { - if (strncmp(kobject_actions[action], buf, count) != 0) + if (strncmp(kobject_actions[action], buf, count_first) != 0) continue; - if (kobject_actions[action][count] != '\0') + if (kobject_actions[action][count_first] != '\0') continue; + if (args) + *args = args_start; *type = action; ret = 0; break; @@ -86,6 +91,142 @@ out: return ret; } +static const char *action_arg_word_end(const char *buf, const char *buf_end, + char delim) +{ + const char *next = buf; + + while (next <= buf_end && *next != delim) + if (!isalnum(*next++)) + return NULL; + + if (next == buf) + return NULL; + + return next; +} + +static int kobject_action_args(const char *buf, size_t count, + struct kobj_uevent_env **ret_env) +{ + struct kobj_uevent_env *env = NULL; + const char *next, *buf_end, *key; + int key_len; + int r = -EINVAL; + + if (count && (buf[count - 1] == '\n' || buf[count - 1] == '\0')) + count--; + + if (!count) + return -EINVAL; + + env = kzalloc(sizeof(*env), GFP_KERNEL); + if (!env) + return -ENOMEM; + + /* first arg is UUID */ + if (count < UUID_STRING_LEN || !uuid_is_valid(buf) || + add_uevent_var(env, "SYNTH_UUID=%.*s", UUID_STRING_LEN, buf)) + goto out; + + /* + * the rest are custom environment variables in KEY=VALUE + * format with ' ' delimiter between each KEY=VALUE pair + */ + next = buf + UUID_STRING_LEN; + buf_end = buf + count - 1; + + while (next <= buf_end) { + if (*next != ' ') + goto out; + + /* skip the ' ', key must follow */ + key = ++next; + if (key > buf_end) + goto out; + + buf = next; + next = action_arg_word_end(buf, buf_end, '='); + if (!next || next > buf_end || *next != '=') + goto out; + key_len = next - buf; + + /* skip the '=', value must follow */ + if (++next > buf_end) + goto out; + + buf = next; + next = action_arg_word_end(buf, buf_end, ' '); + if (!next) + goto out; + + if (add_uevent_var(env, "SYNTH_ARG_%.*s=%.*s", + key_len, key, (int) (next - buf), buf)) + goto out; + } + + r = 0; +out: + if (r) + kfree(env); + else + *ret_env = env; + return r; +} + +/** + * kobject_synth_uevent - send synthetic uevent with arguments + * + * @kobj: struct kobject for which synthetic uevent is to be generated + * @buf: buffer containing action type and action args, newline is ignored + * @count: length of buffer + * + * Returns 0 if kobject_synthetic_uevent() is completed with success or the + * corresponding error when it fails. 
+ */ +int kobject_synth_uevent(struct kobject *kobj, const char *buf, size_t count) +{ + char *no_uuid_envp[] = { "SYNTH_UUID=0", NULL }; + enum kobject_action action; + const char *action_args; + struct kobj_uevent_env *env; + const char *msg = NULL, *devpath; + int r; + + r = kobject_action_type(buf, count, &action, &action_args); + if (r) { + msg = "unknown uevent action string\n"; + goto out; + } + + if (!action_args) { + r = kobject_uevent_env(kobj, action, no_uuid_envp); + goto out; + } + + r = kobject_action_args(action_args, + count - (action_args - buf), &env); + if (r == -EINVAL) { + msg = "incorrect uevent action arguments\n"; + goto out; + } + + if (r) + goto out; + + r = kobject_uevent_env(kobj, action, env->envp); + kfree(env); +out: + if (r) { + devpath = kobject_get_path(kobj, GFP_KERNEL); + printk(KERN_WARNING "synth uevent: %s: %s", + devpath ?: "unknown device", + msg ?: "failed to send uevent"); + kfree(devpath); + } + return r; +} + #ifdef CONFIG_NET static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) { diff --git a/lib/kstrtox.c b/lib/kstrtox.c index bf85e05ce858..720144075c1e 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -51,13 +51,15 @@ unsigned int _parse_integer(const char *s, unsigned int base, unsigned long long res = 0; rv = 0; - while (*s) { + while (1) { + unsigned int c = *s; + unsigned int lc = c | 0x20; /* don't tolower() this line */ unsigned int val; - if ('0' <= *s && *s <= '9') - val = *s - '0'; - else if ('a' <= _tolower(*s) && _tolower(*s) <= 'f') - val = _tolower(*s) - 'a' + 10; + if ('0' <= c && c <= '9') + val = c - '0'; + else if ('a' <= lc && lc <= 'f') + val = lc - 'a' + 10; else break; diff --git a/lib/locking-selftest-rtmutex.h b/lib/locking-selftest-rtmutex.h new file mode 100644 index 000000000000..e3cb83989d16 --- /dev/null +++ b/lib/locking-selftest-rtmutex.h @@ -0,0 +1,11 @@ +#undef LOCK +#define LOCK RTL + +#undef UNLOCK +#define UNLOCK RTU + +#undef RLOCK +#undef WLOCK + +#undef INIT +#define INIT RTI diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index f3a217ea0388..6f2b135dc5e8 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -21,6 +21,7 @@ #include <linux/interrupt.h> #include <linux/debug_locks.h> #include <linux/irqflags.h> +#include <linux/rtmutex.h> /* * Change this to 1 if you want to see the failure printouts: @@ -46,6 +47,7 @@ __setup("debug_locks_verbose=", setup_debug_locks_verbose); #define LOCKTYPE_MUTEX 0x4 #define LOCKTYPE_RWSEM 0x8 #define LOCKTYPE_WW 0x10 +#define LOCKTYPE_RTMUTEX 0x20 static struct ww_acquire_ctx t, t2; static struct ww_mutex o, o2, o3; @@ -74,6 +76,15 @@ static DECLARE_RWSEM(rwsem_B); static DECLARE_RWSEM(rwsem_C); static DECLARE_RWSEM(rwsem_D); +#ifdef CONFIG_RT_MUTEXES + +static DEFINE_RT_MUTEX(rtmutex_A); +static DEFINE_RT_MUTEX(rtmutex_B); +static DEFINE_RT_MUTEX(rtmutex_C); +static DEFINE_RT_MUTEX(rtmutex_D); + +#endif + /* * Locks that we initialize dynamically as well so that * e.g. 
X1 and X2 becomes two instances of the same class, @@ -108,6 +119,17 @@ static DECLARE_RWSEM(rwsem_Y2); static DECLARE_RWSEM(rwsem_Z1); static DECLARE_RWSEM(rwsem_Z2); +#ifdef CONFIG_RT_MUTEXES + +static DEFINE_RT_MUTEX(rtmutex_X1); +static DEFINE_RT_MUTEX(rtmutex_X2); +static DEFINE_RT_MUTEX(rtmutex_Y1); +static DEFINE_RT_MUTEX(rtmutex_Y2); +static DEFINE_RT_MUTEX(rtmutex_Z1); +static DEFINE_RT_MUTEX(rtmutex_Z2); + +#endif + /* * non-inlined runtime initializers, to let separate locks share * the same lock-class: @@ -129,6 +151,17 @@ INIT_CLASS_FUNC(Z) static void init_shared_classes(void) { +#ifdef CONFIG_RT_MUTEXES + static struct lock_class_key rt_X, rt_Y, rt_Z; + + __rt_mutex_init(&rtmutex_X1, __func__, &rt_X); + __rt_mutex_init(&rtmutex_X2, __func__, &rt_X); + __rt_mutex_init(&rtmutex_Y1, __func__, &rt_Y); + __rt_mutex_init(&rtmutex_Y2, __func__, &rt_Y); + __rt_mutex_init(&rtmutex_Z1, __func__, &rt_Z); + __rt_mutex_init(&rtmutex_Z2, __func__, &rt_Z); +#endif + init_class_X(&lock_X1, &rwlock_X1, &mutex_X1, &rwsem_X1); init_class_X(&lock_X2, &rwlock_X2, &mutex_X2, &rwsem_X2); @@ -193,6 +226,10 @@ static void init_shared_classes(void) #define MU(x) mutex_unlock(&mutex_##x) #define MI(x) mutex_init(&mutex_##x) +#define RTL(x) rt_mutex_lock(&rtmutex_##x) +#define RTU(x) rt_mutex_unlock(&rtmutex_##x) +#define RTI(x) rt_mutex_init(&rtmutex_##x) + #define WSL(x) down_write(&rwsem_##x) #define WSU(x) up_write(&rwsem_##x) @@ -264,6 +301,11 @@ GENERATE_TESTCASE(AA_wsem) #include "locking-selftest-rsem.h" GENERATE_TESTCASE(AA_rsem) +#ifdef CONFIG_RT_MUTEXES +#include "locking-selftest-rtmutex.h" +GENERATE_TESTCASE(AA_rtmutex); +#endif + #undef E /* @@ -345,6 +387,11 @@ GENERATE_TESTCASE(ABBA_wsem) #include "locking-selftest-rsem.h" GENERATE_TESTCASE(ABBA_rsem) +#ifdef CONFIG_RT_MUTEXES +#include "locking-selftest-rtmutex.h" +GENERATE_TESTCASE(ABBA_rtmutex); +#endif + #undef E /* @@ -373,6 +420,11 @@ GENERATE_TESTCASE(ABBCCA_wsem) #include "locking-selftest-rsem.h" GENERATE_TESTCASE(ABBCCA_rsem) +#ifdef CONFIG_RT_MUTEXES +#include "locking-selftest-rtmutex.h" +GENERATE_TESTCASE(ABBCCA_rtmutex); +#endif + #undef E /* @@ -401,6 +453,11 @@ GENERATE_TESTCASE(ABCABC_wsem) #include "locking-selftest-rsem.h" GENERATE_TESTCASE(ABCABC_rsem) +#ifdef CONFIG_RT_MUTEXES +#include "locking-selftest-rtmutex.h" +GENERATE_TESTCASE(ABCABC_rtmutex); +#endif + #undef E /* @@ -430,6 +487,11 @@ GENERATE_TESTCASE(ABBCCDDA_wsem) #include "locking-selftest-rsem.h" GENERATE_TESTCASE(ABBCCDDA_rsem) +#ifdef CONFIG_RT_MUTEXES +#include "locking-selftest-rtmutex.h" +GENERATE_TESTCASE(ABBCCDDA_rtmutex); +#endif + #undef E /* @@ -458,6 +520,11 @@ GENERATE_TESTCASE(ABCDBDDA_wsem) #include "locking-selftest-rsem.h" GENERATE_TESTCASE(ABCDBDDA_rsem) +#ifdef CONFIG_RT_MUTEXES +#include "locking-selftest-rtmutex.h" +GENERATE_TESTCASE(ABCDBDDA_rtmutex); +#endif + #undef E /* @@ -486,6 +553,11 @@ GENERATE_TESTCASE(ABCDBCDA_wsem) #include "locking-selftest-rsem.h" GENERATE_TESTCASE(ABCDBCDA_rsem) +#ifdef CONFIG_RT_MUTEXES +#include "locking-selftest-rtmutex.h" +GENERATE_TESTCASE(ABCDBCDA_rtmutex); +#endif + #undef E /* @@ -513,33 +585,10 @@ GENERATE_TESTCASE(double_unlock_wsem) #include "locking-selftest-rsem.h" GENERATE_TESTCASE(double_unlock_rsem) -#undef E - -/* - * Bad unlock ordering: - */ -#define E() \ - \ - LOCK(A); \ - LOCK(B); \ - UNLOCK(A); /* fail */ \ - UNLOCK(B); - -/* - * 6 testcases: - */ -#include "locking-selftest-spin.h" -GENERATE_TESTCASE(bad_unlock_order_spin) -#include "locking-selftest-wlock.h" 
-GENERATE_TESTCASE(bad_unlock_order_wlock) -#include "locking-selftest-rlock.h" -GENERATE_TESTCASE(bad_unlock_order_rlock) -#include "locking-selftest-mutex.h" -GENERATE_TESTCASE(bad_unlock_order_mutex) -#include "locking-selftest-wsem.h" -GENERATE_TESTCASE(bad_unlock_order_wsem) -#include "locking-selftest-rsem.h" -GENERATE_TESTCASE(bad_unlock_order_rsem) +#ifdef CONFIG_RT_MUTEXES +#include "locking-selftest-rtmutex.h" +GENERATE_TESTCASE(double_unlock_rtmutex); +#endif #undef E @@ -567,6 +616,11 @@ GENERATE_TESTCASE(init_held_wsem) #include "locking-selftest-rsem.h" GENERATE_TESTCASE(init_held_rsem) +#ifdef CONFIG_RT_MUTEXES +#include "locking-selftest-rtmutex.h" +GENERATE_TESTCASE(init_held_rtmutex); +#endif + #undef E /* @@ -916,6 +970,9 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) # define I_MUTEX(x) lockdep_reset_lock(&mutex_##x.dep_map) # define I_RWSEM(x) lockdep_reset_lock(&rwsem_##x.dep_map) # define I_WW(x) lockdep_reset_lock(&x.dep_map) +#ifdef CONFIG_RT_MUTEXES +# define I_RTMUTEX(x) lockdep_reset_lock(&rtmutex_##x.dep_map) +#endif #else # define I_SPINLOCK(x) # define I_RWLOCK(x) @@ -924,12 +981,23 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) # define I_WW(x) #endif +#ifndef I_RTMUTEX +# define I_RTMUTEX(x) +#endif + +#ifdef CONFIG_RT_MUTEXES +#define I2_RTMUTEX(x) rt_mutex_init(&rtmutex_##x) +#else +#define I2_RTMUTEX(x) +#endif + #define I1(x) \ do { \ I_SPINLOCK(x); \ I_RWLOCK(x); \ I_MUTEX(x); \ I_RWSEM(x); \ + I_RTMUTEX(x); \ } while (0) #define I2(x) \ @@ -938,6 +1006,7 @@ GENERATE_PERMUTATIONS_3_EVENTS(irq_read_recursion_soft) rwlock_init(&rwlock_##x); \ mutex_init(&mutex_##x); \ init_rwsem(&rwsem_##x); \ + I2_RTMUTEX(x); \ } while (0) static void reset_locks(void) @@ -1013,6 +1082,12 @@ static void dotest(void (*testcase_fn)(void), int expected, int lockclass_mask) reset_locks(); } +#ifdef CONFIG_RT_MUTEXES +#define dotest_rt(fn, e, m) dotest((fn), (e), (m)) +#else +#define dotest_rt(fn, e, m) +#endif + static inline void print_testname(const char *testname) { printk("%33s:", testname); @@ -1050,6 +1125,7 @@ static inline void print_testname(const char *testname) dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ + dotest_rt(name##_rtmutex, FAILURE, LOCKTYPE_RTMUTEX); \ pr_cont("\n"); #define DO_TESTCASE_6_SUCCESS(desc, name) \ @@ -1060,6 +1136,7 @@ static inline void print_testname(const char *testname) dotest(name##_mutex, SUCCESS, LOCKTYPE_MUTEX); \ dotest(name##_wsem, SUCCESS, LOCKTYPE_RWSEM); \ dotest(name##_rsem, SUCCESS, LOCKTYPE_RWSEM); \ + dotest_rt(name##_rtmutex, SUCCESS, LOCKTYPE_RTMUTEX); \ pr_cont("\n"); /* @@ -1073,6 +1150,7 @@ static inline void print_testname(const char *testname) dotest(name##_mutex, FAILURE, LOCKTYPE_MUTEX); \ dotest(name##_wsem, FAILURE, LOCKTYPE_RWSEM); \ dotest(name##_rsem, FAILURE, LOCKTYPE_RWSEM); \ + dotest_rt(name##_rtmutex, FAILURE, LOCKTYPE_RTMUTEX); \ pr_cont("\n"); #define DO_TESTCASE_2I(desc, name, nr) \ @@ -1825,7 +1903,6 @@ void locking_selftest(void) DO_TESTCASE_6R("A-B-C-D-B-C-D-A deadlock", ABCDBCDA); DO_TESTCASE_6("double unlock", double_unlock); DO_TESTCASE_6("initialize held", init_held); - DO_TESTCASE_6_SUCCESS("bad unlock order", bad_unlock_order); printk(" --------------------------------------------------------------------------\n"); print_testname("recursive read-lock"); diff --git a/lib/nlattr.c b/lib/nlattr.c index a7e0b16078df..fb52435be42d 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c 
@@ -352,7 +352,7 @@ struct nlattr *__nla_reserve(struct sk_buff *skb, int attrtype, int attrlen) { struct nlattr *nla; - nla = (struct nlattr *) skb_put(skb, nla_total_size(attrlen)); + nla = skb_put(skb, nla_total_size(attrlen)); nla->nla_type = attrtype; nla->nla_len = nla_attr_size(attrlen); @@ -398,12 +398,7 @@ EXPORT_SYMBOL(__nla_reserve_64bit); */ void *__nla_reserve_nohdr(struct sk_buff *skb, int attrlen) { - void *start; - - start = skb_put(skb, NLA_ALIGN(attrlen)); - memset(start, 0, NLA_ALIGN(attrlen)); - - return start; + return skb_put_zero(skb, NLA_ALIGN(attrlen)); } EXPORT_SYMBOL(__nla_reserve_nohdr); @@ -617,7 +612,7 @@ int nla_append(struct sk_buff *skb, int attrlen, const void *data) if (unlikely(skb_tailroom(skb) < NLA_ALIGN(attrlen))) return -EMSGSIZE; - memcpy(skb_put(skb, attrlen), data, attrlen); + skb_put_data(skb, data, attrlen); return 0; } EXPORT_SYMBOL(nla_append); diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 4e8a30d1c22f..0bc0a3535a8a 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -86,9 +86,11 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, bool nmi_cpu_backtrace(struct pt_regs *regs) { + static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { + arch_spin_lock(&lock); if (regs && cpu_in_idle(instruction_pointer(regs))) { pr_warn("NMI backtrace for cpu %d skipped: idling at pc %#lx\n", cpu, instruction_pointer(regs)); @@ -99,6 +101,7 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) else dump_stack(); } + arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; } diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 9c21000df0b5..3bf4a9984f4c 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -72,7 +72,14 @@ void percpu_counter_set(struct percpu_counter *fbc, s64 amount) } EXPORT_SYMBOL(percpu_counter_set); -void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) +/** + * This function is both preempt and irq safe. The former is due to explicit + * preemption disable. The latter is guaranteed by the fact that the slow path + * is explicitly protected by an irq-safe spinlock whereas the fast patch uses + * this_cpu_add which is irq-safe by definition. Hence there is no need muck + * with irq state before calling this one + */ +void percpu_counter_add_batch(struct percpu_counter *fbc, s64 amount, s32 batch) { s64 count; @@ -89,7 +96,7 @@ void __percpu_counter_add(struct percpu_counter *fbc, s64 amount, s32 batch) } preempt_enable(); } -EXPORT_SYMBOL(__percpu_counter_add); +EXPORT_SYMBOL(percpu_counter_add_batch); /* * Add up all the per-cpu counts, return the result. This is a more accurate diff --git a/lib/raid6/mktables.c b/lib/raid6/mktables.c index 39787db588b0..e824d088f72c 100644 --- a/lib/raid6/mktables.c +++ b/lib/raid6/mktables.c @@ -125,6 +125,26 @@ int main(int argc, char *argv[]) printf("EXPORT_SYMBOL(raid6_gfexp);\n"); printf("#endif\n"); + /* Compute log-of-2 table */ + printf("\nconst u8 __attribute__((aligned(256)))\n" + "raid6_gflog[256] =\n" "{\n"); + for (i = 0; i < 256; i += 8) { + printf("\t"); + for (j = 0; j < 8; j++) { + v = 255; + for (k = 0; k < 256; k++) + if (exptbl[k] == (i + j)) { + v = k; + break; + } + printf("0x%02x,%c", v, (j == 7) ? 
'\n' : ' '); + } + } + printf("};\n"); + printf("#ifdef __KERNEL__\n"); + printf("EXPORT_SYMBOL(raid6_gflog);\n"); + printf("#endif\n"); + /* Compute inverse table x^-1 == x^254 */ printf("\nconst u8 __attribute__((aligned(256)))\n" "raid6_gfinv[256] =\n" "{\n"); diff --git a/lib/refcount.c b/lib/refcount.c index 9f906783987e..5d0582a9480c 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -37,6 +37,8 @@ #include <linux/refcount.h> #include <linux/bug.h> +#ifdef CONFIG_REFCOUNT_FULL + /** * refcount_add_not_zero - add a value to a refcount unless it is 0 * @i: the value to add to the refcount @@ -225,6 +227,7 @@ void refcount_dec(refcount_t *r) WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); } EXPORT_SYMBOL(refcount_dec); +#endif /* CONFIG_REFCOUNT_FULL */ /** * refcount_dec_if_one - decrement a refcount if it is 1 diff --git a/lib/rhashtable.c b/lib/rhashtable.c index d9e7274a04cd..707ca5d677c6 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -211,11 +211,10 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) || - gfp != GFP_KERNEL) + if (gfp != GFP_KERNEL) tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY); - if (tbl == NULL && gfp == GFP_KERNEL) - tbl = vzalloc(size); + else + tbl = kvzalloc(size, gfp); size = nbuckets; @@ -235,7 +234,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, INIT_LIST_HEAD(&tbl->walkers); - get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + tbl->hash_rnd = get_random_u32(); for (i = 0; i < nbuckets; i++) INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index c6cf82242d65..be7b4dd6b68d 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -751,3 +751,38 @@ size_t sg_pcopy_to_buffer(struct scatterlist *sgl, unsigned int nents, return sg_copy_buffer(sgl, nents, buf, buflen, skip, true); } EXPORT_SYMBOL(sg_pcopy_to_buffer); + +/** + * sg_zero_buffer - Zero-out a part of a SG list + * @sgl: The SG list + * @nents: Number of SG entries + * @buflen: The number of bytes to zero out + * @skip: Number of bytes to skip before zeroing + * + * Returns the number of bytes zeroed. 
+ **/ +size_t sg_zero_buffer(struct scatterlist *sgl, unsigned int nents, + size_t buflen, off_t skip) +{ + unsigned int offset = 0; + struct sg_mapping_iter miter; + unsigned int sg_flags = SG_MITER_ATOMIC | SG_MITER_TO_SG; + + sg_miter_start(&miter, sgl, nents, sg_flags); + + if (!sg_miter_skip(&miter, skip)) + return false; + + while (offset < buflen && sg_miter_next(&miter)) { + unsigned int len; + + len = min(miter.length, buflen - offset); + memset(miter.addr, 0, len); + + offset += len; + } + + sg_miter_stop(&miter); + return offset; +} +EXPORT_SYMBOL(sg_zero_buffer); diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 690d75b132fa..2fb007be0212 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -28,7 +28,7 @@ notrace static unsigned int check_preemption_disabled(const char *what1, /* * It is valid to assume CPU-locality during early bootup: */ - if (system_state != SYSTEM_RUNNING) + if (system_state < SYSTEM_SCHEDULING) goto out; /* diff --git a/lib/string.c b/lib/string.c index 1c1fc9187b05..ebbb99c775bd 100644 --- a/lib/string.c +++ b/lib/string.c @@ -978,3 +978,10 @@ char *strreplace(char *s, char old, char new) return s; } EXPORT_SYMBOL(strreplace); + +void fortify_panic(const char *name) +{ + pr_emerg("detected buffer overflow in %s\n", name); + BUG(); +} +EXPORT_SYMBOL(fortify_panic); diff --git a/lib/strnlen_user.c b/lib/strnlen_user.c index 8e105ed4df12..a5f567747ced 100644 --- a/lib/strnlen_user.c +++ b/lib/strnlen_user.c @@ -121,37 +121,3 @@ long strnlen_user(const char __user *str, long count) return 0; } EXPORT_SYMBOL(strnlen_user); - -/** - * strlen_user: - Get the size of a user string INCLUDING final NUL. - * @str: The string to measure. - * - * Context: User context only. This function may sleep if pagefaults are - * enabled. - * - * Get the size of a NUL-terminated string in user space. - * - * Returns the size of the string INCLUDING the terminating NUL. - * On exception, returns 0. - * - * If there is a limit on the length of a valid string, you may wish to - * consider using strnlen_user() instead. 
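A usage sketch for the sg_zero_buffer() helper added in the scatterlist hunk above (illustrative only, not part of the patch; the SG list, length and offset are made up):

	#include <linux/scatterlist.h>
	#include <linux/printk.h>

	/* Hypothetical example: wipe 512 bytes starting 64 bytes into an SG list. */
	static void example_wipe_region(struct scatterlist *sgl, unsigned int nents)
	{
		size_t zeroed;

		/* Zero 512 bytes of payload, skipping the first 64 bytes. */
		zeroed = sg_zero_buffer(sgl, nents, 512, 64);
		if (zeroed != 512)
			pr_warn("only zeroed %zu bytes\n", zeroed);
	}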
- */ -long strlen_user(const char __user *str) -{ - unsigned long max_addr, src_addr; - - max_addr = user_addr_max(); - src_addr = (unsigned long)str; - if (likely(src_addr < max_addr)) { - unsigned long max = max_addr - src_addr; - long retval; - - user_access_begin(); - retval = do_strnlen_user(str, ~0ul, max); - user_access_end(); - return retval; - } - return 0; -} -EXPORT_SYMBOL(strlen_user); diff --git a/lib/test_bitmap.c b/lib/test_bitmap.c index e2cbd43d193c..2526a2975c51 100644 --- a/lib/test_bitmap.c +++ b/lib/test_bitmap.c @@ -333,10 +333,39 @@ static void __init test_bitmap_u32_array_conversions(void) } } +static void noinline __init test_mem_optimisations(void) +{ + DECLARE_BITMAP(bmap1, 1024); + DECLARE_BITMAP(bmap2, 1024); + unsigned int start, nbits; + + for (start = 0; start < 1024; start += 8) { + memset(bmap1, 0x5a, sizeof(bmap1)); + memset(bmap2, 0x5a, sizeof(bmap2)); + for (nbits = 0; nbits < 1024 - start; nbits += 8) { + bitmap_set(bmap1, start, nbits); + __bitmap_set(bmap2, start, nbits); + if (!bitmap_equal(bmap1, bmap2, 1024)) + printk("set not equal %d %d\n", start, nbits); + if (!__bitmap_equal(bmap1, bmap2, 1024)) + printk("set not __equal %d %d\n", start, nbits); + + bitmap_clear(bmap1, start, nbits); + __bitmap_clear(bmap2, start, nbits); + if (!bitmap_equal(bmap1, bmap2, 1024)) + printk("clear not equal %d %d\n", start, nbits); + if (!__bitmap_equal(bmap1, bmap2, 1024)) + printk("clear not __equal %d %d\n", start, + nbits); + } + } +} + static int __init test_bitmap_init(void) { test_zero_fill_copy(); test_bitmap_u32_array_conversions(); + test_mem_optimisations(); if (failed_tests == 0) pr_info("all %u tests passed\n", total_tests); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index be88cbaadde3..d9d5a410955c 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -84,6 +84,7 @@ struct bpf_test { } test[MAX_SUBTESTS]; int (*fill_helper)(struct bpf_test *self); __u8 frag_data[MAX_DATA]; + int stack_depth; /* for eBPF only, since tests don't call verifier */ }; /* Large test cases need separate allocation and fill handler. 
*/ @@ -434,6 +435,30 @@ loop: return 0; } +static int bpf_fill_jump_around_ld_abs(struct bpf_test *self) +{ + unsigned int len = BPF_MAXINSNS; + struct bpf_insn *insn; + int i = 0; + + insn = kmalloc_array(len, sizeof(*insn), GFP_KERNEL); + if (!insn) + return -ENOMEM; + + insn[i++] = BPF_MOV64_REG(R6, R1); + insn[i++] = BPF_LD_ABS(BPF_B, 0); + insn[i] = BPF_JMP_IMM(BPF_JEQ, R0, 10, len - i - 2); + i++; + while (i < len - 1) + insn[i++] = BPF_LD_ABS(BPF_B, 1); + insn[i] = BPF_EXIT_INSN(); + + self->u.ptr.insns = insn; + self->u.ptr.len = len; + + return 0; +} + static int __bpf_fill_stxdw(struct bpf_test *self, int size) { unsigned int len = BPF_MAXINSNS; @@ -455,6 +480,7 @@ static int __bpf_fill_stxdw(struct bpf_test *self, int size) self->u.ptr.insns = insn; self->u.ptr.len = len; + self->stack_depth = 40; return 0; } @@ -2317,7 +2343,8 @@ static struct bpf_test tests[] = { { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x08, 0x06, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x10, 0xbf, 0x48, 0xd6, 0x43, 0xd6}, - { { 38, 256 } } + { { 38, 256 } }, + .stack_depth = 64, }, /* BPF_ALU | BPF_MOV | BPF_X */ { @@ -4169,6 +4196,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0xff } }, + .stack_depth = 40, }, { "ST_MEM_B: Store/Load byte: max positive", @@ -4181,6 +4209,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0x7f } }, + .stack_depth = 40, }, { "STX_MEM_B: Store/Load byte: max negative", @@ -4194,6 +4223,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0xff } }, + .stack_depth = 40, }, { "ST_MEM_H: Store/Load half word: max negative", @@ -4206,6 +4236,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0xffff } }, + .stack_depth = 40, }, { "ST_MEM_H: Store/Load half word: max positive", @@ -4218,6 +4249,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0x7fff } }, + .stack_depth = 40, }, { "STX_MEM_H: Store/Load half word: max negative", @@ -4231,6 +4263,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0xffff } }, + .stack_depth = 40, }, { "ST_MEM_W: Store/Load word: max negative", @@ -4243,6 +4276,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0xffffffff } }, + .stack_depth = 40, }, { "ST_MEM_W: Store/Load word: max positive", @@ -4255,6 +4289,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0x7fffffff } }, + .stack_depth = 40, }, { "STX_MEM_W: Store/Load word: max negative", @@ -4268,6 +4303,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0xffffffff } }, + .stack_depth = 40, }, { "ST_MEM_DW: Store/Load double word: max negative", @@ -4280,6 +4316,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0xffffffff } }, + .stack_depth = 40, }, { "ST_MEM_DW: Store/Load double word: max negative 2", @@ -4297,6 +4334,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0x1 } }, + .stack_depth = 40, }, { "ST_MEM_DW: Store/Load double word: max positive", @@ -4309,6 +4347,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0x7fffffff } }, + .stack_depth = 40, }, { "STX_MEM_DW: Store/Load double word: max negative", @@ -4322,6 +4361,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0xffffffff } }, + .stack_depth = 40, }, /* BPF_STX | BPF_XADD | BPF_W/DW */ { @@ -4336,6 +4376,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0x22 } }, + .stack_depth = 40, }, { "STX_XADD_W: Test side-effects, r10: 0x12 + 0x10 = 0x22", @@ -4351,6 +4392,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0 } }, + .stack_depth = 40, }, { 
"STX_XADD_W: Test side-effects, r0: 0x12 + 0x10 = 0x22", @@ -4363,6 +4405,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0x12 } }, + .stack_depth = 40, }, { "STX_XADD_W: X + 1 + 1 + 1 + ...", @@ -4384,6 +4427,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0x22 } }, + .stack_depth = 40, }, { "STX_XADD_DW: Test side-effects, r10: 0x12 + 0x10 = 0x22", @@ -4399,6 +4443,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0 } }, + .stack_depth = 40, }, { "STX_XADD_DW: Test side-effects, r0: 0x12 + 0x10 = 0x22", @@ -4411,6 +4456,7 @@ static struct bpf_test tests[] = { INTERNAL, { }, { { 0, 0x12 } }, + .stack_depth = 40, }, { "STX_XADD_DW: X + 1 + 1 + 1 + ...", @@ -5022,6 +5068,14 @@ static struct bpf_test tests[] = { { { ETH_HLEN, 0xbef } }, .fill_helper = bpf_fill_ld_abs_vlan_push_pop, }, + { + "BPF_MAXINSNS: jump around ld_abs", + { }, + INTERNAL, + { 10, 11 }, + { { 2, 10 } }, + .fill_helper = bpf_fill_jump_around_ld_abs, + }, /* * LD_IND / LD_ABS on fragmented SKBs */ @@ -5663,7 +5717,7 @@ static struct sk_buff *populate_skb(char *buf, int size) if (!skb) return NULL; - memcpy(__skb_put(skb, size), buf, size); + __skb_put_data(skb, buf, size); /* Initialize a fake skb with test pattern. */ skb_reset_mac_header(skb); @@ -5809,6 +5863,7 @@ static struct bpf_prog *generate_filter(int which, int *err) /* Type doesn't really matter here as long as it's not unspec. */ fp->type = BPF_PROG_TYPE_SOCKET_FILTER; memcpy(fp->insnsi, fptr, fp->len * sizeof(struct bpf_insn)); + fp->aux->stack_depth = tests[which].stack_depth; /* We cannot error here as we don't need type compatibility * checks. diff --git a/lib/test_kmod.c b/lib/test_kmod.c new file mode 100644 index 000000000000..6c1d678bcf8b --- /dev/null +++ b/lib/test_kmod.c @@ -0,0 +1,1246 @@ +/* + * kmod stress test driver + * + * Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or at your option any + * later version; or, when distributed separately from the Linux kernel or + * when incorporated into other software packages, subject to the following + * license: + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of copyleft-next (version 0.3.1 or later) as published + * at http://copyleft-next.org/. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +/* + * This driver provides an interface to trigger and test the kernel's + * module loader through a series of configurations and a few triggers. 
+ * To test this driver use the following script as root: + * + * tools/testing/selftests/kmod/kmod.sh --help + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/kmod.h> +#include <linux/printk.h> +#include <linux/kthread.h> +#include <linux/sched.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/vmalloc.h> +#include <linux/slab.h> +#include <linux/device.h> + +#define TEST_START_NUM_THREADS 50 +#define TEST_START_DRIVER "test_module" +#define TEST_START_TEST_FS "xfs" +#define TEST_START_TEST_CASE TEST_KMOD_DRIVER + + +static bool force_init_test = false; +module_param(force_init_test, bool_enable_only, 0644); +MODULE_PARM_DESC(force_init_test, + "Force kicking a test immediately after driver loads"); + +/* + * For device allocation / registration + */ +static DEFINE_MUTEX(reg_dev_mutex); +static LIST_HEAD(reg_test_devs); + +/* + * num_test_devs actually represents the *next* ID of the next + * device we will allow to create. + */ +static int num_test_devs; + +/** + * enum kmod_test_case - linker table test case + * + * If you add a test case, please be sure to review if you need to se + * @need_mod_put for your tests case. + * + * @TEST_KMOD_DRIVER: stress tests request_module() + * @TEST_KMOD_FS_TYPE: stress tests get_fs_type() + */ +enum kmod_test_case { + __TEST_KMOD_INVALID = 0, + + TEST_KMOD_DRIVER, + TEST_KMOD_FS_TYPE, + + __TEST_KMOD_MAX, +}; + +struct test_config { + char *test_driver; + char *test_fs; + unsigned int num_threads; + enum kmod_test_case test_case; + int test_result; +}; + +struct kmod_test_device; + +/** + * kmod_test_device_info - thread info + * + * @ret_sync: return value if request_module() is used, sync request for + * @TEST_KMOD_DRIVER + * @fs_sync: return value of get_fs_type() for @TEST_KMOD_FS_TYPE + * @thread_idx: thread ID + * @test_dev: test device test is being performed under + * @need_mod_put: Some tests (get_fs_type() is one) requires putting the module + * (module_put(fs_sync->owner)) when done, otherwise you will not be able + * to unload the respective modules and re-test. We use this to keep + * accounting of when we need this and to help out in case we need to + * error out and deal with module_put() on error. 
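The @need_mod_put bookkeeping documented above mirrors the usual get_fs_type()/module_put() pairing; outside this driver the pattern looks roughly like the following sketch (function and variable names are illustrative):

	#include <linux/fs.h>
	#include <linux/module.h>
	#include <linux/errno.h>

	static int example_probe_fs(const char *name)
	{
		struct file_system_type *fs;

		fs = get_fs_type(name);	/* may load the module via request_module() */
		if (!fs)
			return -ENOENT;

		/* ... use fs ... */

		module_put(fs->owner);	/* drop the reference get_fs_type() took */
		return 0;
	}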
+ */ +struct kmod_test_device_info { + int ret_sync; + struct file_system_type *fs_sync; + struct task_struct *task_sync; + unsigned int thread_idx; + struct kmod_test_device *test_dev; + bool need_mod_put; +}; + +/** + * kmod_test_device - test device to help test kmod + * + * @dev_idx: unique ID for test device + * @config: configuration for the test + * @misc_dev: we use a misc device under the hood + * @dev: pointer to misc_dev's own struct device + * @config_mutex: protects configuration of test + * @trigger_mutex: the test trigger can only be fired once at a time + * @thread_lock: protects @done count, and the @info per each thread + * @done: number of threads which have completed or failed + * @test_is_oom: when we run out of memory, use this to halt moving forward + * @kthreads_done: completion used to signal when all work is done + * @list: needed to be part of the reg_test_devs + * @info: array of info for each thread + */ +struct kmod_test_device { + int dev_idx; + struct test_config config; + struct miscdevice misc_dev; + struct device *dev; + struct mutex config_mutex; + struct mutex trigger_mutex; + struct mutex thread_mutex; + + unsigned int done; + + bool test_is_oom; + struct completion kthreads_done; + struct list_head list; + + struct kmod_test_device_info *info; +}; + +static const char *test_case_str(enum kmod_test_case test_case) +{ + switch (test_case) { + case TEST_KMOD_DRIVER: + return "TEST_KMOD_DRIVER"; + case TEST_KMOD_FS_TYPE: + return "TEST_KMOD_FS_TYPE"; + default: + return "invalid"; + } +} + +static struct miscdevice *dev_to_misc_dev(struct device *dev) +{ + return dev_get_drvdata(dev); +} + +static struct kmod_test_device *misc_dev_to_test_dev(struct miscdevice *misc_dev) +{ + return container_of(misc_dev, struct kmod_test_device, misc_dev); +} + +static struct kmod_test_device *dev_to_test_dev(struct device *dev) +{ + struct miscdevice *misc_dev; + + misc_dev = dev_to_misc_dev(dev); + + return misc_dev_to_test_dev(misc_dev); +} + +/* Must run with thread_mutex held */ +static void kmod_test_done_check(struct kmod_test_device *test_dev, + unsigned int idx) +{ + struct test_config *config = &test_dev->config; + + test_dev->done++; + dev_dbg(test_dev->dev, "Done thread count: %u\n", test_dev->done); + + if (test_dev->done == config->num_threads) { + dev_info(test_dev->dev, "Done: %u threads have all run now\n", + test_dev->done); + dev_info(test_dev->dev, "Last thread to run: %u\n", idx); + complete(&test_dev->kthreads_done); + } +} + +static void test_kmod_put_module(struct kmod_test_device_info *info) +{ + struct kmod_test_device *test_dev = info->test_dev; + struct test_config *config = &test_dev->config; + + if (!info->need_mod_put) + return; + + switch (config->test_case) { + case TEST_KMOD_DRIVER: + break; + case TEST_KMOD_FS_TYPE: + if (info && info->fs_sync && info->fs_sync->owner) + module_put(info->fs_sync->owner); + break; + default: + BUG(); + } + + info->need_mod_put = true; +} + +static int run_request(void *data) +{ + struct kmod_test_device_info *info = data; + struct kmod_test_device *test_dev = info->test_dev; + struct test_config *config = &test_dev->config; + + switch (config->test_case) { + case TEST_KMOD_DRIVER: + info->ret_sync = request_module("%s", config->test_driver); + break; + case TEST_KMOD_FS_TYPE: + info->fs_sync = get_fs_type(config->test_fs); + info->need_mod_put = true; + break; + default: + /* __trigger_config_run() already checked for test sanity */ + BUG(); + return -EINVAL; + } + + dev_dbg(test_dev->dev, "Ran thread 
%u\n", info->thread_idx); + + test_kmod_put_module(info); + + mutex_lock(&test_dev->thread_mutex); + info->task_sync = NULL; + kmod_test_done_check(test_dev, info->thread_idx); + mutex_unlock(&test_dev->thread_mutex); + + return 0; +} + +static int tally_work_test(struct kmod_test_device_info *info) +{ + struct kmod_test_device *test_dev = info->test_dev; + struct test_config *config = &test_dev->config; + int err_ret = 0; + + switch (config->test_case) { + case TEST_KMOD_DRIVER: + /* + * Only capture errors, if one is found that's + * enough, for now. + */ + if (info->ret_sync != 0) + err_ret = info->ret_sync; + dev_info(test_dev->dev, + "Sync thread %d return status: %d\n", + info->thread_idx, info->ret_sync); + break; + case TEST_KMOD_FS_TYPE: + /* For now we make this simple */ + if (!info->fs_sync) + err_ret = -EINVAL; + dev_info(test_dev->dev, "Sync thread %u fs: %s\n", + info->thread_idx, info->fs_sync ? config->test_fs : + "NULL"); + break; + default: + BUG(); + } + + return err_ret; +} + +/* + * XXX: add result option to display if all errors did not match. + * For now we just keep any error code if one was found. + * + * If this ran it means *all* tasks were created fine and we + * are now just collecting results. + * + * Only propagate errors, do not override with a subsequent sucess case. + */ +static void tally_up_work(struct kmod_test_device *test_dev) +{ + struct test_config *config = &test_dev->config; + struct kmod_test_device_info *info; + unsigned int idx; + int err_ret = 0; + int ret = 0; + + mutex_lock(&test_dev->thread_mutex); + + dev_info(test_dev->dev, "Results:\n"); + + for (idx=0; idx < config->num_threads; idx++) { + info = &test_dev->info[idx]; + ret = tally_work_test(info); + if (ret) + err_ret = ret; + } + + /* + * Note: request_module() returns 256 for a module not found even + * though modprobe itself returns 1. + */ + config->test_result = err_ret; + + mutex_unlock(&test_dev->thread_mutex); +} + +static int try_one_request(struct kmod_test_device *test_dev, unsigned int idx) +{ + struct kmod_test_device_info *info = &test_dev->info[idx]; + int fail_ret = -ENOMEM; + + mutex_lock(&test_dev->thread_mutex); + + info->thread_idx = idx; + info->test_dev = test_dev; + info->task_sync = kthread_run(run_request, info, "%s-%u", + KBUILD_MODNAME, idx); + + if (!info->task_sync || IS_ERR(info->task_sync)) { + test_dev->test_is_oom = true; + dev_err(test_dev->dev, "Setting up thread %u failed\n", idx); + info->task_sync = NULL; + goto err_out; + } else + dev_dbg(test_dev->dev, "Kicked off thread %u\n", idx); + + mutex_unlock(&test_dev->thread_mutex); + + return 0; + +err_out: + info->ret_sync = fail_ret; + mutex_unlock(&test_dev->thread_mutex); + + return fail_ret; +} + +static void test_dev_kmod_stop_tests(struct kmod_test_device *test_dev) +{ + struct test_config *config = &test_dev->config; + struct kmod_test_device_info *info; + unsigned int i; + + dev_info(test_dev->dev, "Ending request_module() tests\n"); + + mutex_lock(&test_dev->thread_mutex); + + for (i=0; i < config->num_threads; i++) { + info = &test_dev->info[i]; + if (info->task_sync && !IS_ERR(info->task_sync)) { + dev_info(test_dev->dev, + "Stopping still-running thread %i\n", i); + kthread_stop(info->task_sync); + } + + /* + * info->task_sync is well protected, it can only be + * NULL or a pointer to a struct. If its NULL we either + * never ran, or we did and we completed the work. Completed + * tasks *always* put the module for us. This is a sanity + * check -- just in case. 
+ */ + if (info->task_sync && info->need_mod_put) + test_kmod_put_module(info); + } + + mutex_unlock(&test_dev->thread_mutex); +} + +/* + * Only wait *iff* we did not run into any errors during all of our thread + * set up. If we run into any issues we stop threads and just bail out with + * an error to the trigger. This also means we don't need any tally work + * for any threads which fail. + */ +static int try_requests(struct kmod_test_device *test_dev) +{ + struct test_config *config = &test_dev->config; + unsigned int idx; + int ret; + bool any_error = false; + + for (idx=0; idx < config->num_threads; idx++) { + if (test_dev->test_is_oom) { + any_error = true; + break; + } + + ret = try_one_request(test_dev, idx); + if (ret) { + any_error = true; + break; + } + } + + if (!any_error) { + test_dev->test_is_oom = false; + dev_info(test_dev->dev, + "No errors were found while initializing threads\n"); + wait_for_completion(&test_dev->kthreads_done); + tally_up_work(test_dev); + } else { + test_dev->test_is_oom = true; + dev_info(test_dev->dev, + "At least one thread failed to start, stop all work\n"); + test_dev_kmod_stop_tests(test_dev); + return -ENOMEM; + } + + return 0; +} + +static int run_test_driver(struct kmod_test_device *test_dev) +{ + struct test_config *config = &test_dev->config; + + dev_info(test_dev->dev, "Test case: %s (%u)\n", + test_case_str(config->test_case), + config->test_case); + dev_info(test_dev->dev, "Test driver to load: %s\n", + config->test_driver); + dev_info(test_dev->dev, "Number of threads to run: %u\n", + config->num_threads); + dev_info(test_dev->dev, "Thread IDs will range from 0 - %u\n", + config->num_threads - 1); + + return try_requests(test_dev); +} + +static int run_test_fs_type(struct kmod_test_device *test_dev) +{ + struct test_config *config = &test_dev->config; + + dev_info(test_dev->dev, "Test case: %s (%u)\n", + test_case_str(config->test_case), + config->test_case); + dev_info(test_dev->dev, "Test filesystem to load: %s\n", + config->test_fs); + dev_info(test_dev->dev, "Number of threads to run: %u\n", + config->num_threads); + dev_info(test_dev->dev, "Thread IDs will range from 0 - %u\n", + config->num_threads - 1); + + return try_requests(test_dev); +} + +static ssize_t config_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + int len = 0; + + mutex_lock(&test_dev->config_mutex); + + len += snprintf(buf, PAGE_SIZE, + "Custom trigger configuration for: %s\n", + dev_name(dev)); + + len += snprintf(buf+len, PAGE_SIZE - len, + "Number of threads:\t%u\n", + config->num_threads); + + len += snprintf(buf+len, PAGE_SIZE - len, + "Test_case:\t%s (%u)\n", + test_case_str(config->test_case), + config->test_case); + + if (config->test_driver) + len += snprintf(buf+len, PAGE_SIZE - len, + "driver:\t%s\n", + config->test_driver); + else + len += snprintf(buf+len, PAGE_SIZE - len, + "driver:\tEMPTY\n"); + + if (config->test_fs) + len += snprintf(buf+len, PAGE_SIZE - len, + "fs:\t%s\n", + config->test_fs); + else + len += snprintf(buf+len, PAGE_SIZE - len, + "fs:\tEMPTY\n"); + + mutex_unlock(&test_dev->config_mutex); + + return len; +} +static DEVICE_ATTR_RO(config); + +/* + * This ensures we don't allow kicking threads through if our configuration + * is faulty.
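For orientation, the sysfs attributes this driver defines (config above, plus trigger_config and the config_* knobs below) are normally driven by tools/testing/selftests/kmod/kmod.sh; a hand-rolled userspace equivalent might look like this sketch, assuming the first device typically registers as /sys/devices/virtual/misc/test_kmod0 (paths and values illustrative only):

	#include <stdio.h>

	/* Hypothetical helper: write a string to one test_kmod sysfs attribute. */
	static int write_attr(const char *attr, const char *val)
	{
		char path[256];
		FILE *f;

		snprintf(path, sizeof(path),
			 "/sys/devices/virtual/misc/test_kmod0/%s", attr);
		f = fopen(path, "w");
		if (!f)
			return -1;
		fprintf(f, "%s", val);
		fclose(f);
		return 0;
	}

	int main(void)
	{
		write_attr("config_test_driver", "tun");	/* module to request */
		write_attr("config_num_threads", "8");
		write_attr("trigger_config", "1");		/* kick the test run */
		return 0;
	}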
+ */ +static int __trigger_config_run(struct kmod_test_device *test_dev) +{ + struct test_config *config = &test_dev->config; + + test_dev->done = 0; + + switch (config->test_case) { + case TEST_KMOD_DRIVER: + return run_test_driver(test_dev); + case TEST_KMOD_FS_TYPE: + return run_test_fs_type(test_dev); + default: + dev_warn(test_dev->dev, + "Invalid test case requested: %u\n", + config->test_case); + return -EINVAL; + } +} + +static int trigger_config_run(struct kmod_test_device *test_dev) +{ + struct test_config *config = &test_dev->config; + int ret; + + mutex_lock(&test_dev->trigger_mutex); + mutex_lock(&test_dev->config_mutex); + + ret = __trigger_config_run(test_dev); + if (ret < 0) + goto out; + dev_info(test_dev->dev, "General test result: %d\n", + config->test_result); + + /* + * We must return 0 after a trigger event unless something went + * wrong with the setup of the test. If the test setup went fine + * then userspace must just check the result of config->test_result. + * One issue with relying on the return from a call in the kernel + * is that if the kernel returns a positive value, using this trigger + * will not return the value to userspace, it would be lost. + * + * By not relying on capturing the return value of tests we are using + * through the trigger it also allows us to run tests with set -e and only + * fail when something went wrong with the driver upon trigger + * requests. + */ + ret = 0; + +out: + mutex_unlock(&test_dev->config_mutex); + mutex_unlock(&test_dev->trigger_mutex); + + return ret; +} + +static ssize_t +trigger_config_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + int ret; + + if (test_dev->test_is_oom) + return -ENOMEM; + + /* For all intents and purposes we don't care what userspace + * sent to this trigger, we care only that we were triggered. + * We treat the return value only for capturing issues with + * the test setup. At this point all the test variables should + * have been allocated so typically this should never fail. + */ + ret = trigger_config_run(test_dev); + if (unlikely(ret < 0)) + goto out; + + /* + * Note: any return > 0 will be treated as success + * and the error value will not be available to userspace. + * Do not rely on trying to send to userspace a test value + * return value as positive return errors will be lost. + */ + if (WARN_ON(ret > 0)) + return -EINVAL; + + ret = count; +out: + return ret; +} +static DEVICE_ATTR_WO(trigger_config); + +/* + * XXX: move to kstrncpy() once merged. + * + * Users should use kfree_const() when freeing these.
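A minimal sketch of the kstrndup()/kfree_const() pairing that the helper below wraps (illustrative names, not part of the patch):

	#include <linux/string.h>
	#include <linux/slab.h>

	static char *example_dup(const char *src, size_t count)
	{
		/* kstrndup() bounds the copy; pair it with kfree_const() on free. */
		return kstrndup(src, count, GFP_KERNEL);
	}

	static void example_free(char *s)
	{
		kfree_const(s);
	}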
+ */ +static int __kstrncpy(char **dst, const char *name, size_t count, gfp_t gfp) +{ + *dst = kstrndup(name, count, gfp); + if (!*dst) + return -ENOSPC; + return count; +} + +static int config_copy_test_driver_name(struct test_config *config, + const char *name, + size_t count) +{ + return __kstrncpy(&config->test_driver, name, count, GFP_KERNEL); +} + + +static int config_copy_test_fs(struct test_config *config, const char *name, + size_t count) +{ + return __kstrncpy(&config->test_fs, name, count, GFP_KERNEL); +} + +static void __kmod_config_free(struct test_config *config) +{ + if (!config) + return; + + kfree_const(config->test_driver); + config->test_driver = NULL; + + kfree_const(config->test_fs); + config->test_driver = NULL; +} + +static void kmod_config_free(struct kmod_test_device *test_dev) +{ + struct test_config *config; + + if (!test_dev) + return; + + config = &test_dev->config; + + mutex_lock(&test_dev->config_mutex); + __kmod_config_free(config); + mutex_unlock(&test_dev->config_mutex); +} + +static ssize_t config_test_driver_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + int copied; + + mutex_lock(&test_dev->config_mutex); + + kfree_const(config->test_driver); + config->test_driver = NULL; + + copied = config_copy_test_driver_name(config, buf, count); + mutex_unlock(&test_dev->config_mutex); + + return copied; +} + +/* + * As per sysfs_kf_seq_show() the buf is max PAGE_SIZE. + */ +static ssize_t config_test_show_str(struct mutex *config_mutex, + char *dst, + char *src) +{ + int len; + + mutex_lock(config_mutex); + len = snprintf(dst, PAGE_SIZE, "%s\n", src); + mutex_unlock(config_mutex); + + return len; +} + +static ssize_t config_test_driver_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + + return config_test_show_str(&test_dev->config_mutex, buf, + config->test_driver); +} +static DEVICE_ATTR(config_test_driver, 0644, config_test_driver_show, + config_test_driver_store); + +static ssize_t config_test_fs_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + int copied; + + mutex_lock(&test_dev->config_mutex); + + kfree_const(config->test_fs); + config->test_fs = NULL; + + copied = config_copy_test_fs(config, buf, count); + mutex_unlock(&test_dev->config_mutex); + + return copied; +} + +static ssize_t config_test_fs_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + + return config_test_show_str(&test_dev->config_mutex, buf, + config->test_fs); +} +static DEVICE_ATTR(config_test_fs, 0644, config_test_fs_show, + config_test_fs_store); + +static int trigger_config_run_type(struct kmod_test_device *test_dev, + enum kmod_test_case test_case, + const char *test_str) +{ + int copied = 0; + struct test_config *config = &test_dev->config; + + mutex_lock(&test_dev->config_mutex); + + switch (test_case) { + case TEST_KMOD_DRIVER: + kfree_const(config->test_driver); + config->test_driver = NULL; + copied = config_copy_test_driver_name(config, test_str, + strlen(test_str)); + break; + case TEST_KMOD_FS_TYPE: 
+ break; + kfree_const(config->test_fs); + config->test_driver = NULL; + copied = config_copy_test_fs(config, test_str, + strlen(test_str)); + default: + mutex_unlock(&test_dev->config_mutex); + return -EINVAL; + } + + config->test_case = test_case; + + mutex_unlock(&test_dev->config_mutex); + + if (copied <= 0 || copied != strlen(test_str)) { + test_dev->test_is_oom = true; + return -ENOMEM; + } + + test_dev->test_is_oom = false; + + return trigger_config_run(test_dev); +} + +static void free_test_dev_info(struct kmod_test_device *test_dev) +{ + vfree(test_dev->info); + test_dev->info = NULL; +} + +static int kmod_config_sync_info(struct kmod_test_device *test_dev) +{ + struct test_config *config = &test_dev->config; + + free_test_dev_info(test_dev); + test_dev->info = vzalloc(config->num_threads * + sizeof(struct kmod_test_device_info)); + if (!test_dev->info) { + dev_err(test_dev->dev, "Cannot alloc test_dev info\n"); + return -ENOMEM; + } + + return 0; +} + +/* + * Old kernels may not have this, if you want to port this code to + * test it on older kernels. + */ +#ifdef get_kmod_umh_limit +static unsigned int kmod_init_test_thread_limit(void) +{ + return get_kmod_umh_limit(); +} +#else +static unsigned int kmod_init_test_thread_limit(void) +{ + return TEST_START_NUM_THREADS; +} +#endif + +static int __kmod_config_init(struct kmod_test_device *test_dev) +{ + struct test_config *config = &test_dev->config; + int ret = -ENOMEM, copied; + + __kmod_config_free(config); + + copied = config_copy_test_driver_name(config, TEST_START_DRIVER, + strlen(TEST_START_DRIVER)); + if (copied != strlen(TEST_START_DRIVER)) + goto err_out; + + copied = config_copy_test_fs(config, TEST_START_TEST_FS, + strlen(TEST_START_TEST_FS)); + if (copied != strlen(TEST_START_TEST_FS)) + goto err_out; + + config->num_threads = kmod_init_test_thread_limit(); + config->test_result = 0; + config->test_case = TEST_START_TEST_CASE; + + ret = kmod_config_sync_info(test_dev); + if (ret) + goto err_out; + + test_dev->test_is_oom = false; + + return 0; + +err_out: + test_dev->test_is_oom = true; + WARN_ON(test_dev->test_is_oom); + + __kmod_config_free(config); + + return ret; +} + +static ssize_t reset_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + int ret; + + mutex_lock(&test_dev->trigger_mutex); + mutex_lock(&test_dev->config_mutex); + + ret = __kmod_config_init(test_dev); + if (ret < 0) { + ret = -ENOMEM; + dev_err(dev, "could not alloc settings for config trigger: %d\n", + ret); + goto out; + } + + dev_info(dev, "reset\n"); + ret = count; + +out: + mutex_unlock(&test_dev->config_mutex); + mutex_unlock(&test_dev->trigger_mutex); + + return ret; +} +static DEVICE_ATTR_WO(reset); + +static int test_dev_config_update_uint_sync(struct kmod_test_device *test_dev, + const char *buf, size_t size, + unsigned int *config, + int (*test_sync)(struct kmod_test_device *test_dev)) +{ + int ret; + long new; + unsigned int old_val; + + ret = kstrtol(buf, 10, &new); + if (ret) + return ret; + + if (new > UINT_MAX) + return -EINVAL; + + mutex_lock(&test_dev->config_mutex); + + old_val = *config; + *(unsigned int *)config = new; + + ret = test_sync(test_dev); + if (ret) { + *(unsigned int *)config = old_val; + + ret = test_sync(test_dev); + WARN_ON(ret); + + mutex_unlock(&test_dev->config_mutex); + return -EINVAL; + } + + mutex_unlock(&test_dev->config_mutex); + /* Always return full write size even if we didn't consume all */ + 
return size; +} + +static int test_dev_config_update_uint_range(struct kmod_test_device *test_dev, + const char *buf, size_t size, + unsigned int *config, + unsigned int min, + unsigned int max) +{ + int ret; + long new; + + ret = kstrtol(buf, 10, &new); + if (ret) + return ret; + + if (new < min || new > max || new > UINT_MAX) + return -EINVAL; + + mutex_lock(&test_dev->config_mutex); + *config = new; + mutex_unlock(&test_dev->config_mutex); + + /* Always return full write size even if we didn't consume all */ + return size; +} + +static int test_dev_config_update_int(struct kmod_test_device *test_dev, + const char *buf, size_t size, + int *config) +{ + int ret; + long new; + + ret = kstrtol(buf, 10, &new); + if (ret) + return ret; + + if (new > INT_MAX || new < INT_MIN) + return -EINVAL; + + mutex_lock(&test_dev->config_mutex); + *config = new; + mutex_unlock(&test_dev->config_mutex); + /* Always return full write size even if we didn't consume all */ + return size; +} + +static ssize_t test_dev_config_show_int(struct kmod_test_device *test_dev, + char *buf, + int config) +{ + int val; + + mutex_lock(&test_dev->config_mutex); + val = config; + mutex_unlock(&test_dev->config_mutex); + + return snprintf(buf, PAGE_SIZE, "%d\n", val); +} + +static ssize_t test_dev_config_show_uint(struct kmod_test_device *test_dev, + char *buf, + unsigned int config) +{ + unsigned int val; + + mutex_lock(&test_dev->config_mutex); + val = config; + mutex_unlock(&test_dev->config_mutex); + + return snprintf(buf, PAGE_SIZE, "%u\n", val); +} + +static ssize_t test_result_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + + return test_dev_config_update_int(test_dev, buf, count, + &config->test_result); +} + +static ssize_t config_num_threads_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + + return test_dev_config_update_uint_sync(test_dev, buf, count, + &config->num_threads, + kmod_config_sync_info); +} + +static ssize_t config_num_threads_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + + return test_dev_config_show_int(test_dev, buf, config->num_threads); +} +static DEVICE_ATTR(config_num_threads, 0644, config_num_threads_show, + config_num_threads_store); + +static ssize_t config_test_case_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + + return test_dev_config_update_uint_range(test_dev, buf, count, + &config->test_case, + __TEST_KMOD_INVALID + 1, + __TEST_KMOD_MAX - 1); +} + +static ssize_t config_test_case_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kmod_test_device *test_dev = dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + + return test_dev_config_show_uint(test_dev, buf, config->test_case); +} +static DEVICE_ATTR(config_test_case, 0644, config_test_case_show, + config_test_case_store); + +static ssize_t test_result_show(struct device *dev, + struct device_attribute *attr, + char *buf) +{ + struct kmod_test_device *test_dev = 
dev_to_test_dev(dev); + struct test_config *config = &test_dev->config; + + return test_dev_config_show_int(test_dev, buf, config->test_result); +} +static DEVICE_ATTR(test_result, 0644, test_result_show, test_result_store); + +#define TEST_KMOD_DEV_ATTR(name) &dev_attr_##name.attr + +static struct attribute *test_dev_attrs[] = { + TEST_KMOD_DEV_ATTR(trigger_config), + TEST_KMOD_DEV_ATTR(config), + TEST_KMOD_DEV_ATTR(reset), + + TEST_KMOD_DEV_ATTR(config_test_driver), + TEST_KMOD_DEV_ATTR(config_test_fs), + TEST_KMOD_DEV_ATTR(config_num_threads), + TEST_KMOD_DEV_ATTR(config_test_case), + TEST_KMOD_DEV_ATTR(test_result), + + NULL, +}; + +ATTRIBUTE_GROUPS(test_dev); + +static int kmod_config_init(struct kmod_test_device *test_dev) +{ + int ret; + + mutex_lock(&test_dev->config_mutex); + ret = __kmod_config_init(test_dev); + mutex_unlock(&test_dev->config_mutex); + + return ret; +} + +static struct kmod_test_device *alloc_test_dev_kmod(int idx) +{ + int ret; + struct kmod_test_device *test_dev; + struct miscdevice *misc_dev; + + test_dev = vzalloc(sizeof(struct kmod_test_device)); + if (!test_dev) { + pr_err("Cannot alloc test_dev\n"); + goto err_out; + } + + mutex_init(&test_dev->config_mutex); + mutex_init(&test_dev->trigger_mutex); + mutex_init(&test_dev->thread_mutex); + + init_completion(&test_dev->kthreads_done); + + ret = kmod_config_init(test_dev); + if (ret < 0) { + pr_err("Cannot alloc kmod_config_init()\n"); + goto err_out_free; + } + + test_dev->dev_idx = idx; + misc_dev = &test_dev->misc_dev; + + misc_dev->minor = MISC_DYNAMIC_MINOR; + misc_dev->name = kasprintf(GFP_KERNEL, "test_kmod%d", idx); + if (!misc_dev->name) { + pr_err("Cannot alloc misc_dev->name\n"); + goto err_out_free_config; + } + misc_dev->groups = test_dev_groups; + + return test_dev; + +err_out_free_config: + free_test_dev_info(test_dev); + kmod_config_free(test_dev); +err_out_free: + vfree(test_dev); + test_dev = NULL; +err_out: + return NULL; +} + +static void free_test_dev_kmod(struct kmod_test_device *test_dev) +{ + if (test_dev) { + kfree_const(test_dev->misc_dev.name); + test_dev->misc_dev.name = NULL; + free_test_dev_info(test_dev); + kmod_config_free(test_dev); + vfree(test_dev); + test_dev = NULL; + } +} + +static struct kmod_test_device *register_test_dev_kmod(void) +{ + struct kmod_test_device *test_dev = NULL; + int ret; + + mutex_lock(&reg_dev_mutex); + + /* int should suffice for number of devices, test for wrap */ + if (unlikely(num_test_devs + 1) < 0) { + pr_err("reached limit of number of test devices\n"); + goto out; + } + + test_dev = alloc_test_dev_kmod(num_test_devs); + if (!test_dev) + goto out; + + ret = misc_register(&test_dev->misc_dev); + if (ret) { + pr_err("could not register misc device: %d\n", ret); + free_test_dev_kmod(test_dev); + goto out; + } + + test_dev->dev = test_dev->misc_dev.this_device; + list_add_tail(&test_dev->list, &reg_test_devs); + dev_info(test_dev->dev, "interface ready\n"); + + num_test_devs++; + +out: + mutex_unlock(&reg_dev_mutex); + + return test_dev; + +} + +static int __init test_kmod_init(void) +{ + struct kmod_test_device *test_dev; + int ret; + + test_dev = register_test_dev_kmod(); + if (!test_dev) { + pr_err("Cannot add first test kmod device\n"); + return -ENODEV; + } + + /* + * With some work we might be able to gracefully enable + * testing with this driver built-in, for now this seems + * rather risky. For those willing to try have at it, + * and enable the below. Good luck! If that works, try + * lowering the init level for more fun.
+ */ + if (force_init_test) { + ret = trigger_config_run_type(test_dev, + TEST_KMOD_DRIVER, "tun"); + if (WARN_ON(ret)) + return ret; + ret = trigger_config_run_type(test_dev, + TEST_KMOD_FS_TYPE, "btrfs"); + if (WARN_ON(ret)) + return ret; + } + + return 0; +} +late_initcall(test_kmod_init); + +static +void unregister_test_dev_kmod(struct kmod_test_device *test_dev) +{ + mutex_lock(&test_dev->trigger_mutex); + mutex_lock(&test_dev->config_mutex); + + test_dev_kmod_stop_tests(test_dev); + + dev_info(test_dev->dev, "removing interface\n"); + misc_deregister(&test_dev->misc_dev); + kfree(&test_dev->misc_dev.name); + + mutex_unlock(&test_dev->config_mutex); + mutex_unlock(&test_dev->trigger_mutex); + + free_test_dev_kmod(test_dev); +} + +static void __exit test_kmod_exit(void) +{ + struct kmod_test_device *test_dev, *tmp; + + mutex_lock(&reg_dev_mutex); + list_for_each_entry_safe(test_dev, tmp, &reg_test_devs, list) { + list_del(&test_dev->list); + unregister_test_dev_kmod(test_dev); + } + mutex_unlock(&reg_dev_mutex); +} +module_exit(test_kmod_exit); + +MODULE_AUTHOR("Luis R. Rodriguez <mcgrof@kernel.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_sysctl.c b/lib/test_sysctl.c new file mode 100644 index 000000000000..3dd801c1c85b --- /dev/null +++ b/lib/test_sysctl.c @@ -0,0 +1,148 @@ +/* + * proc sysctl test driver + * + * Copyright (C) 2017 Luis R. Rodriguez <mcgrof@kernel.org> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the Free + * Software Foundation; either version 2 of the License, or at your option any + * later version; or, when distributed separately from the Linux kernel or + * when incorporated into other software packages, subject to the following + * license: + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of copyleft-next (version 0.3.1 or later) as published + * at http://copyleft-next.org/. + */ + +/* + * This module provides an interface to the proc sysctl interfaces. This + * driver requires CONFIG_PROC_SYSCTL. It will not normally be loaded by the + * system unless explicitly requested by name. You can also build this driver + * into your kernel.
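The knobs this driver registers below end up under /proc/sys/debug/test_sysctl/; a userspace sketch exercising the int_0001 entry might look like this (illustrative only; the accompanying selftest script is the canonical consumer):

	#include <stdio.h>

	int main(void)
	{
		FILE *f;
		int val = 0;

		f = fopen("/proc/sys/debug/test_sysctl/int_0001", "r");
		if (!f)
			return 1;
		if (fscanf(f, "%d", &val) == 1)
			printf("int_0001 was %d\n", val);
		fclose(f);

		f = fopen("/proc/sys/debug/test_sysctl/int_0001", "w");
		if (!f)
			return 1;
		/* Values outside 0..100 are rejected by proc_dointvec_minmax. */
		fprintf(f, "%d\n", 50);
		fclose(f);
		return 0;
	}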
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/list.h> +#include <linux/module.h> +#include <linux/printk.h> +#include <linux/fs.h> +#include <linux/miscdevice.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/async.h> +#include <linux/delay.h> +#include <linux/vmalloc.h> + +static int i_zero; +static int i_one_hundred = 100; + +struct test_sysctl_data { + int int_0001; + int int_0002; + int int_0003[4]; + + unsigned int uint_0001; + + char string_0001[65]; +}; + +static struct test_sysctl_data test_data = { + .int_0001 = 60, + .int_0002 = 1, + + .int_0003[0] = 0, + .int_0003[1] = 1, + .int_0003[2] = 2, + .int_0003[3] = 3, + + .uint_0001 = 314, + + .string_0001 = "(none)", +}; + +/* These are all under /proc/sys/debug/test_sysctl/ */ +static struct ctl_table test_table[] = { + { + .procname = "int_0001", + .data = &test_data.int_0001, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &i_zero, + .extra2 = &i_one_hundred, + }, + { + .procname = "int_0002", + .data = &test_data.int_0002, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "int_0003", + .data = &test_data.int_0003, + .maxlen = sizeof(test_data.int_0003), + .mode = 0644, + .proc_handler = proc_dointvec, + }, + { + .procname = "uint_0001", + .data = &test_data.uint_0001, + .maxlen = sizeof(unsigned int), + .mode = 0644, + .proc_handler = proc_douintvec, + }, + { + .procname = "string_0001", + .data = &test_data.string_0001, + .maxlen = sizeof(test_data.string_0001), + .mode = 0644, + .proc_handler = proc_dostring, + }, + { } +}; + +static struct ctl_table test_sysctl_table[] = { + { + .procname = "test_sysctl", + .maxlen = 0, + .mode = 0555, + .child = test_table, + }, + { } +}; + +static struct ctl_table test_sysctl_root_table[] = { + { + .procname = "debug", + .maxlen = 0, + .mode = 0555, + .child = test_sysctl_table, + }, + { } +}; + +static struct ctl_table_header *test_sysctl_header; + +static int __init test_sysctl_init(void) +{ + test_sysctl_header = register_sysctl_table(test_sysctl_root_table); + if (!test_sysctl_header) + return -ENOMEM; + return 0; +} +late_initcall(test_sysctl_init); + +static void __exit test_sysctl_exit(void) +{ + if (test_sysctl_header) + unregister_sysctl_table(test_sysctl_header); +} + +module_exit(test_sysctl_exit); + +MODULE_AUTHOR("Luis R. 
Rodriguez <mcgrof@kernel.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_uuid.c b/lib/test_uuid.c index 547d3127a3cf..478c049630b5 100644 --- a/lib/test_uuid.c +++ b/lib/test_uuid.c @@ -11,25 +11,25 @@ struct test_uuid_data { const char *uuid; - uuid_le le; - uuid_be be; + guid_t le; + uuid_t be; }; static const struct test_uuid_data test_uuid_test_data[] = { { .uuid = "c33f4995-3701-450e-9fbf-206a2e98e576", - .le = UUID_LE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), - .be = UUID_BE(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), + .le = GUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), + .be = UUID_INIT(0xc33f4995, 0x3701, 0x450e, 0x9f, 0xbf, 0x20, 0x6a, 0x2e, 0x98, 0xe5, 0x76), }, { .uuid = "64b4371c-77c1-48f9-8221-29f054fc023b", - .le = UUID_LE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), - .be = UUID_BE(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), + .le = GUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), + .be = UUID_INIT(0x64b4371c, 0x77c1, 0x48f9, 0x82, 0x21, 0x29, 0xf0, 0x54, 0xfc, 0x02, 0x3b), }, { .uuid = "0cb4ddff-a545-4401-9d06-688af53e7f84", - .le = UUID_LE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), - .be = UUID_BE(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), + .le = GUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), + .be = UUID_INIT(0x0cb4ddff, 0xa545, 0x4401, 0x9d, 0x06, 0x68, 0x8a, 0xf5, 0x3e, 0x7f, 0x84), }, }; @@ -61,28 +61,28 @@ static void __init test_uuid_failed(const char *prefix, bool wrong, bool be, static void __init test_uuid_test(const struct test_uuid_data *data) { - uuid_le le; - uuid_be be; + guid_t le; + uuid_t be; char buf[48]; /* LE */ total_tests++; - if (uuid_le_to_bin(data->uuid, &le)) + if (guid_parse(data->uuid, &le)) test_uuid_failed("conversion", false, false, data->uuid, NULL); total_tests++; - if (uuid_le_cmp(data->le, le)) { + if (!guid_equal(&data->le, &le)) { sprintf(buf, "%pUl", &le); test_uuid_failed("cmp", false, false, data->uuid, buf); } /* BE */ total_tests++; - if (uuid_be_to_bin(data->uuid, &be)) + if (uuid_parse(data->uuid, &be)) test_uuid_failed("conversion", false, true, data->uuid, NULL); total_tests++; - if (uuid_be_cmp(data->be, be)) { + if (uuid_equal(&data->be, &be)) { sprintf(buf, "%pUb", &be); test_uuid_failed("cmp", false, true, data->uuid, buf); } @@ -90,17 +90,17 @@ static void __init test_uuid_test(const struct test_uuid_data *data) static void __init test_uuid_wrong(const char *data) { - uuid_le le; - uuid_be be; + guid_t le; + uuid_t be; /* LE */ total_tests++; - if (!uuid_le_to_bin(data, &le)) + if (!guid_parse(data, &le)) test_uuid_failed("negative", true, false, data, NULL); /* BE */ total_tests++; - if (!uuid_be_to_bin(data, &be)) + if (!uuid_parse(data, &be)) test_uuid_failed("negative", true, true, data, NULL); } diff --git a/lib/usercopy.c b/lib/usercopy.c index 1b6010a3beb8..f5d9f08ee032 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -6,8 +6,11 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n) { unsigned long res = n; - if (likely(access_ok(VERIFY_READ, from, n))) + might_fault(); + if (likely(access_ok(VERIFY_READ, from, n))) { + kasan_check_write(to, n); res = raw_copy_from_user(to, from, n); + } if (unlikely(res)) memset(to + (n - res), 0, res); return res; @@ -18,8 
diff --git a/lib/usercopy.c b/lib/usercopy.c
index 1b6010a3beb8..f5d9f08ee032 100644
--- a/lib/usercopy.c
+++ b/lib/usercopy.c
@@ -6,8 +6,11 @@
 unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n)
 {
         unsigned long res = n;
-        if (likely(access_ok(VERIFY_READ, from, n)))
+        might_fault();
+        if (likely(access_ok(VERIFY_READ, from, n))) {
+                kasan_check_write(to, n);
                 res = raw_copy_from_user(to, from, n);
+        }
         if (unlikely(res))
                 memset(to + (n - res), 0, res);
         return res;
@@ -18,8 +21,11 @@ EXPORT_SYMBOL(_copy_from_user);
 #ifndef INLINE_COPY_TO_USER
 unsigned long _copy_to_user(void *to, const void __user *from, unsigned long n)
 {
-        if (likely(access_ok(VERIFY_WRITE, to, n)))
+        might_fault();
+        if (likely(access_ok(VERIFY_WRITE, to, n))) {
+                kasan_check_read(from, n);
                 n = raw_copy_to_user(to, from, n);
+        }
         return n;
 }
 EXPORT_SYMBOL(_copy_to_user);
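Both copy helpers now call might_fault() ahead of the access_ok() check and run a KASAN check on the kernel-side buffer before the raw copy, so an undersized destination (or source) is reported as an out-of-bounds access instead of being silently overrun. A hedged sketch of the kind of caller bug this surfaces, assuming CONFIG_KASAN is enabled and the copy goes through these out-of-line helpers; the ioctl-style wrapper is hypothetical and not from the patch:

#include <linux/slab.h>
#include <linux/uaccess.h>
#include <linux/errno.h>

/* Hypothetical ioctl helper: the 8-byte buffer cannot hold 16 bytes. */
static long buggy_copy_in(const void __user *arg)
{
        char *buf = kmalloc(8, GFP_KERNEL);
        long ret = 0;

        if (!buf)
                return -ENOMEM;

        /*
         * With KASAN, kasan_check_write(buf, 16) in _copy_from_user()
         * reports the out-of-bounds destination before
         * raw_copy_from_user() writes past the allocation.
         */
        if (copy_from_user(buf, arg, 16))
                ret = -EFAULT;

        kfree(buf);
        return ret;
}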
diff --git a/lib/uuid.c b/lib/uuid.c
index 37687af77ff8..680b9fb9ba09 100644
--- a/lib/uuid.c
+++ b/lib/uuid.c
@@ -21,10 +21,13 @@
 #include <linux/uuid.h>
 #include <linux/random.h>
 
-const u8 uuid_le_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_le_index);
-const u8 uuid_be_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
-EXPORT_SYMBOL(uuid_be_index);
+const guid_t guid_null;
+EXPORT_SYMBOL(guid_null);
+const uuid_t uuid_null;
+EXPORT_SYMBOL(uuid_null);
+
+const u8 guid_index[16] = {3,2,1,0,5,4,7,6,8,9,10,11,12,13,14,15};
+const u8 uuid_index[16] = {0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15};
 
 /***************************************************************
  * Random UUID interface
@@ -53,21 +56,21 @@ static void __uuid_gen_common(__u8 b[16])
         b[8] = (b[8] & 0x3F) | 0x80;
 }
 
-void uuid_le_gen(uuid_le *lu)
+void guid_gen(guid_t *lu)
 {
         __uuid_gen_common(lu->b);
         /* version 4 : random generation */
         lu->b[7] = (lu->b[7] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_le_gen);
+EXPORT_SYMBOL_GPL(guid_gen);
 
-void uuid_be_gen(uuid_be *bu)
+void uuid_gen(uuid_t *bu)
 {
         __uuid_gen_common(bu->b);
         /* version 4 : random generation */
         bu->b[6] = (bu->b[6] & 0x0F) | 0x40;
 }
-EXPORT_SYMBOL_GPL(uuid_be_gen);
+EXPORT_SYMBOL_GPL(uuid_gen);
 
 /**
  * uuid_is_valid - checks if UUID string valid
@@ -97,7 +100,7 @@ bool uuid_is_valid(const char *uuid)
 }
 EXPORT_SYMBOL(uuid_is_valid);
 
-static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
+static int __uuid_parse(const char *uuid, __u8 b[16], const u8 ei[16])
 {
         static const u8 si[16] = {0,2,4,6,9,11,14,16,19,21,24,26,28,30,32,34};
         unsigned int i;
@@ -115,14 +118,14 @@ static int __uuid_to_bin(const char *uuid, __u8 b[16], const u8 ei[16])
         return 0;
 }
 
-int uuid_le_to_bin(const char *uuid, uuid_le *u)
+int guid_parse(const char *uuid, guid_t *u)
 {
-        return __uuid_to_bin(uuid, u->b, uuid_le_index);
+        return __uuid_parse(uuid, u->b, guid_index);
 }
-EXPORT_SYMBOL(uuid_le_to_bin);
+EXPORT_SYMBOL(guid_parse);
 
-int uuid_be_to_bin(const char *uuid, uuid_be *u)
+int uuid_parse(const char *uuid, uuid_t *u)
 {
-        return __uuid_to_bin(uuid, u->b, uuid_be_index);
+        return __uuid_parse(uuid, u->b, uuid_index);
 }
-EXPORT_SYMBOL(uuid_be_to_bin);
+EXPORT_SYMBOL(uuid_parse);
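After the rename, guid_gen() and uuid_gen() are the random version-4 generators for the little-endian (GUID) and big-endian (UUID) layouts, and the new guid_null/uuid_null exports provide the all-zero values. A brief kernel-style usage sketch, not taken from the patch:

#include <linux/uuid.h>
#include <linux/printk.h>

static void uuid_demo(void)
{
        guid_t g;
        uuid_t u;

        guid_gen(&g);   /* random v4, little-endian field order */
        uuid_gen(&u);   /* random v4, big-endian field order */

        pr_info("guid: %pUl, null? %d\n", &g, guid_equal(&g, &guid_null));
        pr_info("uuid: %pUb, null? %d\n", &u, uuid_equal(&u, &uuid_null));
}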
diff --git a/lib/vsprintf.c b/lib/vsprintf.c
index 2d41de3f98a1..86c3385b9eb3 100644
--- a/lib/vsprintf.c
+++ b/lib/vsprintf.c
@@ -31,6 +31,7 @@
 #include <linux/dcache.h>
 #include <linux/cred.h>
 #include <linux/uuid.h>
+#include <linux/of.h>
 #include <net/addrconf.h>
 #ifdef CONFIG_BLOCK
 #include <linux/blkdev.h>
@@ -1308,14 +1309,14 @@ char *uuid_string(char *buf, char *end, const u8 *addr,
         char uuid[UUID_STRING_LEN + 1];
         char *p = uuid;
         int i;
-        const u8 *index = uuid_be_index;
+        const u8 *index = uuid_index;
         bool uc = false;
 
         switch (*(++fmt)) {
         case 'L':
                 uc = true;
                 /* fall-through */
         case 'l':
-                index = uuid_le_index;
+                index = guid_index;
                 break;
         case 'B':
                 uc = true;
@@ -1470,6 +1471,126 @@ char *flags_string(char *buf, char *end, void *flags_ptr, const char *fmt)
         return format_flags(buf, end, flags, names);
 }
 
+static const char *device_node_name_for_depth(const struct device_node *np, int depth)
+{
+        for ( ; np && depth; depth--)
+                np = np->parent;
+
+        return kbasename(np->full_name);
+}
+
+static noinline_for_stack
+char *device_node_gen_full_name(const struct device_node *np, char *buf, char *end)
+{
+        int depth;
+        const struct device_node *parent = np->parent;
+        static const struct printf_spec strspec = {
+                .field_width = -1,
+                .precision = -1,
+        };
+
+        /* special case for root node */
+        if (!parent)
+                return string(buf, end, "/", strspec);
+
+        for (depth = 0; parent->parent; depth++)
+                parent = parent->parent;
+
+        for ( ; depth >= 0; depth--) {
+                buf = string(buf, end, "/", strspec);
+                buf = string(buf, end, device_node_name_for_depth(np, depth),
+                             strspec);
+        }
+        return buf;
+}
+
+static noinline_for_stack
+char *device_node_string(char *buf, char *end, struct device_node *dn,
+                         struct printf_spec spec, const char *fmt)
+{
+        char tbuf[sizeof("xxxx") + 1];
+        const char *p;
+        int ret;
+        char *buf_start = buf;
+        struct property *prop;
+        bool has_mult, pass;
+        static const struct printf_spec num_spec = {
+                .flags = SMALL,
+                .field_width = -1,
+                .precision = -1,
+                .base = 10,
+        };
+
+        struct printf_spec str_spec = spec;
+        str_spec.field_width = -1;
+
+        if (!IS_ENABLED(CONFIG_OF))
+                return string(buf, end, "(!OF)", spec);
+
+        if ((unsigned long)dn < PAGE_SIZE)
+                return string(buf, end, "(null)", spec);
+
+        /* simple case without anything any more format specifiers */
+        fmt++;
+        if (fmt[0] == '\0' || strcspn(fmt,"fnpPFcC") > 0)
+                fmt = "f";
+
+        for (pass = false; strspn(fmt,"fnpPFcC"); fmt++, pass = true) {
+                if (pass) {
+                        if (buf < end)
+                                *buf = ':';
+                        buf++;
+                }
+
+                switch (*fmt) {
+                case 'f':       /* full_name */
+                        buf = device_node_gen_full_name(dn, buf, end);
+                        break;
+                case 'n':       /* name */
+                        buf = string(buf, end, dn->name, str_spec);
+                        break;
+                case 'p':       /* phandle */
+                        buf = number(buf, end, (unsigned int)dn->phandle, num_spec);
+                        break;
+                case 'P':       /* path-spec */
+                        p = kbasename(of_node_full_name(dn));
+                        if (!p[1])
+                                p = "/";
+                        buf = string(buf, end, p, str_spec);
+                        break;
+                case 'F':       /* flags */
+                        tbuf[0] = of_node_check_flag(dn, OF_DYNAMIC) ? 'D' : '-';
+                        tbuf[1] = of_node_check_flag(dn, OF_DETACHED) ? 'd' : '-';
+                        tbuf[2] = of_node_check_flag(dn, OF_POPULATED) ? 'P' : '-';
+                        tbuf[3] = of_node_check_flag(dn, OF_POPULATED_BUS) ? 'B' : '-';
+                        tbuf[4] = 0;
+                        buf = string(buf, end, tbuf, str_spec);
+                        break;
+                case 'c':       /* major compatible string */
+                        ret = of_property_read_string(dn, "compatible", &p);
+                        if (!ret)
+                                buf = string(buf, end, p, str_spec);
+                        break;
+                case 'C':       /* full compatible string */
+                        has_mult = false;
+                        of_property_for_each_string(dn, "compatible", prop, p) {
+                                if (has_mult)
+                                        buf = string(buf, end, ",", str_spec);
+                                buf = string(buf, end, "\"", str_spec);
+                                buf = string(buf, end, p, str_spec);
+                                buf = string(buf, end, "\"", str_spec);
+
+                                has_mult = true;
+                        }
+                        break;
+                default:
+                        break;
+                }
+        }
+
+        return widen_string(buf, buf - buf_start, end, spec);
+}
+
 int kptr_restrict __read_mostly;
 
 /*
@@ -1566,6 +1687,16 @@ int kptr_restrict __read_mostly;
 *      p page flags (see struct page) given as pointer to unsigned long
 *      g gfp flags (GFP_* and __GFP_*) given as pointer to gfp_t
 *      v vma flags (VM_*) given as pointer to unsigned long
+ * - 'O' For a kobject based struct. Must be one of the following:
+ *      - 'OF[fnpPcCF]' For a device tree object
+ *                      Without any optional arguments prints the full_name
+ *                      f device node full_name
+ *                      n device node name
+ *                      p device node phandle
+ *                      P device node path spec (name + @unit)
+ *                      F device node flags
+ *                      c major compatible string
+ *                      C full compatible string
 *
 * ** Please update also Documentation/printk-formats.txt when making changes **
 *
@@ -1721,6 +1852,11 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
         case 'G':
                 return flags_string(buf, end, ptr, fmt);
+        case 'O':
+                switch (fmt[1]) {
+                case 'F':
+                        return device_node_string(buf, end, ptr, spec, fmt + 1);
+                }
         }
         spec.flags |= SMALL;
         if (spec.field_width == -1) {