Diffstat (limited to 'lib')
45 files changed, 2681 insertions, 898 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 54cf309a92a5..87da53bb1fef 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -13,8 +13,17 @@ config RAID6_PQ config BITREVERSE tristate +config HAVE_ARCH_BITREVERSE + bool + default n + depends on BITREVERSE + help + This option provides an config for the architecture which have instruction + can do bitreverse operation, we use the hardware instruction if the architecture + have this capability. + config RATIONAL - boolean + bool config GENERIC_STRNCPY_FROM_USER bool @@ -39,14 +48,14 @@ config GENERIC_IOMAP select GENERIC_PCI_IOMAP config GENERIC_IO - boolean + bool default n config STMP_DEVICE bool config PERCPU_RWSEM - boolean + bool config ARCH_USE_CMPXCHG_LOCKREF bool @@ -257,7 +266,7 @@ config DECOMPRESS_LZ4 # Generic allocator support is selected if needed # config GENERIC_ALLOCATOR - boolean + bool # # reed solomon support is select'ed if needed @@ -266,16 +275,16 @@ config REED_SOLOMON tristate config REED_SOLOMON_ENC8 - boolean + bool config REED_SOLOMON_DEC8 - boolean + bool config REED_SOLOMON_ENC16 - boolean + bool config REED_SOLOMON_DEC16 - boolean + bool # # BCH support is selected if needed @@ -284,7 +293,7 @@ config BCH tristate config BCH_CONST_PARAMS - boolean + bool help Drivers may select this option to force specific constant values for parameters 'm' (Galois field order) and 't' @@ -320,7 +329,7 @@ config BCH_CONST_T # Textsearch support is select'ed if needed # config TEXTSEARCH - boolean + bool config TEXTSEARCH_KMP tristate @@ -332,10 +341,10 @@ config TEXTSEARCH_FSM tristate config BTREE - boolean + bool config INTERVAL_TREE - boolean + bool help Simple, embeddable, interval-tree. Can find the start of an overlapping range in log(n) time and then iterate over all @@ -363,18 +372,18 @@ config ASSOCIATIVE_ARRAY for more information. config HAS_IOMEM - boolean + bool depends on !NO_IOMEM select GENERIC_IO default y config HAS_IOPORT_MAP - boolean + bool depends on HAS_IOMEM && !NO_IOPORT_MAP default y config HAS_DMA - boolean + bool depends on !NO_DMA default y diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 5f2ce616c046..c5cefb3c009c 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -167,6 +167,17 @@ config DEBUG_INFO_DWARF4 But it significantly improves the success of resolving variables in gdb on optimized code. +config GDB_SCRIPTS + bool "Provide GDB scripts for kernel debugging" + depends on DEBUG_INFO + help + This creates the required links to GDB helper scripts in the + build directory. If you load vmlinux into gdb, the helper + scripts will be automatically imported by gdb as well, and + additional functions are available to analyze a Linux kernel + instance. See Documentation/gdb-kernel-debugging.txt for further + details. + config ENABLE_WARN_DEPRECATED bool "Enable __deprecated logic" default y @@ -636,7 +647,7 @@ config DEBUG_STACKOVERFLOW depends on DEBUG_KERNEL && HAVE_DEBUG_STACKOVERFLOW ---help--- Say Y here if you want to check for overflows of kernel, IRQ - and exception stacks (if your archicture uses them). This + and exception stacks (if your architecture uses them). This option will show detailed messages if free stack space drops below a certain limit. 
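The new HAVE_ARCH_BITREVERSE option above lets an architecture with a native bit-reverse instruction (for example ARM's rbit) replace the generic C implementation in lib/bitrev.c. As a rough standalone illustration of what the software fallback has to do, here is a classic shift-and-mask reversal; this is a sketch only, not the kernel's table-based code:

#include <stdint.h>
#include <stdio.h>

/* Reverse the bits of a 32-bit value with shift/mask steps.
 * Illustrative only: the kernel's generic path uses a byte lookup
 * table, and HAVE_ARCH_BITREVERSE replaces it with a single
 * hardware instruction where one exists. */
static uint32_t bitrev32_sw(uint32_t x)
{
        x = ((x & 0x55555555u) << 1) | ((x >> 1) & 0x55555555u);
        x = ((x & 0x33333333u) << 2) | ((x >> 2) & 0x33333333u);
        x = ((x & 0x0f0f0f0fu) << 4) | ((x >> 4) & 0x0f0f0f0fu);
        x = (x << 24) | ((x & 0xff00u) << 8) |
            ((x >> 8) & 0xff00u) | (x >> 24);
        return x;
}

int main(void)
{
        printf("%08x\n", bitrev32_sw(0x00000001u)); /* prints 80000000 */
        return 0;
}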
@@ -651,6 +662,8 @@ config DEBUG_STACKOVERFLOW source "lib/Kconfig.kmemcheck" +source "lib/Kconfig.kasan" + endmenu # "Memory Debugging" config DEBUG_SHIRQ @@ -1215,6 +1228,7 @@ config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL select TORTURE_TEST + select SRCU default n help This option provides a kernel module that runs torture tests @@ -1257,7 +1271,7 @@ config RCU_CPU_STALL_TIMEOUT config RCU_CPU_STALL_INFO bool "Print additional diagnostics on RCU CPU stall" depends on (TREE_RCU || PREEMPT_RCU) && DEBUG_KERNEL - default n + default y help For each stalled CPU that is aware of the current RCU grace period, print out additional per-CPU diagnostic information @@ -1579,6 +1593,9 @@ config ASYNC_RAID6_TEST If unsure, say N. +config TEST_HEXDUMP + tristate "Test functions located in the hexdump module at runtime" + config TEST_STRING_HELPERS tristate "Test functions located in the string_helpers module at runtime" @@ -1586,7 +1603,7 @@ config TEST_KSTRTOX tristate "Test kstrto*() family of functions at runtime" config TEST_RHASHTABLE - bool "Perform selftest on resizable hash table" + tristate "Perform selftest on resizable hash table" default n help Enable this option to test the rhashtable functions at boot. diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan new file mode 100644 index 000000000000..4fecaedc80a2 --- /dev/null +++ b/lib/Kconfig.kasan @@ -0,0 +1,54 @@ +config HAVE_ARCH_KASAN + bool + +if HAVE_ARCH_KASAN + +config KASAN + bool "KASan: runtime memory debugger" + depends on SLUB_DEBUG + select CONSTRUCTORS + help + Enables kernel address sanitizer - runtime memory debugger, + designed to find out-of-bounds accesses and use-after-free bugs. + This is strictly debugging feature. It consumes about 1/8 + of available memory and brings about ~x3 performance slowdown. + For better error detection enable CONFIG_STACKTRACE, + and add slub_debug=U to boot cmdline. + +config KASAN_SHADOW_OFFSET + hex + default 0xdffffc0000000000 if X86_64 + +choice + prompt "Instrumentation type" + depends on KASAN + default KASAN_OUTLINE + +config KASAN_OUTLINE + bool "Outline instrumentation" + help + Before every memory access compiler insert function call + __asan_load*/__asan_store*. These functions performs check + of shadow memory. This is slower than inline instrumentation, + however it doesn't bloat size of kernel's .text section so + much as inline does. + +config KASAN_INLINE + bool "Inline instrumentation" + help + Compiler directly inserts code checking shadow memory before + memory accesses. This is faster than outline (in some workloads + it gives about x2 boost over outline instrumentation), but + make kernel's .text size much bigger. + +endchoice + +config TEST_KASAN + tristate "Module for testing kasan for bug detection" + depends on m && KASAN + help + This is a test module doing various nasty things like + out of bounds accesses, use after free. It is useful for testing + kernel debugging features like kernel address sanitizer. 
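The TEST_KASAN help text above describes a module that performs deliberately invalid accesses so the sanitizer has something to report. A minimal sketch of that kind of test follows; the function and module names here are made up for illustration, and the real lib/test_kasan.c covers many more cases:

#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>

/* Write one byte past the end of a small kmalloc() allocation.
 * With CONFIG_KASAN enabled this should produce an out-of-bounds
 * report; the helper name is purely illustrative. */
static noinline void kasan_demo_oob_write(void)
{
        char *p = kmalloc(16, GFP_KERNEL);

        if (!p)
                return;
        p[16] = 'x';            /* one byte out of bounds */
        kfree(p);
}

static int __init kasan_demo_init(void)
{
        kasan_demo_oob_write();
        return 0;
}

static void __exit kasan_demo_exit(void)
{
}

module_init(kasan_demo_init);
module_exit(kasan_demo_exit);
MODULE_LICENSE("GPL");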
+ +endif diff --git a/lib/Makefile b/lib/Makefile index 3c3b30b9e020..58f74d2dd396 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -4,7 +4,7 @@ ifdef CONFIG_FUNCTION_TRACER ORIG_CFLAGS := $(KBUILD_CFLAGS) -KBUILD_CFLAGS = $(subst -pg,,$(ORIG_CFLAGS)) +KBUILD_CFLAGS = $(subst $(CC_FLAGS_FTRACE),,$(ORIG_CFLAGS)) endif lib-y := ctype.o string.o vsprintf.o cmdline.o \ @@ -23,18 +23,22 @@ lib-y += kobject.o klist.o obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ - bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ + bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o +obj-y += hexdump.o +obj-$(CONFIG_TEST_HEXDUMP) += test-hexdump.o obj-y += kstrtox.o +obj-$(CONFIG_TEST_BPF) += test_bpf.o +obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o +obj-$(CONFIG_TEST_KASAN) += test_kasan.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o obj-$(CONFIG_TEST_LKM) += test_module.o +obj-$(CONFIG_TEST_RHASHTABLE) += test_rhashtable.o obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o -obj-$(CONFIG_TEST_BPF) += test_bpf.o -obj-$(CONFIG_TEST_FIRMWARE) += test_firmware.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG diff --git a/lib/bitmap.c b/lib/bitmap.c index 324ea9eab8c1..d456f4c15a9f 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -104,18 +104,18 @@ EXPORT_SYMBOL(__bitmap_complement); * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits - * @bits : bitmap size, in bits + * @nbits : bitmap size, in bits * * Shifting right (dividing) means moving bits in the MS -> LS bit * direction. Zeros are fed into the vacated MS positions and the * LS bits shifted off the bottom are lost. */ -void __bitmap_shift_right(unsigned long *dst, - const unsigned long *src, int shift, int bits) +void __bitmap_shift_right(unsigned long *dst, const unsigned long *src, + unsigned shift, unsigned nbits) { - int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; - int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; - unsigned long mask = (1UL << left) - 1; + unsigned k, lim = BITS_TO_LONGS(nbits); + unsigned off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + unsigned long mask = BITMAP_LAST_WORD_MASK(nbits); for (k = 0; off + k < lim; ++k) { unsigned long upper, lower; @@ -127,17 +127,15 @@ void __bitmap_shift_right(unsigned long *dst, upper = 0; else { upper = src[off + k + 1]; - if (off + k + 1 == lim - 1 && left) + if (off + k + 1 == lim - 1) upper &= mask; + upper <<= (BITS_PER_LONG - rem); } lower = src[off + k]; - if (left && off + k == lim - 1) + if (off + k == lim - 1) lower &= mask; - dst[k] = lower >> rem; - if (rem) - dst[k] |= upper << (BITS_PER_LONG - rem); - if (left && k == lim - 1) - dst[k] &= mask; + lower >>= rem; + dst[k] = lower | upper; } if (off) memset(&dst[lim - off], 0, off*sizeof(unsigned long)); @@ -150,18 +148,19 @@ EXPORT_SYMBOL(__bitmap_shift_right); * @dst : destination bitmap * @src : source bitmap * @shift : shift by this many bits - * @bits : bitmap size, in bits + * @nbits : bitmap size, in bits * * Shifting left (multiplying) means moving bits in the LS -> MS * direction. 
Zeros are fed into the vacated LS bit positions * and those MS bits shifted off the top are lost. */ -void __bitmap_shift_left(unsigned long *dst, - const unsigned long *src, int shift, int bits) +void __bitmap_shift_left(unsigned long *dst, const unsigned long *src, + unsigned int shift, unsigned int nbits) { - int k, lim = BITS_TO_LONGS(bits), left = bits % BITS_PER_LONG; - int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; + int k; + unsigned int lim = BITS_TO_LONGS(nbits); + unsigned int off = shift/BITS_PER_LONG, rem = shift % BITS_PER_LONG; for (k = lim - off - 1; k >= 0; --k) { unsigned long upper, lower; @@ -170,17 +169,11 @@ void __bitmap_shift_left(unsigned long *dst, * word below and make them the bottom rem bits of result. */ if (rem && k > 0) - lower = src[k - 1]; + lower = src[k - 1] >> (BITS_PER_LONG - rem); else lower = 0; - upper = src[k]; - if (left && k == lim - 1) - upper &= (1UL << left) - 1; - dst[k + off] = upper << rem; - if (rem) - dst[k + off] |= lower >> (BITS_PER_LONG - rem); - if (left && k + off == lim - 1) - dst[k + off] &= (1UL << left) - 1; + upper = src[k] << rem; + dst[k + off] = lower | upper; } if (off) memset(dst, 0, off*sizeof(unsigned long)); @@ -377,45 +370,6 @@ EXPORT_SYMBOL(bitmap_find_next_zero_area_off); #define BASEDEC 10 /* fancier cpuset lists input in decimal */ /** - * bitmap_scnprintf - convert bitmap to an ASCII hex string. - * @buf: byte buffer into which string is placed - * @buflen: reserved size of @buf, in bytes - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * - * Exactly @nmaskbits bits are displayed. Hex digits are grouped into - * comma-separated sets of eight digits per set. Returns the number of - * characters which were written to *buf, excluding the trailing \0. - */ -int bitmap_scnprintf(char *buf, unsigned int buflen, - const unsigned long *maskp, int nmaskbits) -{ - int i, word, bit, len = 0; - unsigned long val; - const char *sep = ""; - int chunksz; - u32 chunkmask; - - chunksz = nmaskbits & (CHUNKSZ - 1); - if (chunksz == 0) - chunksz = CHUNKSZ; - - i = ALIGN(nmaskbits, CHUNKSZ) - CHUNKSZ; - for (; i >= 0; i -= CHUNKSZ) { - chunkmask = ((1ULL << chunksz) - 1); - word = i / BITS_PER_LONG; - bit = i % BITS_PER_LONG; - val = (maskp[word] >> bit) & chunkmask; - len += scnprintf(buf+len, buflen-len, "%s%0*lx", sep, - (chunksz+3)/4, val); - chunksz = CHUNKSZ; - sep = ","; - } - return len; -} -EXPORT_SYMBOL(bitmap_scnprintf); - -/** * __bitmap_parse - convert an ASCII hex string into a bitmap. * @buf: pointer to buffer containing string. * @buflen: buffer size in bytes. If string is smaller than this @@ -528,65 +482,6 @@ int bitmap_parse_user(const char __user *ubuf, } EXPORT_SYMBOL(bitmap_parse_user); -/* - * bscnl_emit(buf, buflen, rbot, rtop, bp) - * - * Helper routine for bitmap_scnlistprintf(). Write decimal number - * or range to buf, suppressing output past buf+buflen, with optional - * comma-prefix. Return len of what was written to *buf, excluding the - * trailing \0. 
- */ -static inline int bscnl_emit(char *buf, int buflen, int rbot, int rtop, int len) -{ - if (len > 0) - len += scnprintf(buf + len, buflen - len, ","); - if (rbot == rtop) - len += scnprintf(buf + len, buflen - len, "%d", rbot); - else - len += scnprintf(buf + len, buflen - len, "%d-%d", rbot, rtop); - return len; -} - -/** - * bitmap_scnlistprintf - convert bitmap to list format ASCII string - * @buf: byte buffer into which string is placed - * @buflen: reserved size of @buf, in bytes - * @maskp: pointer to bitmap to convert - * @nmaskbits: size of bitmap, in bits - * - * Output format is a comma-separated list of decimal numbers and - * ranges. Consecutively set bits are shown as two hyphen-separated - * decimal numbers, the smallest and largest bit numbers set in - * the range. Output format is compatible with the format - * accepted as input by bitmap_parselist(). - * - * The return value is the number of characters which were written to *buf - * excluding the trailing '\0', as per ISO C99's scnprintf. - */ -int bitmap_scnlistprintf(char *buf, unsigned int buflen, - const unsigned long *maskp, int nmaskbits) -{ - int len = 0; - /* current bit is 'cur', most recently seen range is [rbot, rtop] */ - int cur, rbot, rtop; - - if (buflen == 0) - return 0; - buf[0] = 0; - - rbot = cur = find_first_bit(maskp, nmaskbits); - while (cur < nmaskbits) { - rtop = cur; - cur = find_next_bit(maskp, nmaskbits, cur+1); - if (cur >= nmaskbits || cur > rtop + 1) { - len = bscnl_emit(buf, buflen, rbot, rtop, len); - rbot = cur; - } - } - return len; -} -EXPORT_SYMBOL(bitmap_scnlistprintf); - /** * bitmap_print_to_pagebuf - convert bitmap to list or hex format ASCII string * @list: indicates whether the bitmap must be list @@ -605,8 +500,8 @@ int bitmap_print_to_pagebuf(bool list, char *buf, const unsigned long *maskp, int n = 0; if (len > 1) { - n = list ? bitmap_scnlistprintf(buf, len, maskp, nmaskbits) : - bitmap_scnprintf(buf, len, maskp, nmaskbits); + n = list ? scnprintf(buf, len, "%*pbl", nmaskbits, maskp) : + scnprintf(buf, len, "%*pb", nmaskbits, maskp); buf[n++] = '\n'; buf[n] = '\0'; } @@ -744,10 +639,10 @@ EXPORT_SYMBOL(bitmap_parselist_user); /** * bitmap_pos_to_ord - find ordinal of set bit at given position in bitmap * @buf: pointer to a bitmap - * @pos: a bit position in @buf (0 <= @pos < @bits) - * @bits: number of valid bit positions in @buf + * @pos: a bit position in @buf (0 <= @pos < @nbits) + * @nbits: number of valid bit positions in @buf * - * Map the bit at position @pos in @buf (of length @bits) to the + * Map the bit at position @pos in @buf (of length @nbits) to the * ordinal of which set bit it is. If it is not set or if @pos * is not a valid bit position, map to -1. * @@ -759,56 +654,40 @@ EXPORT_SYMBOL(bitmap_parselist_user); * * The bit positions 0 through @bits are valid positions in @buf. 
*/ -static int bitmap_pos_to_ord(const unsigned long *buf, int pos, int bits) +static int bitmap_pos_to_ord(const unsigned long *buf, unsigned int pos, unsigned int nbits) { - int i, ord; - - if (pos < 0 || pos >= bits || !test_bit(pos, buf)) + if (pos >= nbits || !test_bit(pos, buf)) return -1; - i = find_first_bit(buf, bits); - ord = 0; - while (i < pos) { - i = find_next_bit(buf, bits, i + 1); - ord++; - } - BUG_ON(i != pos); - - return ord; + return __bitmap_weight(buf, pos); } /** * bitmap_ord_to_pos - find position of n-th set bit in bitmap * @buf: pointer to bitmap * @ord: ordinal bit position (n-th set bit, n >= 0) - * @bits: number of valid bit positions in @buf + * @nbits: number of valid bit positions in @buf * * Map the ordinal offset of bit @ord in @buf to its position in @buf. - * Value of @ord should be in range 0 <= @ord < weight(buf), else - * results are undefined. + * Value of @ord should be in range 0 <= @ord < weight(buf). If @ord + * >= weight(buf), returns @nbits. * * If for example, just bits 4 through 7 are set in @buf, then @ord * values 0 through 3 will get mapped to 4 through 7, respectively, - * and all other @ord values return undefined values. When @ord value 3 + * and all other @ord values returns @nbits. When @ord value 3 * gets mapped to (returns) @pos value 7 in this example, that means * that the 3rd set bit (starting with 0th) is at position 7 in @buf. * - * The bit positions 0 through @bits are valid positions in @buf. + * The bit positions 0 through @nbits-1 are valid positions in @buf. */ -int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) +unsigned int bitmap_ord_to_pos(const unsigned long *buf, unsigned int ord, unsigned int nbits) { - int pos = 0; - - if (ord >= 0 && ord < bits) { - int i; + unsigned int pos; - for (i = find_first_bit(buf, bits); - i < bits && ord > 0; - i = find_next_bit(buf, bits, i + 1)) - ord--; - if (i < bits && ord == 0) - pos = i; - } + for (pos = find_first_bit(buf, nbits); + pos < nbits && ord; + pos = find_next_bit(buf, nbits, pos + 1)) + ord--; return pos; } @@ -819,7 +698,7 @@ int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) * @src: subset to be remapped * @old: defines domain of map * @new: defines range of map - * @bits: number of bits in each of these bitmaps + * @nbits: number of bits in each of these bitmaps * * Let @old and @new define a mapping of bit positions, such that * whatever position is held by the n-th set bit in @old is mapped @@ -847,22 +726,22 @@ int bitmap_ord_to_pos(const unsigned long *buf, int ord, int bits) */ void bitmap_remap(unsigned long *dst, const unsigned long *src, const unsigned long *old, const unsigned long *new, - int bits) + unsigned int nbits) { - int oldbit, w; + unsigned int oldbit, w; if (dst == src) /* following doesn't handle inplace remaps */ return; - bitmap_zero(dst, bits); + bitmap_zero(dst, nbits); - w = bitmap_weight(new, bits); - for_each_set_bit(oldbit, src, bits) { - int n = bitmap_pos_to_ord(old, oldbit, bits); + w = bitmap_weight(new, nbits); + for_each_set_bit(oldbit, src, nbits) { + int n = bitmap_pos_to_ord(old, oldbit, nbits); if (n < 0 || w == 0) set_bit(oldbit, dst); /* identity map */ else - set_bit(bitmap_ord_to_pos(new, n % w, bits), dst); + set_bit(bitmap_ord_to_pos(new, n % w, nbits), dst); } } EXPORT_SYMBOL(bitmap_remap); @@ -1006,9 +885,9 @@ EXPORT_SYMBOL(bitmap_bitremap); * All bits in @dst not set by the above rule are cleared. 
*/ void bitmap_onto(unsigned long *dst, const unsigned long *orig, - const unsigned long *relmap, int bits) + const unsigned long *relmap, unsigned int bits) { - int n, m; /* same meaning as in above comment */ + unsigned int n, m; /* same meaning as in above comment */ if (dst == orig) /* following doesn't handle inplace mappings */ return; @@ -1039,22 +918,22 @@ EXPORT_SYMBOL(bitmap_onto); * @dst: resulting smaller bitmap * @orig: original larger bitmap * @sz: specified size - * @bits: number of bits in each of these bitmaps + * @nbits: number of bits in each of these bitmaps * * For each bit oldbit in @orig, set bit oldbit mod @sz in @dst. * Clear all other bits in @dst. See further the comment and * Example [2] for bitmap_onto() for why and how to use this. */ void bitmap_fold(unsigned long *dst, const unsigned long *orig, - int sz, int bits) + unsigned int sz, unsigned int nbits) { - int oldbit; + unsigned int oldbit; if (dst == orig) /* following doesn't handle inplace mappings */ return; - bitmap_zero(dst, bits); + bitmap_zero(dst, nbits); - for_each_set_bit(oldbit, orig, bits) + for_each_set_bit(oldbit, orig, nbits) set_bit(oldbit % sz, dst); } EXPORT_SYMBOL(bitmap_fold); @@ -1207,16 +1086,17 @@ EXPORT_SYMBOL(bitmap_allocate_region); * * Require nbits % BITS_PER_LONG == 0. */ -void bitmap_copy_le(void *dst, const unsigned long *src, int nbits) +#ifdef __BIG_ENDIAN +void bitmap_copy_le(unsigned long *dst, const unsigned long *src, unsigned int nbits) { - unsigned long *d = dst; - int i; + unsigned int i; for (i = 0; i < nbits/BITS_PER_LONG; i++) { if (BITS_PER_LONG == 64) - d[i] = cpu_to_le64(src[i]); + dst[i] = cpu_to_le64(src[i]); else - d[i] = cpu_to_le32(src[i]); + dst[i] = cpu_to_le32(src[i]); } } EXPORT_SYMBOL(bitmap_copy_le); +#endif diff --git a/lib/bitrev.c b/lib/bitrev.c index 3956203456d4..40ffda94cc5d 100644 --- a/lib/bitrev.c +++ b/lib/bitrev.c @@ -1,3 +1,4 @@ +#ifndef CONFIG_HAVE_ARCH_BITREVERSE #include <linux/types.h> #include <linux/module.h> #include <linux/bitrev.h> @@ -42,18 +43,4 @@ const u8 byte_rev_table[256] = { }; EXPORT_SYMBOL_GPL(byte_rev_table); -u16 bitrev16(u16 x) -{ - return (bitrev8(x & 0xff) << 8) | bitrev8(x >> 8); -} -EXPORT_SYMBOL(bitrev16); - -/** - * bitrev32 - reverse the order of bits in a u32 value - * @x: value to be bit-reversed - */ -u32 bitrev32(u32 x) -{ - return (bitrev16(x & 0xffff) << 16) | bitrev16(x >> 16); -} -EXPORT_SYMBOL(bitrev32); +#endif /* CONFIG_HAVE_ARCH_BITREVERSE */ diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index 527799d44476..d8f3d3150603 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -641,7 +641,7 @@ static __init int ddebug_setup_query(char *str) __setup("ddebug_query=", ddebug_setup_query); /* - * File_ops->write method for <debugfs>/dynamic_debug/conrol. Gathers the + * File_ops->write method for <debugfs>/dynamic_debug/control. Gathers the * command text from userspace, parses and executes it. */ #define USER_BUF_PAGE 4096 diff --git a/lib/dynamic_queue_limits.c b/lib/dynamic_queue_limits.c index 0777c5a45fa0..f346715e2255 100644 --- a/lib/dynamic_queue_limits.c +++ b/lib/dynamic_queue_limits.c @@ -3,12 +3,12 @@ * * Copyright (c) 2011, Tom Herbert <therbert@google.com> */ -#include <linux/module.h> #include <linux/types.h> -#include <linux/ctype.h> #include <linux/kernel.h> #include <linux/jiffies.h> #include <linux/dynamic_queue_limits.h> +#include <linux/compiler.h> +#include <linux/export.h> #define POSDIFF(A, B) ((int)((A) - (B)) > 0 ? 
(A) - (B) : 0) #define AFTER_EQ(A, B) ((int)((A) - (B)) >= 0) diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 71fcfcd96410..d83a372fa76f 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -109,7 +109,7 @@ int main(int argc, char** argv) if (CRC_LE_BITS > 1) { crc32init_le(); - printf("static u32 __cacheline_aligned " + printf("static const u32 ____cacheline_aligned " "crc32table_le[%d][%d] = {", LE_TABLE_ROWS, LE_TABLE_SIZE); output_table(crc32table_le, LE_TABLE_ROWS, @@ -119,7 +119,7 @@ int main(int argc, char** argv) if (CRC_BE_BITS > 1) { crc32init_be(); - printf("static u32 __cacheline_aligned " + printf("static const u32 ____cacheline_aligned " "crc32table_be[%d][%d] = {", BE_TABLE_ROWS, BE_TABLE_SIZE); output_table(crc32table_be, LE_TABLE_ROWS, @@ -128,7 +128,7 @@ int main(int argc, char** argv) } if (CRC_LE_BITS > 1) { crc32cinit_le(); - printf("static u32 __cacheline_aligned " + printf("static const u32 ____cacheline_aligned " "crc32ctable_le[%d][%d] = {", LE_TABLE_ROWS, LE_TABLE_SIZE); output_table(crc32ctable_le, LE_TABLE_ROWS, diff --git a/lib/genalloc.c b/lib/genalloc.c index 2e65d206b01c..d214866eeea2 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -34,7 +34,6 @@ #include <linux/rculist.h> #include <linux/interrupt.h> #include <linux/genalloc.h> -#include <linux/of_address.h> #include <linux/of_device.h> static inline size_t chunk_size(const struct gen_pool_chunk *chunk) @@ -415,7 +414,7 @@ bool addr_in_gen_pool(struct gen_pool *pool, unsigned long start, size_t size) { bool found = false; - unsigned long end = start + size; + unsigned long end = start + size - 1; struct gen_pool_chunk *chunk; rcu_read_lock(); @@ -587,6 +586,8 @@ struct gen_pool *devm_gen_pool_create(struct device *dev, int min_alloc_order, struct gen_pool **ptr, *pool; ptr = devres_alloc(devm_gen_pool_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; pool = gen_pool_create(min_alloc_order, nid); if (pool) { diff --git a/lib/halfmd4.c b/lib/halfmd4.c index 66d0ee8b7776..a8fe6274a13c 100644 --- a/lib/halfmd4.c +++ b/lib/halfmd4.c @@ -1,4 +1,4 @@ -#include <linux/kernel.h> +#include <linux/compiler.h> #include <linux/export.h> #include <linux/cryptohash.h> diff --git a/lib/hexdump.c b/lib/hexdump.c index 270773b91923..7ea09699855d 100644 --- a/lib/hexdump.c +++ b/lib/hexdump.c @@ -97,63 +97,79 @@ EXPORT_SYMBOL(bin2hex); * * example output buffer: * 40 41 42 43 44 45 46 47 48 49 4a 4b 4c 4d 4e 4f @ABCDEFGHIJKLMNO + * + * Return: + * The amount of bytes placed in the buffer without terminating NUL. If the + * output was truncated, then the return value is the number of bytes + * (excluding the terminating NUL) which would have been written to the final + * string if enough space had been available. 
*/ -void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, - int groupsize, char *linebuf, size_t linebuflen, - bool ascii) +int hex_dump_to_buffer(const void *buf, size_t len, int rowsize, int groupsize, + char *linebuf, size_t linebuflen, bool ascii) { const u8 *ptr = buf; + int ngroups; u8 ch; int j, lx = 0; int ascii_column; + int ret; if (rowsize != 16 && rowsize != 32) rowsize = 16; - if (!len) - goto nil; if (len > rowsize) /* limit to one line at a time */ len = rowsize; + if (!is_power_of_2(groupsize) || groupsize > 8) + groupsize = 1; if ((len % groupsize) != 0) /* no mixed size output */ groupsize = 1; - switch (groupsize) { - case 8: { - const u64 *ptr8 = buf; - int ngroups = len / groupsize; + ngroups = len / groupsize; + ascii_column = rowsize * 2 + rowsize / groupsize + 1; - for (j = 0; j < ngroups; j++) - lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%16.16llx", j ? " " : "", - (unsigned long long)*(ptr8 + j)); - ascii_column = 17 * ngroups + 2; - break; - } + if (!linebuflen) + goto overflow1; - case 4: { - const u32 *ptr4 = buf; - int ngroups = len / groupsize; + if (!len) + goto nil; - for (j = 0; j < ngroups; j++) - lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%8.8x", j ? " " : "", *(ptr4 + j)); - ascii_column = 9 * ngroups + 2; - break; - } + if (groupsize == 8) { + const u64 *ptr8 = buf; - case 2: { - const u16 *ptr2 = buf; - int ngroups = len / groupsize; + for (j = 0; j < ngroups; j++) { + ret = snprintf(linebuf + lx, linebuflen - lx, + "%s%16.16llx", j ? " " : "", + (unsigned long long)*(ptr8 + j)); + if (ret >= linebuflen - lx) + goto overflow1; + lx += ret; + } + } else if (groupsize == 4) { + const u32 *ptr4 = buf; - for (j = 0; j < ngroups; j++) - lx += scnprintf(linebuf + lx, linebuflen - lx, - "%s%4.4x", j ? " " : "", *(ptr2 + j)); - ascii_column = 5 * ngroups + 2; - break; - } + for (j = 0; j < ngroups; j++) { + ret = snprintf(linebuf + lx, linebuflen - lx, + "%s%8.8x", j ? " " : "", + *(ptr4 + j)); + if (ret >= linebuflen - lx) + goto overflow1; + lx += ret; + } + } else if (groupsize == 2) { + const u16 *ptr2 = buf; - default: - for (j = 0; (j < len) && (lx + 3) <= linebuflen; j++) { + for (j = 0; j < ngroups; j++) { + ret = snprintf(linebuf + lx, linebuflen - lx, + "%s%4.4x", j ? " " : "", + *(ptr2 + j)); + if (ret >= linebuflen - lx) + goto overflow1; + lx += ret; + } + } else { + for (j = 0; j < len; j++) { + if (linebuflen < lx + 3) + goto overflow2; ch = ptr[j]; linebuf[lx++] = hex_asc_hi(ch); linebuf[lx++] = hex_asc_lo(ch); @@ -161,21 +177,28 @@ void hex_dump_to_buffer(const void *buf, size_t len, int rowsize, } if (j) lx--; - - ascii_column = 3 * rowsize + 2; - break; } if (!ascii) goto nil; - while (lx < (linebuflen - 1) && lx < (ascii_column - 1)) + while (lx < ascii_column) { + if (linebuflen < lx + 2) + goto overflow2; linebuf[lx++] = ' '; - for (j = 0; (j < len) && (lx + 2) < linebuflen; j++) { + } + for (j = 0; j < len; j++) { + if (linebuflen < lx + 2) + goto overflow2; ch = ptr[j]; linebuf[lx++] = (isascii(ch) && isprint(ch)) ? ch : '.'; } nil: + linebuf[lx] = '\0'; + return lx; +overflow2: linebuf[lx++] = '\0'; +overflow1: + return ascii ? 
ascii_column + len : (groupsize * 2 + 1) * ngroups - 1; } EXPORT_SYMBOL(hex_dump_to_buffer); diff --git a/lib/idr.c b/lib/idr.c index e654aebd5f80..5335c43adf46 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -30,7 +30,6 @@ #include <linux/idr.h> #include <linux/spinlock.h> #include <linux/percpu.h> -#include <linux/hardirq.h> #define MAX_IDR_SHIFT (sizeof(int) * 8 - 1) #define MAX_IDR_BIT (1U << MAX_IDR_SHIFT) diff --git a/lib/interval_tree.c b/lib/interval_tree.c index f367f9ad544c..c85f6600a5f8 100644 --- a/lib/interval_tree.c +++ b/lib/interval_tree.c @@ -1,7 +1,7 @@ -#include <linux/init.h> #include <linux/interval_tree.h> #include <linux/interval_tree_generic.h> -#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/export.h> #define START(node) ((node)->start) #define LAST(node) ((node)->last) diff --git a/lib/iov_iter.c b/lib/iov_iter.c new file mode 100644 index 000000000000..9d96e283520c --- /dev/null +++ b/lib/iov_iter.c @@ -0,0 +1,768 @@ +#include <linux/export.h> +#include <linux/uio.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <net/checksum.h> + +#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ + size_t left; \ + size_t wanted = n; \ + __p = i->iov; \ + __v.iov_len = min(n, __p->iov_len - skip); \ + if (likely(__v.iov_len)) { \ + __v.iov_base = __p->iov_base + skip; \ + left = (STEP); \ + __v.iov_len -= left; \ + skip += __v.iov_len; \ + n -= __v.iov_len; \ + } else { \ + left = 0; \ + } \ + while (unlikely(!left && n)) { \ + __p++; \ + __v.iov_len = min(n, __p->iov_len); \ + if (unlikely(!__v.iov_len)) \ + continue; \ + __v.iov_base = __p->iov_base; \ + left = (STEP); \ + __v.iov_len -= left; \ + skip = __v.iov_len; \ + n -= __v.iov_len; \ + } \ + n = wanted - n; \ +} + +#define iterate_kvec(i, n, __v, __p, skip, STEP) { \ + size_t wanted = n; \ + __p = i->kvec; \ + __v.iov_len = min(n, __p->iov_len - skip); \ + if (likely(__v.iov_len)) { \ + __v.iov_base = __p->iov_base + skip; \ + (void)(STEP); \ + skip += __v.iov_len; \ + n -= __v.iov_len; \ + } \ + while (unlikely(n)) { \ + __p++; \ + __v.iov_len = min(n, __p->iov_len); \ + if (unlikely(!__v.iov_len)) \ + continue; \ + __v.iov_base = __p->iov_base; \ + (void)(STEP); \ + skip = __v.iov_len; \ + n -= __v.iov_len; \ + } \ + n = wanted; \ +} + +#define iterate_bvec(i, n, __v, __p, skip, STEP) { \ + size_t wanted = n; \ + __p = i->bvec; \ + __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \ + if (likely(__v.bv_len)) { \ + __v.bv_page = __p->bv_page; \ + __v.bv_offset = __p->bv_offset + skip; \ + (void)(STEP); \ + skip += __v.bv_len; \ + n -= __v.bv_len; \ + } \ + while (unlikely(n)) { \ + __p++; \ + __v.bv_len = min_t(size_t, n, __p->bv_len); \ + if (unlikely(!__v.bv_len)) \ + continue; \ + __v.bv_page = __p->bv_page; \ + __v.bv_offset = __p->bv_offset; \ + (void)(STEP); \ + skip = __v.bv_len; \ + n -= __v.bv_len; \ + } \ + n = wanted; \ +} + +#define iterate_all_kinds(i, n, v, I, B, K) { \ + size_t skip = i->iov_offset; \ + if (unlikely(i->type & ITER_BVEC)) { \ + const struct bio_vec *bvec; \ + struct bio_vec v; \ + iterate_bvec(i, n, v, bvec, skip, (B)) \ + } else if (unlikely(i->type & ITER_KVEC)) { \ + const struct kvec *kvec; \ + struct kvec v; \ + iterate_kvec(i, n, v, kvec, skip, (K)) \ + } else { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + } \ +} + +#define iterate_and_advance(i, n, v, I, B, K) { \ + size_t skip = i->iov_offset; \ + if (unlikely(i->type & ITER_BVEC)) { \ + const struct bio_vec 
*bvec; \ + struct bio_vec v; \ + iterate_bvec(i, n, v, bvec, skip, (B)) \ + if (skip == bvec->bv_len) { \ + bvec++; \ + skip = 0; \ + } \ + i->nr_segs -= bvec - i->bvec; \ + i->bvec = bvec; \ + } else if (unlikely(i->type & ITER_KVEC)) { \ + const struct kvec *kvec; \ + struct kvec v; \ + iterate_kvec(i, n, v, kvec, skip, (K)) \ + if (skip == kvec->iov_len) { \ + kvec++; \ + skip = 0; \ + } \ + i->nr_segs -= kvec - i->kvec; \ + i->kvec = kvec; \ + } else { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + if (skip == iov->iov_len) { \ + iov++; \ + skip = 0; \ + } \ + i->nr_segs -= iov - i->iov; \ + i->iov = iov; \ + } \ + i->count -= n; \ + i->iov_offset = skip; \ +} + +static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *from; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_writeable(buf, copy)) { + kaddr = kmap_atomic(page); + from = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = from - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too bad - revert to non-atomic kmap */ + kaddr = kmap(page); + from = kaddr + offset; + left = __copy_to_user(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + kunmap(page); +done: + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + +static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *to; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_readable(buf, copy)) { + kaddr = kmap_atomic(page); + to = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = to - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too 
bad - revert to non-atomic kmap */ + kaddr = kmap(page); + to = kaddr + offset; + left = __copy_from_user(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + kunmap(page); +done: + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + +/* + * Fault in the first iovec of the given iov_iter, to a maximum length + * of bytes. Returns 0 on success, or non-zero if the memory could not be + * accessed (ie. because it is an invalid address). + * + * writev-intensive code may want this to prefault several iovecs -- that + * would be possible (callers must not rely on the fact that _only_ the + * first iovec will be faulted with the current implementation). + */ +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) +{ + if (!(i->type & (ITER_BVEC|ITER_KVEC))) { + char __user *buf = i->iov->iov_base + i->iov_offset; + bytes = min(bytes, i->iov->iov_len - i->iov_offset); + return fault_in_pages_readable(buf, bytes); + } + return 0; +} +EXPORT_SYMBOL(iov_iter_fault_in_readable); + +void iov_iter_init(struct iov_iter *i, int direction, + const struct iovec *iov, unsigned long nr_segs, + size_t count) +{ + /* It will get better. Eventually... */ + if (segment_eq(get_fs(), KERNEL_DS)) { + direction |= ITER_KVEC; + i->type = direction; + i->kvec = (struct kvec *)iov; + } else { + i->type = direction; + i->iov = iov; + } + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_init); + +static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) +{ + char *from = kmap_atomic(page); + memcpy(to, from + offset, len); + kunmap_atomic(from); +} + +static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len) +{ + char *to = kmap_atomic(page); + memcpy(to + offset, from, len); + kunmap_atomic(to); +} + +static void memzero_page(struct page *page, size_t offset, size_t len) +{ + char *addr = kmap_atomic(page); + memset(addr + offset, 0, len); + kunmap_atomic(addr); +} + +size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + char *from = addr; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, + v.iov_len), + memcpy_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len), + memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(copy_to_iter); + +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + char *to = addr; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, + v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(copy_from_iter); + +size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) +{ + char *to = addr; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + 
iterate_and_advance(i, bytes, v, + __copy_from_user_nocache((to += v.iov_len) - v.iov_len, + v.iov_base, v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(copy_from_iter_nocache); + +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & (ITER_BVEC|ITER_KVEC)) { + void *kaddr = kmap_atomic(page); + size_t wanted = copy_to_iter(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; + } else + return copy_page_to_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_to_iter); + +size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & (ITER_BVEC|ITER_KVEC)) { + void *kaddr = kmap_atomic(page); + size_t wanted = copy_from_iter(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; + } else + return copy_page_from_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_from_iter); + +size_t iov_iter_zero(size_t bytes, struct iov_iter *i) +{ + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __clear_user(v.iov_base, v.iov_len), + memzero_page(v.bv_page, v.bv_offset, v.bv_len), + memset(v.iov_base, 0, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(iov_iter_zero); + +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + char *kaddr = kmap_atomic(page), *p = kaddr + offset; + iterate_all_kinds(i, bytes, v, + __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, + v.iov_base, v.iov_len), + memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + ) + kunmap_atomic(kaddr); + return bytes; +} +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); + +void iov_iter_advance(struct iov_iter *i, size_t size) +{ + iterate_and_advance(i, size, v, 0, 0, 0) +} +EXPORT_SYMBOL(iov_iter_advance); + +/* + * Return the count of just the current iov_iter segment. 
+ */ +size_t iov_iter_single_seg_count(const struct iov_iter *i) +{ + if (i->nr_segs == 1) + return i->count; + else if (i->type & ITER_BVEC) + return min(i->count, i->bvec->bv_len - i->iov_offset); + else + return min(i->count, i->iov->iov_len - i->iov_offset); +} +EXPORT_SYMBOL(iov_iter_single_seg_count); + +void iov_iter_kvec(struct iov_iter *i, int direction, + const struct kvec *kvec, unsigned long nr_segs, + size_t count) +{ + BUG_ON(!(direction & ITER_KVEC)); + i->type = direction; + i->kvec = kvec; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_kvec); + +void iov_iter_bvec(struct iov_iter *i, int direction, + const struct bio_vec *bvec, unsigned long nr_segs, + size_t count) +{ + BUG_ON(!(direction & ITER_BVEC)); + i->type = direction; + i->bvec = bvec; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_bvec); + +unsigned long iov_iter_alignment(const struct iov_iter *i) +{ + unsigned long res = 0; + size_t size = i->count; + + if (!size) + return 0; + + iterate_all_kinds(i, size, v, + (res |= (unsigned long)v.iov_base | v.iov_len, 0), + res |= v.bv_offset | v.bv_len, + res |= (unsigned long)v.iov_base | v.iov_len + ) + return res; +} +EXPORT_SYMBOL(iov_iter_alignment); + +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, unsigned maxpages, + size_t *start) +{ + if (maxsize > i->count) + maxsize = i->count; + + if (!maxsize) + return 0; + + iterate_all_kinds(i, maxsize, v, ({ + unsigned long addr = (unsigned long)v.iov_base; + size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); + int n; + int res; + + if (len > maxpages * PAGE_SIZE) + len = maxpages * PAGE_SIZE; + addr &= ~(PAGE_SIZE - 1); + n = DIV_ROUND_UP(len, PAGE_SIZE); + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); + if (unlikely(res < 0)) + return res; + return (res == n ? len : res * PAGE_SIZE) - *start; + 0;}),({ + /* can't be more than PAGE_SIZE */ + *start = v.bv_offset; + get_page(*pages = v.bv_page); + return v.bv_len; + }),({ + return -EFAULT; + }) + ) + return 0; +} +EXPORT_SYMBOL(iov_iter_get_pages); + +static struct page **get_pages_array(size_t n) +{ + struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); + if (!p) + p = vmalloc(n * sizeof(struct page *)); + return p; +} + +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + struct page **p; + + if (maxsize > i->count) + maxsize = i->count; + + if (!maxsize) + return 0; + + iterate_all_kinds(i, maxsize, v, ({ + unsigned long addr = (unsigned long)v.iov_base; + size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); + int n; + int res; + + addr &= ~(PAGE_SIZE - 1); + n = DIV_ROUND_UP(len, PAGE_SIZE); + p = get_pages_array(n); + if (!p) + return -ENOMEM; + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); + if (unlikely(res < 0)) { + kvfree(p); + return res; + } + *pages = p; + return (res == n ? 
len : res * PAGE_SIZE) - *start; + 0;}),({ + /* can't be more than PAGE_SIZE */ + *start = v.bv_offset; + *pages = p = get_pages_array(1); + if (!p) + return -ENOMEM; + get_page(*p = v.bv_page); + return v.bv_len; + }),({ + return -EFAULT; + }) + ) + return 0; +} +EXPORT_SYMBOL(iov_iter_get_pages_alloc); + +size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, + struct iov_iter *i) +{ + char *to = addr; + __wsum sum, next; + size_t off = 0; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + sum = *csum; + iterate_and_advance(i, bytes, v, ({ + int err = 0; + next = csum_and_copy_from_user(v.iov_base, + (to += v.iov_len) - v.iov_len, + v.iov_len, 0, &err); + if (!err) { + sum = csum_block_add(sum, next, off); + off += v.iov_len; + } + err ? v.iov_len : 0; + }), ({ + char *p = kmap_atomic(v.bv_page); + next = csum_partial_copy_nocheck(p + v.bv_offset, + (to += v.bv_len) - v.bv_len, + v.bv_len, 0); + kunmap_atomic(p); + sum = csum_block_add(sum, next, off); + off += v.bv_len; + }),({ + next = csum_partial_copy_nocheck(v.iov_base, + (to += v.iov_len) - v.iov_len, + v.iov_len, 0); + sum = csum_block_add(sum, next, off); + off += v.iov_len; + }) + ) + *csum = sum; + return bytes; +} +EXPORT_SYMBOL(csum_and_copy_from_iter); + +size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, + struct iov_iter *i) +{ + char *from = addr; + __wsum sum, next; + size_t off = 0; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + sum = *csum; + iterate_and_advance(i, bytes, v, ({ + int err = 0; + next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, + v.iov_base, + v.iov_len, 0, &err); + if (!err) { + sum = csum_block_add(sum, next, off); + off += v.iov_len; + } + err ? 
v.iov_len : 0; + }), ({ + char *p = kmap_atomic(v.bv_page); + next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len, + p + v.bv_offset, + v.bv_len, 0); + kunmap_atomic(p); + sum = csum_block_add(sum, next, off); + off += v.bv_len; + }),({ + next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len, + v.iov_base, + v.iov_len, 0); + sum = csum_block_add(sum, next, off); + off += v.iov_len; + }) + ) + *csum = sum; + return bytes; +} +EXPORT_SYMBOL(csum_and_copy_to_iter); + +int iov_iter_npages(const struct iov_iter *i, int maxpages) +{ + size_t size = i->count; + int npages = 0; + + if (!size) + return 0; + + iterate_all_kinds(i, size, v, ({ + unsigned long p = (unsigned long)v.iov_base; + npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) + - p / PAGE_SIZE; + if (npages >= maxpages) + return maxpages; + 0;}),({ + npages++; + if (npages >= maxpages) + return maxpages; + }),({ + unsigned long p = (unsigned long)v.iov_base; + npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) + - p / PAGE_SIZE; + if (npages >= maxpages) + return maxpages; + }) + ) + return npages; +} +EXPORT_SYMBOL(iov_iter_npages); + +const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) +{ + *new = *old; + if (new->type & ITER_BVEC) + return new->bvec = kmemdup(new->bvec, + new->nr_segs * sizeof(struct bio_vec), + flags); + else + /* iovec and kvec have identical layout */ + return new->iov = kmemdup(new->iov, + new->nr_segs * sizeof(struct iovec), + flags); +} +EXPORT_SYMBOL(dup_iter); diff --git a/lib/iovec.c b/lib/iovec.c deleted file mode 100644 index 2d99cb4a5006..000000000000 --- a/lib/iovec.c +++ /dev/null @@ -1,87 +0,0 @@ -#include <linux/uaccess.h> -#include <linux/export.h> -#include <linux/uio.h> - -/* - * Copy iovec to kernel. Returns -EFAULT on error. - * - * Note: this modifies the original iovec. - */ - -int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len) -{ - while (len > 0) { - if (iov->iov_len) { - int copy = min_t(unsigned int, len, iov->iov_len); - if (copy_from_user(kdata, iov->iov_base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov->iov_base += copy; - iov->iov_len -= copy; - } - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovec); - -/* - * Copy kernel to iovec. Returns -EFAULT on error. - */ - -int memcpy_toiovecend(const struct iovec *iov, unsigned char *kdata, - int offset, int len) -{ - int copy; - for (; len > 0; ++iov) { - /* Skip over the finished iovecs */ - if (unlikely(offset >= iov->iov_len)) { - offset -= iov->iov_len; - continue; - } - copy = min_t(unsigned int, iov->iov_len - offset, len); - if (copy_to_user(iov->iov_base + offset, kdata, copy)) - return -EFAULT; - offset = 0; - kdata += copy; - len -= copy; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_toiovecend); - -/* - * Copy iovec to kernel. Returns -EFAULT on error. - */ - -int memcpy_fromiovecend(unsigned char *kdata, const struct iovec *iov, - int offset, int len) -{ - /* No data? Done! 
*/ - if (len == 0) - return 0; - - /* Skip over the finished iovecs */ - while (offset >= iov->iov_len) { - offset -= iov->iov_len; - iov++; - } - - while (len > 0) { - u8 __user *base = iov->iov_base + offset; - int copy = min_t(unsigned int, len, iov->iov_len - offset); - - offset = 0; - if (copy_from_user(kdata, base, copy)) - return -EFAULT; - len -= copy; - kdata += copy; - iov++; - } - - return 0; -} -EXPORT_SYMBOL(memcpy_fromiovecend); diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 9ebf9e20de53..f6c2c1e7779c 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -20,7 +20,6 @@ #include <linux/export.h> #include <linux/kmod.h> #include <linux/slab.h> -#include <linux/user_namespace.h> #include <linux/socket.h> #include <linux/skbuff.h> #include <linux/netlink.h> diff --git a/lib/lcm.c b/lib/lcm.c index 51cc6b13cd52..03d7fcb420b5 100644 --- a/lib/lcm.c +++ b/lib/lcm.c @@ -1,4 +1,4 @@ -#include <linux/kernel.h> +#include <linux/compiler.h> #include <linux/gcd.h> #include <linux/export.h> #include <linux/lcm.h> @@ -12,3 +12,14 @@ unsigned long lcm(unsigned long a, unsigned long b) return 0; } EXPORT_SYMBOL_GPL(lcm); + +unsigned long lcm_not_zero(unsigned long a, unsigned long b) +{ + unsigned long l = lcm(a, b); + + if (l) + return l; + + return (b ? : a); +} +EXPORT_SYMBOL_GPL(lcm_not_zero); diff --git a/lib/list_sort.c b/lib/list_sort.c index 12bcba1c8612..b29015102698 100644 --- a/lib/list_sort.c +++ b/lib/list_sort.c @@ -2,9 +2,11 @@ #define pr_fmt(fmt) "list_sort_test: " fmt #include <linux/kernel.h> -#include <linux/module.h> +#include <linux/bug.h> +#include <linux/compiler.h> +#include <linux/export.h> +#include <linux/string.h> #include <linux/list_sort.h> -#include <linux/slab.h> #include <linux/list.h> #define MAX_LIST_LENGTH_BITS 20 @@ -146,6 +148,7 @@ EXPORT_SYMBOL(list_sort); #ifdef CONFIG_TEST_LIST_SORT +#include <linux/slab.h> #include <linux/random.h> /* diff --git a/lib/llist.c b/lib/llist.c index f76196d07409..0b0e9779d675 100644 --- a/lib/llist.c +++ b/lib/llist.c @@ -24,7 +24,6 @@ */ #include <linux/kernel.h> #include <linux/export.h> -#include <linux/interrupt.h> #include <linux/llist.h> diff --git a/lib/lockref.c b/lib/lockref.c index d2233de9a86e..ecb9a665ec19 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -60,7 +60,7 @@ void lockref_get(struct lockref *lockref) EXPORT_SYMBOL(lockref_get); /** - * lockref_get_not_zero - Increments count unless the count is 0 + * lockref_get_not_zero - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero */ @@ -70,7 +70,7 @@ int lockref_get_not_zero(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) return 0; , return 1; @@ -78,7 +78,7 @@ int lockref_get_not_zero(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if (lockref->count) { + if (lockref->count > 0) { lockref->count++; retval = 1; } @@ -88,7 +88,7 @@ int lockref_get_not_zero(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_not_zero); /** - * lockref_get_or_lock - Increments count unless the count is 0 + * lockref_get_or_lock - Increments count unless the count is 0 or dead * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count was zero * and we got the lock instead. 
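Earlier in this diff, lib/lcm.c gains lcm_not_zero(), which falls back to whichever argument is non-zero when lcm() itself would return 0. A small userspace reimplementation of that fallback, written only to show the behaviour (the kernel's gcd() and lcm() are separate implementations):

#include <assert.h>

/* Illustrative reimplementation: lcm(a, b) is taken to return 0
 * when either argument is 0, matching the kernel helper. */
static unsigned long gcd_ul(unsigned long a, unsigned long b)
{
        while (b) {
                unsigned long t = a % b;
                a = b;
                b = t;
        }
        return a;
}

static unsigned long lcm_ul(unsigned long a, unsigned long b)
{
        if (a && b)
                return (a / gcd_ul(a, b)) * b;
        return 0;
}

static unsigned long lcm_not_zero_ul(unsigned long a, unsigned long b)
{
        unsigned long l = lcm_ul(a, b);

        return l ? l : (b ? b : a);
}

int main(void)
{
        assert(lcm_not_zero_ul(4, 6) == 12);
        assert(lcm_not_zero_ul(0, 6) == 6);     /* falls back to b */
        assert(lcm_not_zero_ul(4, 0) == 4);     /* falls back to a */
        return 0;
}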
@@ -97,14 +97,14 @@ int lockref_get_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( new.count++; - if (!old.count) + if (old.count <= 0) break; , return 1; ); spin_lock(&lockref->lock); - if (!lockref->count) + if (lockref->count <= 0) return 0; lockref->count++; spin_unlock(&lockref->lock); @@ -113,6 +113,26 @@ int lockref_get_or_lock(struct lockref *lockref) EXPORT_SYMBOL(lockref_get_or_lock); /** + * lockref_put_return - Decrement reference count if possible + * @lockref: pointer to lockref structure + * + * Decrement the reference count and return the new value. + * If the lockref was dead or locked, return an error. + */ +int lockref_put_return(struct lockref *lockref) +{ + CMPXCHG_LOOP( + new.count--; + if (old.count <= 0) + return -1; + , + return new.count; + ); + return -1; +} +EXPORT_SYMBOL(lockref_put_return); + +/** * lockref_put_or_lock - decrements count unless count <= 1 before decrement * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken @@ -158,7 +178,7 @@ int lockref_get_not_dead(struct lockref *lockref) CMPXCHG_LOOP( new.count++; - if ((int)old.count < 0) + if (old.count < 0) return 0; , return 1; @@ -166,7 +186,7 @@ int lockref_get_not_dead(struct lockref *lockref) spin_lock(&lockref->lock); retval = 0; - if ((int) lockref->count >= 0) { + if (lockref->count >= 0) { lockref->count++; retval = 1; } diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 7a85967060a5..f0f5c5c3de12 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -139,6 +139,9 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* Error: request to write beyond destination buffer */ if (cpy > oend) goto _output_error; + if ((ref + COPYLENGTH) > oend || + (op + COPYLENGTH) > oend) + goto _output_error; LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); while (op < cpy) *op++ = *ref++; diff --git a/lib/md5.c b/lib/md5.c index 958a3c15923c..bb0cd01d356d 100644 --- a/lib/md5.c +++ b/lib/md5.c @@ -1,4 +1,4 @@ -#include <linux/kernel.h> +#include <linux/compiler.h> #include <linux/export.h> #include <linux/cryptohash.h> diff --git a/lib/mpi/mpi-cmp.c b/lib/mpi/mpi-cmp.c index 1871e7b61ca0..d25e9e96c310 100644 --- a/lib/mpi/mpi-cmp.c +++ b/lib/mpi/mpi-cmp.c @@ -57,14 +57,12 @@ int mpi_cmp(MPI u, MPI v) if (usize != vsize && !u->sign && !v->sign) return usize - vsize; if (usize != vsize && u->sign && v->sign) - return vsize + usize; + return vsize - usize; if (!usize) return 0; cmp = mpihelp_cmp(u->d, v->d, usize); - if (!cmp) - return 0; - if ((cmp < 0 ? 1 : 0) == (u->sign ? 
1 : 0)) - return 1; - return -1; + if (u->sign) + return -cmp; + return cmp; } EXPORT_SYMBOL_GPL(mpi_cmp); diff --git a/lib/mpi/mpi-internal.h b/lib/mpi/mpi-internal.h index 60cf765628e9..c65dd1bff45a 100644 --- a/lib/mpi/mpi-internal.h +++ b/lib/mpi/mpi-internal.h @@ -84,7 +84,7 @@ static inline int RESIZE_IF_NEEDED(MPI a, unsigned b) do { \ mpi_size_t _i; \ for (_i = 0; _i < (n); _i++) \ - (d)[_i] = (d)[_i]; \ + (d)[_i] = (s)[_i]; \ } while (0) #define MPN_COPY_DECR(d, s, n) \ diff --git a/lib/nlattr.c b/lib/nlattr.c index 9c3e85ff0a6c..f5907d23272d 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -9,7 +9,6 @@ #include <linux/kernel.h> #include <linux/errno.h> #include <linux/jiffies.h> -#include <linux/netdevice.h> #include <linux/skbuff.h> #include <linux/string.h> #include <linux/types.h> @@ -280,6 +279,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) int minlen = min_t(int, count, nla_len(src)); memcpy(dest, nla_data(src), minlen); + if (count > minlen) + memset(dest + minlen, 0, count - minlen); return minlen; } diff --git a/lib/pci_iomap.c b/lib/pci_iomap.c index 0d83ea8a9605..bcce5f149310 100644 --- a/lib/pci_iomap.c +++ b/lib/pci_iomap.c @@ -10,10 +10,11 @@ #ifdef CONFIG_PCI /** - * pci_iomap - create a virtual mapping cookie for a PCI BAR + * pci_iomap_range - create a virtual mapping cookie for a PCI BAR * @dev: PCI device that owns the BAR * @bar: BAR number - * @maxlen: length of the memory to map + * @offset: map memory at the given offset in BAR + * @maxlen: max length of the memory to map * * Using this function you will get a __iomem address to your device BAR. * You can access it using ioread*() and iowrite*(). These functions hide @@ -21,16 +22,21 @@ * you expect from them in the correct way. * * @maxlen specifies the maximum length to map. If you want to get access to - * the complete BAR without checking for its length first, pass %0 here. + * the complete BAR from offset to the end, pass %0 here. * */ -void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +void __iomem *pci_iomap_range(struct pci_dev *dev, + int bar, + unsigned long offset, + unsigned long maxlen) { resource_size_t start = pci_resource_start(dev, bar); resource_size_t len = pci_resource_len(dev, bar); unsigned long flags = pci_resource_flags(dev, bar); - if (!len || !start) + if (len <= offset || !start) return NULL; + len -= offset; + start += offset; if (maxlen && len > maxlen) len = maxlen; if (flags & IORESOURCE_IO) @@ -43,6 +49,25 @@ void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) /* What? */ return NULL; } +EXPORT_SYMBOL(pci_iomap_range); +/** + * pci_iomap - create a virtual mapping cookie for a PCI BAR + * @dev: PCI device that owns the BAR + * @bar: BAR number + * @maxlen: length of the memory to map + * + * Using this function you will get a __iomem address to your device BAR. + * You can access it using ioread*() and iowrite*(). These functions hide + * the details if this is a MMIO or PIO address space and will just do what + * you expect from them in the correct way. + * + * @maxlen specifies the maximum length to map. If you want to get access to + * the complete BAR without checking for its length first, pass %0 here. 
+ * */ +void __iomem *pci_iomap(struct pci_dev *dev, int bar, unsigned long maxlen) +{ + return pci_iomap_range(dev, bar, 0, maxlen); +} EXPORT_SYMBOL(pci_iomap); #endif /* CONFIG_PCI */ diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 93d145e5539c..f75715131f20 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -19,13 +19,10 @@ #include <linux/bug.h> #include <linux/err.h> #include <linux/export.h> -#include <linux/hardirq.h> -#include <linux/idr.h> #include <linux/init.h> #include <linux/kernel.h> #include <linux/percpu.h> #include <linux/sched.h> -#include <linux/slab.h> #include <linux/string.h> #include <linux/spinlock.h> #include <linux/percpu_ida.h> diff --git a/lib/plist.c b/lib/plist.c index d408e774b746..3a30c53db061 100644 --- a/lib/plist.c +++ b/lib/plist.c @@ -25,7 +25,6 @@ #include <linux/bug.h> #include <linux/plist.h> -#include <linux/spinlock.h> #ifdef CONFIG_DEBUG_PI_LIST diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 3291a8e37490..3d2aa27b845b 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -33,7 +33,7 @@ #include <linux/string.h> #include <linux/bitops.h> #include <linux/rcupdate.h> -#include <linux/hardirq.h> /* in_interrupt() */ +#include <linux/preempt_mask.h> /* in_interrupt() */ /* diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index 7d0e5cd7b570..dbef2314901e 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -89,10 +89,10 @@ void (*raid6_datap_recov)(int, size_t, int, void **); EXPORT_SYMBOL_GPL(raid6_datap_recov); const struct raid6_recov_calls *const raid6_recov_algos[] = { -#if (defined(__i386__) || defined(__x86_64__)) && !defined(__arch_um__) #ifdef CONFIG_AS_AVX2 &raid6_recov_avx2, #endif +#ifdef CONFIG_AS_SSSE3 &raid6_recov_ssse3, #endif &raid6_recov_intx1, diff --git a/lib/raid6/recov_avx2.c b/lib/raid6/recov_avx2.c index e1eea433a493..53fe3d7bdfb3 100644 --- a/lib/raid6/recov_avx2.c +++ b/lib/raid6/recov_avx2.c @@ -8,7 +8,7 @@ * of the License. */ -#if CONFIG_AS_AVX2 +#ifdef CONFIG_AS_AVX2 #include <linux/raid/pq.h> #include "x86.h" diff --git a/lib/raid6/recov_ssse3.c b/lib/raid6/recov_ssse3.c index a9168328f03b..cda33e56a5e3 100644 --- a/lib/raid6/recov_ssse3.c +++ b/lib/raid6/recov_ssse3.c @@ -7,6 +7,8 @@ * of the License. 
*/ +#ifdef CONFIG_AS_SSSE3 + #include <linux/raid/pq.h> #include "x86.h" @@ -330,3 +332,7 @@ const struct raid6_recov_calls raid6_recov_ssse3 = { #endif .priority = 1, }; + +#else +#warning "your version of binutils lacks SSSE3 support" +#endif diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 6c3c723e902b..b5344ef4c684 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -1,7 +1,7 @@ /* * Resizable, Scalable, Concurrent Hash Table * - * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> + * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * * Based on the following paper: @@ -17,117 +17,230 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/log2.h> +#include <linux/sched.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> #include <linux/jhash.h> #include <linux/random.h> #include <linux/rhashtable.h> +#include <linux/err.h> #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4UL +#define BUCKET_LOCKS_PER_CPU 128UL -#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) +/* Base bits plus 1 bit for nulls marker */ +#define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) -#ifdef CONFIG_PROVE_LOCKING -int lockdep_rht_mutex_is_held(const struct rhashtable *ht) +enum { + RHT_LOCK_NORMAL, + RHT_LOCK_NESTED, +}; + +/* The bucket lock is selected based on the hash and protects mutations + * on a group of hash buckets. + * + * A maximum of tbl->size/2 bucket locks is allocated. This ensures that + * a single lock always covers both buckets which may both contains + * entries which link to the same bucket of the old table during resizing. + * This allows to simplify the locking as locking the bucket in both + * tables during resize always guarantee protection. + * + * IMPORTANT: When holding the bucket lock of both the old and new table + * during expansions and shrinking, the old bucket lock must always be + * acquired first. + */ +static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash) { - return ht->p.mutex_is_held(ht->p.parent); + return &tbl->locks[hash & tbl->locks_mask]; } -EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); -#endif static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) { return (void *) he - ht->p.head_offset; } -static u32 __hashfn(const struct rhashtable *ht, const void *key, - u32 len, u32 hsize) +static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) +{ + return hash & (tbl->size - 1); +} + +static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) { - u32 h; + u32 hash; - h = ht->p.hashfn(key, len, ht->p.hash_rnd); + if (unlikely(!ht->p.key_len)) + hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + else + hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, + ht->p.hash_rnd); - return h & (hsize - 1); + return hash >> HASH_RESERVED_SPACE; } -/** - * rhashtable_hashfn - compute hash for key of given length - * @ht: hash table to compute for - * @key: pointer to key - * @len: length of key - * - * Computes the hash value using the hash function provided in the 'hashfn' - * of struct rhashtable_params. The returned value is guaranteed to be - * smaller than the number of buckets in the hash table. 
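To make the bucket/lock selection above concrete, a purely illustrative sketch (these helpers and sizes exist nowhere in the tree): with a power-of-two table and at most size/2 bucket locks, the low bits of the same hash pick both the bucket and the lock, so each lock covers a fixed group of buckets.

/* Illustrative only: 64 buckets, 8 bucket locks (both powers of two). */
#define EX_TABLE_SIZE	64U	/* stands in for tbl->size           */
#define EX_NR_LOCKS	8U	/* stands in for tbl->locks_mask + 1 */

static inline unsigned int ex_bucket(unsigned int hash)
{
	return hash & (EX_TABLE_SIZE - 1);	/* cf. rht_bucket_index() */
}

static inline unsigned int ex_lock_index(unsigned int hash)
{
	return hash & (EX_NR_LOCKS - 1);	/* cf. bucket_lock() */
}

/* Buckets 0x05, 0x0d, 0x15, ... all yield lock index 5, so a single
 * spinlock serialises mutations on that whole group of buckets. */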
- */ -u32 rhashtable_hashfn(const struct rhashtable *ht, const void *key, u32 len) +static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) +{ + return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE; +} + +static u32 head_hashfn(const struct rhashtable *ht, + const struct bucket_table *tbl, + const struct rhash_head *he) +{ + return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he))); +} + +#ifdef CONFIG_PROVE_LOCKING +static void debug_dump_buckets(const struct rhashtable *ht, + const struct bucket_table *tbl) { - struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + struct rhash_head *he; + unsigned int i, hash; + + for (i = 0; i < tbl->size; i++) { + pr_warn(" [Bucket %d] ", i); + rht_for_each_rcu(he, tbl, i) { + hash = head_hashfn(ht, tbl, he); + pr_cont("[hash = %#x, lock = %p] ", + hash, bucket_lock(tbl, hash)); + } + pr_cont("\n"); + } - return __hashfn(ht, key, len, tbl->size); } -EXPORT_SYMBOL_GPL(rhashtable_hashfn); -static u32 obj_hashfn(const struct rhashtable *ht, const void *ptr, u32 hsize) +static void debug_dump_table(struct rhashtable *ht, + const struct bucket_table *tbl, + unsigned int hash) { - if (unlikely(!ht->p.key_len)) { - u32 h; + struct bucket_table *old_tbl, *future_tbl; - h = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); + pr_emerg("BUG: lock for hash %#x in table %p not held\n", + hash, tbl); - return h & (hsize - 1); + rcu_read_lock(); + future_tbl = rht_dereference_rcu(ht->future_tbl, ht); + old_tbl = rht_dereference_rcu(ht->tbl, ht); + if (future_tbl != old_tbl) { + pr_warn("Future table %p (size: %zd)\n", + future_tbl, future_tbl->size); + debug_dump_buckets(ht, future_tbl); } - return __hashfn(ht, ptr + ht->p.key_offset, ht->p.key_len, hsize); + pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size); + debug_dump_buckets(ht, old_tbl); + + rcu_read_unlock(); } -/** - * rhashtable_obj_hashfn - compute hash for hashed object - * @ht: hash table to compute for - * @ptr: pointer to hashed object - * - * Computes the hash value using the hash function `hashfn` respectively - * 'obj_hashfn' depending on whether the hash table is set up to work with - * a fixed length key. The returned value is guaranteed to be smaller than - * the number of buckets in the hash table. - */ -u32 rhashtable_obj_hashfn(const struct rhashtable *ht, void *ptr) +#define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) +#define ASSERT_BUCKET_LOCK(HT, TBL, HASH) \ + do { \ + if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) { \ + debug_dump_table(HT, TBL, HASH); \ + BUG(); \ + } \ + } while (0) + +int lockdep_rht_mutex_is_held(struct rhashtable *ht) { - struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + return (debug_locks) ? lockdep_is_held(&ht->mutex) : 1; +} +EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); + +int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) +{ + spinlock_t *lock = bucket_lock(tbl, hash); - return obj_hashfn(ht, ptr, tbl->size); + return (debug_locks) ? 
lockdep_is_held(lock) : 1; } -EXPORT_SYMBOL_GPL(rhashtable_obj_hashfn); +EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); +#else +#define ASSERT_RHT_MUTEX(HT) +#define ASSERT_BUCKET_LOCK(HT, TBL, HASH) +#endif -static u32 head_hashfn(const struct rhashtable *ht, - const struct rhash_head *he, u32 hsize) + +static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n) { - return obj_hashfn(ht, rht_obj(ht, he), hsize); + struct rhash_head __rcu **pprev; + + for (pprev = &tbl->buckets[n]; + !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n)); + pprev = &rht_dereference_bucket(*pprev, tbl, n)->next) + ; + + return pprev; } -static struct bucket_table *bucket_table_alloc(size_t nbuckets) +static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl) { - struct bucket_table *tbl; + unsigned int i, size; +#if defined(CONFIG_PROVE_LOCKING) + unsigned int nr_pcpus = 2; +#else + unsigned int nr_pcpus = num_possible_cpus(); +#endif + + nr_pcpus = min_t(unsigned int, nr_pcpus, 32UL); + size = roundup_pow_of_two(nr_pcpus * ht->p.locks_mul); + + /* Never allocate more than 0.5 locks per bucket */ + size = min_t(unsigned int, size, tbl->size >> 1); + + if (sizeof(spinlock_t) != 0) { +#ifdef CONFIG_NUMA + if (size * sizeof(spinlock_t) > PAGE_SIZE) + tbl->locks = vmalloc(size * sizeof(spinlock_t)); + else +#endif + tbl->locks = kmalloc_array(size, sizeof(spinlock_t), + GFP_KERNEL); + if (!tbl->locks) + return -ENOMEM; + for (i = 0; i < size; i++) + spin_lock_init(&tbl->locks[i]); + } + tbl->locks_mask = size - 1; + + return 0; +} + +static void bucket_table_free(const struct bucket_table *tbl) +{ + if (tbl) + kvfree(tbl->locks); + + kvfree(tbl); +} + +static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, + size_t nbuckets) +{ + struct bucket_table *tbl = NULL; size_t size; + int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER)) + tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY); if (tbl == NULL) tbl = vzalloc(size); - if (tbl == NULL) return NULL; tbl->size = nbuckets; - return tbl; -} + if (alloc_bucket_locks(ht, tbl) < 0) { + bucket_table_free(tbl); + return NULL; + } -static void bucket_table_free(const struct bucket_table *tbl) -{ - kvfree(tbl); + for (i = 0; i < nbuckets; i++) + INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); + + return tbl; } /** @@ -135,56 +248,89 @@ static void bucket_table_free(const struct bucket_table *tbl) * @ht: hash table * @new_size: new table size */ -bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) +static bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) { /* Expand table when exceeding 75% load */ - return ht->nelems > (new_size / 4 * 3); + return atomic_read(&ht->nelems) > (new_size / 4 * 3) && + (!ht->p.max_shift || atomic_read(&ht->shift) < ht->p.max_shift); } -EXPORT_SYMBOL_GPL(rht_grow_above_75); /** * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size * @ht: hash table * @new_size: new table size */ -bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) +static bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) { /* Shrink table beneath 30% load */ - return ht->nelems < (new_size * 3 / 10); + return atomic_read(&ht->nelems) < (new_size * 3 / 10) && + (atomic_read(&ht->shift) > ht->p.min_shift); +} + +static void lock_buckets(struct bucket_table *new_tbl, + struct bucket_table *old_tbl, unsigned int hash) 
+ __acquires(old_bucket_lock) +{ + spin_lock_bh(bucket_lock(old_tbl, hash)); + if (new_tbl != old_tbl) + spin_lock_bh_nested(bucket_lock(new_tbl, hash), + RHT_LOCK_NESTED); +} + +static void unlock_buckets(struct bucket_table *new_tbl, + struct bucket_table *old_tbl, unsigned int hash) + __releases(old_bucket_lock) +{ + if (new_tbl != old_tbl) + spin_unlock_bh(bucket_lock(new_tbl, hash)); + spin_unlock_bh(bucket_lock(old_tbl, hash)); } -EXPORT_SYMBOL_GPL(rht_shrink_below_30); -static void hashtable_chain_unzip(const struct rhashtable *ht, +/** + * Unlink entries on bucket which hash to different bucket. + * + * Returns true if no more work needs to be performed on the bucket. + */ +static bool hashtable_chain_unzip(struct rhashtable *ht, const struct bucket_table *new_tbl, - struct bucket_table *old_tbl, size_t n) + struct bucket_table *old_tbl, + size_t old_hash) { struct rhash_head *he, *p, *next; - unsigned int h; + unsigned int new_hash, new_hash2; + + ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash); /* Old bucket empty, no work needed. */ - p = rht_dereference(old_tbl->buckets[n], ht); - if (!p) - return; + p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, + old_hash); + if (rht_is_a_nulls(p)) + return false; + + new_hash = head_hashfn(ht, new_tbl, p); + ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash); /* Advance the old bucket pointer one or more times until it * reaches a node that doesn't hash to the same bucket as the * previous node p. Call the previous node p; */ - h = head_hashfn(ht, p, new_tbl->size); - rht_for_each(he, p->next, ht) { - if (head_hashfn(ht, he, new_tbl->size) != h) + rht_for_each_continue(he, p->next, old_tbl, old_hash) { + new_hash2 = head_hashfn(ht, new_tbl, he); + ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2); + + if (new_hash != new_hash2) break; p = he; } - RCU_INIT_POINTER(old_tbl->buckets[n], p->next); + rcu_assign_pointer(old_tbl->buckets[old_hash], p->next); /* Find the subsequent node which does hash to the same * bucket as node P, or NULL if no such node exists. */ - next = NULL; - if (he) { - rht_for_each(he, he->next, ht) { - if (head_hashfn(ht, he, new_tbl->size) == h) { + INIT_RHT_NULLS_HEAD(next, ht, old_hash); + if (!rht_is_a_nulls(he)) { + rht_for_each_continue(he, he->next, old_tbl, old_hash) { + if (head_hashfn(ht, new_tbl, he) == new_hash) { next = he; break; } @@ -194,7 +340,20 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, /* Set p's next pointer to that subsequent node pointer, * bypassing the nodes which do not hash to p's bucket */ - RCU_INIT_POINTER(p->next, next); + rcu_assign_pointer(p->next, next); + + p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, + old_hash); + + return !rht_is_a_nulls(p); +} + +static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl, + unsigned int new_hash, struct rhash_head *entry) +{ + ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash); + + rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry); } /** @@ -207,53 +366,58 @@ static void hashtable_chain_unzip(const struct rhashtable *ht, * This function may only be called in a context where it is safe to call * synchronize_rcu(), e.g. not within a rcu_read_lock() section. * - * The caller must ensure that no concurrent table mutations take place. - * It is however valid to have concurrent lookups if they are RCU protected. + * The caller must ensure that no concurrent resizing occurs by holding + * ht->mutex. 
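A minimal sketch of the caller contract stated above, mirroring what the reworked self-test later in this patch does when it forces a resize by hand; normally the deferred worker drives resizing from the 75%/30% watermarks, so the wrapper name here is invented.

#include <linux/mutex.h>
#include <linux/rhashtable.h>

static int example_force_expand(struct rhashtable *ht)
{
	int err;

	mutex_lock(&ht->mutex);		/* excludes concurrent resizes */
	err = rhashtable_expand(ht);	/* may sleep: calls synchronize_rcu() */
	mutex_unlock(&ht->mutex);

	return err;
}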
+ * + * It is valid to have concurrent insertions and deletions protected by per + * bucket locks or concurrent RCU protected lookups and traversals. */ int rhashtable_expand(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); struct rhash_head *he; - unsigned int i, h; - bool complete; + unsigned int new_hash, old_hash; + bool complete = false; ASSERT_RHT_MUTEX(ht); - if (ht->p.max_shift && ht->shift >= ht->p.max_shift) - return 0; - - new_tbl = bucket_table_alloc(old_tbl->size * 2); + new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); if (new_tbl == NULL) return -ENOMEM; - ht->shift++; + atomic_inc(&ht->shift); - /* For each new bucket, search the corresponding old bucket - * for the first entry that hashes to the new bucket, and - * link the new bucket to that entry. Since all the entries - * which will end up in the new bucket appear in the same - * old bucket, this constructs an entirely valid new hash - * table, but with multiple buckets "zipped" together into a - * single imprecise chain. + /* Make insertions go into the new, empty table right away. Deletions + * and lookups will be attempted in both tables until we synchronize. + * The synchronize_rcu() guarantees for the new table to be picked up + * so no new additions go into the old table while we relink. */ - for (i = 0; i < new_tbl->size; i++) { - h = i & (old_tbl->size - 1); - rht_for_each(he, old_tbl->buckets[h], ht) { - if (head_hashfn(ht, he, new_tbl->size) == i) { - RCU_INIT_POINTER(new_tbl->buckets[i], he); + rcu_assign_pointer(ht->future_tbl, new_tbl); + synchronize_rcu(); + + /* For each new bucket, search the corresponding old bucket for the + * first entry that hashes to the new bucket, and link the end of + * newly formed bucket chain (containing entries added to future + * table) to that entry. Since all the entries which will end up in + * the new bucket appear in the same old bucket, this constructs an + * entirely valid new hash table, but with multiple buckets + * "zipped" together into a single imprecise chain. + */ + for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { + old_hash = rht_bucket_index(old_tbl, new_hash); + lock_buckets(new_tbl, old_tbl, new_hash); + rht_for_each(he, old_tbl, old_hash) { + if (head_hashfn(ht, new_tbl, he) == new_hash) { + link_old_to_new(ht, new_tbl, new_hash, he); break; } } + unlock_buckets(new_tbl, old_tbl, new_hash); + cond_resched(); } - /* Publish the new table pointer. Lookups may now traverse - * the new table, but they will not benefit from any - * additional efficiency until later steps unzip the buckets. - */ - rcu_assign_pointer(ht->tbl, new_tbl); - /* Unzip interleaved hash chains */ - do { + while (!complete && !ht->being_destroyed) { /* Wait for readers. All new readers will see the new * table, and thus no references to the old table will * remain. @@ -265,12 +429,20 @@ int rhashtable_expand(struct rhashtable *ht) * table): ... 
*/ complete = true; - for (i = 0; i < old_tbl->size; i++) { - hashtable_chain_unzip(ht, new_tbl, old_tbl, i); - if (old_tbl->buckets[i] != NULL) + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { + lock_buckets(new_tbl, old_tbl, old_hash); + + if (hashtable_chain_unzip(ht, new_tbl, old_tbl, + old_hash)) complete = false; + + unlock_buckets(new_tbl, old_tbl, old_hash); + cond_resched(); } - } while (!complete); + } + + rcu_assign_pointer(ht->tbl, new_tbl); + synchronize_rcu(); bucket_table_free(old_tbl); return 0; @@ -284,45 +456,52 @@ EXPORT_SYMBOL_GPL(rhashtable_expand); * This function may only be called in a context where it is safe to call * synchronize_rcu(), e.g. not within a rcu_read_lock() section. * + * The caller must ensure that no concurrent resizing occurs by holding + * ht->mutex. + * * The caller must ensure that no concurrent table mutations take place. * It is however valid to have concurrent lookups if they are RCU protected. + * + * It is valid to have concurrent insertions and deletions protected by per + * bucket locks or concurrent RCU protected lookups and traversals. */ int rhashtable_shrink(struct rhashtable *ht) { - struct bucket_table *ntbl, *tbl = rht_dereference(ht->tbl, ht); - struct rhash_head __rcu **pprev; - unsigned int i; + struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht); + unsigned int new_hash; ASSERT_RHT_MUTEX(ht); - if (ht->shift <= ht->p.min_shift) - return 0; - - ntbl = bucket_table_alloc(tbl->size / 2); - if (ntbl == NULL) + new_tbl = bucket_table_alloc(ht, tbl->size / 2); + if (new_tbl == NULL) return -ENOMEM; - ht->shift--; + rcu_assign_pointer(ht->future_tbl, new_tbl); + synchronize_rcu(); - /* Link each bucket in the new table to the first bucket - * in the old table that contains entries which will hash - * to the new bucket. + /* Link the first entry in the old bucket to the end of the + * bucket in the new table. As entries are concurrently being + * added to the new table, lock down the new bucket. As we + * always divide the size in half when shrinking, each bucket + * in the new table maps to exactly two buckets in the old + * table. */ - for (i = 0; i < ntbl->size; i++) { - ntbl->buckets[i] = tbl->buckets[i]; + for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { + lock_buckets(new_tbl, tbl, new_hash); - /* Link each bucket in the new table to the first bucket - * in the old table that contains entries which will hash - * to the new bucket. - */ - for (pprev = &ntbl->buckets[i]; *pprev != NULL; - pprev = &rht_dereference(*pprev, ht)->next) - ; - RCU_INIT_POINTER(*pprev, tbl->buckets[i + ntbl->size]); + rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), + tbl->buckets[new_hash]); + ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size); + rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), + tbl->buckets[new_hash + new_tbl->size]); + + unlock_buckets(new_tbl, tbl, new_hash); + cond_resched(); } /* Publish the new, valid hash table */ - rcu_assign_pointer(ht->tbl, ntbl); + rcu_assign_pointer(ht->tbl, new_tbl); + atomic_dec(&ht->shift); /* Wait for readers. No new readers will have references to the * old hash table. @@ -335,59 +514,87 @@ int rhashtable_shrink(struct rhashtable *ht) } EXPORT_SYMBOL_GPL(rhashtable_shrink); -/** - * rhashtable_insert - insert object into hash hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * - * Will automatically grow the table via rhashtable_expand() if the the - * grow_decision function specified at rhashtable_init() returns true. 
- * - * The caller must ensure that no concurrent table mutations occur. It is - * however valid to have concurrent lookups if they are RCU protected. - */ -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) +static void rht_deferred_worker(struct work_struct *work) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); - u32 hash; + struct rhashtable *ht; + struct bucket_table *tbl; + struct rhashtable_walker *walker; - ASSERT_RHT_MUTEX(ht); + ht = container_of(work, struct rhashtable, run_work); + mutex_lock(&ht->mutex); + if (ht->being_destroyed) + goto unlock; - hash = head_hashfn(ht, obj, tbl->size); - RCU_INIT_POINTER(obj->next, tbl->buckets[hash]); - rcu_assign_pointer(tbl->buckets[hash], obj); - ht->nelems++; + tbl = rht_dereference(ht->tbl, ht); + + list_for_each_entry(walker, &ht->walkers, list) + walker->resize = true; - if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + if (rht_grow_above_75(ht, tbl->size)) rhashtable_expand(ht); + else if (rht_shrink_below_30(ht, tbl->size)) + rhashtable_shrink(ht); +unlock: + mutex_unlock(&ht->mutex); +} + +static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, + struct bucket_table *tbl, + const struct bucket_table *old_tbl, u32 hash) +{ + bool no_resize_running = tbl == old_tbl; + struct rhash_head *head; + + hash = rht_bucket_index(tbl, hash); + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); + + ASSERT_BUCKET_LOCK(ht, tbl, hash); + + if (rht_is_a_nulls(head)) + INIT_RHT_NULLS_HEAD(obj->next, ht, hash); + else + RCU_INIT_POINTER(obj->next, head); + + rcu_assign_pointer(tbl->buckets[hash], obj); + + atomic_inc(&ht->nelems); + if (no_resize_running && rht_grow_above_75(ht, tbl->size)) + schedule_work(&ht->run_work); } -EXPORT_SYMBOL_GPL(rhashtable_insert); /** - * rhashtable_remove_pprev - remove object from hash table given previous element + * rhashtable_insert - insert object into hash table * @ht: hash table * @obj: pointer to hash head inside object - * @pprev: pointer to previous element * - * Identical to rhashtable_remove() but caller is alreayd aware of the element - * in front of the element to be deleted. This is in particular useful for - * deletion when combined with walking or lookup. + * Will take a per bucket spinlock to protect against mutual mutations + * on the same bucket. Multiple insertions may occur in parallel unless + * they map to the same bucket lock. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). 
*/ -void rhashtable_remove_pprev(struct rhashtable *ht, struct rhash_head *obj, - struct rhash_head __rcu **pprev) +void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *tbl, *old_tbl; + unsigned hash; - ASSERT_RHT_MUTEX(ht); + rcu_read_lock(); - RCU_INIT_POINTER(*pprev, obj->next); - ht->nelems--; + tbl = rht_dereference_rcu(ht->future_tbl, ht); + old_tbl = rht_dereference_rcu(ht->tbl, ht); + hash = obj_raw_hashfn(ht, rht_obj(ht, obj)); - if (ht->p.shrink_decision && - ht->p.shrink_decision(ht, tbl->size)) - rhashtable_shrink(ht); + lock_buckets(tbl, old_tbl, hash); + __rhashtable_insert(ht, obj, tbl, old_tbl, hash); + unlock_buckets(tbl, old_tbl, hash); + + rcu_read_unlock(); } -EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); +EXPORT_SYMBOL_GPL(rhashtable_insert); /** * rhashtable_remove - remove object from hash table @@ -398,7 +605,7 @@ EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); * walk the bucket chain upon removal. The removal operation is thus * considerable slow if the hash table is not correctly sized. * - * Will automatically shrink the table via rhashtable_expand() if the the + * Will automatically shrink the table via rhashtable_expand() if the * shrink_decision function specified at rhashtable_init() returns true. * * The caller must ensure that no concurrent table mutations occur. It is @@ -406,30 +613,90 @@ EXPORT_SYMBOL_GPL(rhashtable_remove_pprev); */ bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) { - struct bucket_table *tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *tbl, *new_tbl, *old_tbl; struct rhash_head __rcu **pprev; - struct rhash_head *he; - u32 h; - - ASSERT_RHT_MUTEX(ht); - - h = head_hashfn(ht, obj, tbl->size); + struct rhash_head *he, *he2; + unsigned int hash, new_hash; + bool ret = false; - pprev = &tbl->buckets[h]; - rht_for_each(he, tbl->buckets[h], ht) { + rcu_read_lock(); + old_tbl = rht_dereference_rcu(ht->tbl, ht); + tbl = new_tbl = rht_dereference_rcu(ht->future_tbl, ht); + new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj)); + + lock_buckets(new_tbl, old_tbl, new_hash); +restart: + hash = rht_bucket_index(tbl, new_hash); + pprev = &tbl->buckets[hash]; + rht_for_each(he, tbl, hash) { if (he != obj) { pprev = &he->next; continue; } - rhashtable_remove_pprev(ht, he, pprev); - return true; + ASSERT_BUCKET_LOCK(ht, tbl, hash); + + if (old_tbl->size > new_tbl->size && tbl == old_tbl && + !rht_is_a_nulls(obj->next) && + head_hashfn(ht, tbl, obj->next) != hash) { + rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash)); + } else if (unlikely(old_tbl->size < new_tbl->size && tbl == new_tbl)) { + rht_for_each_continue(he2, obj->next, tbl, hash) { + if (head_hashfn(ht, tbl, he2) == hash) { + rcu_assign_pointer(*pprev, he2); + goto found; + } + } + + rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash)); + } else { + rcu_assign_pointer(*pprev, obj->next); + } + +found: + ret = true; + break; + } + + /* The entry may be linked in either 'tbl', 'future_tbl', or both. + * 'future_tbl' only exists for a short period of time during + * resizing. Thus traversing both is fine and the added cost is + * very rare. 
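A usage sketch for the insert/remove pair above. The ex_entry type and helpers are invented for illustration and assume the table was initialised with head_offset, key_offset and key_len pointing at these fields, as in the kerneldoc configuration examples further on in this file.

#include <linux/rhashtable.h>
#include <linux/slab.h>
#include <linux/rcupdate.h>

struct ex_entry {
	u32			key;
	struct rhash_head	node;	/* linkage owned by the rhashtable */
	struct rcu_head		rcu;
};

/* Only the per-bucket spinlocks are taken internally, so this may be
 * called from atomic context as documented above. */
static int ex_add(struct rhashtable *ht, u32 key)
{
	struct ex_entry *e = kzalloc(sizeof(*e), GFP_ATOMIC);

	if (!e)
		return -ENOMEM;
	e->key = key;
	rhashtable_insert(ht, &e->node);
	return 0;
}

static void ex_del(struct rhashtable *ht, struct ex_entry *e)
{
	if (rhashtable_remove(ht, &e->node))
		kfree_rcu(e, rcu);	/* readers may still hold RCU references */
}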
+ */ + if (tbl != old_tbl) { + tbl = old_tbl; + goto restart; + } + + unlock_buckets(new_tbl, old_tbl, new_hash); + + if (ret) { + bool no_resize_running = new_tbl == old_tbl; + + atomic_dec(&ht->nelems); + if (no_resize_running && rht_shrink_below_30(ht, new_tbl->size)) + schedule_work(&ht->run_work); } - return false; + rcu_read_unlock(); + + return ret; } EXPORT_SYMBOL_GPL(rhashtable_remove); +struct rhashtable_compare_arg { + struct rhashtable *ht; + const void *key; +}; + +static bool rhashtable_compare(void *ptr, void *arg) +{ + struct rhashtable_compare_arg *x = arg; + struct rhashtable *ht = x->ht; + + return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len); +} + /** * rhashtable_lookup - lookup key in hash table * @ht: hash table @@ -439,65 +706,316 @@ EXPORT_SYMBOL_GPL(rhashtable_remove); * for a entry with an identical key. The first matching entry is returned. * * This lookup function may only be used for fixed key hash table (key_len - * paramter set). It will BUG() if used inappropriately. + * parameter set). It will BUG() if used inappropriately. * - * Lookups may occur in parallel with hash mutations as long as the lookup is - * guarded by rcu_read_lock(). The caller must take care of this. + * Lookups may occur in parallel with hashtable mutations and resizing. */ -void *rhashtable_lookup(const struct rhashtable *ht, const void *key) +void *rhashtable_lookup(struct rhashtable *ht, const void *key) { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - struct rhash_head *he; - u32 h; + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = key, + }; BUG_ON(!ht->p.key_len); - h = __hashfn(ht, key, ht->p.key_len, tbl->size); - rht_for_each_rcu(he, tbl->buckets[h], ht) { - if (memcmp(rht_obj(ht, he) + ht->p.key_offset, key, - ht->p.key_len)) - continue; - return (void *) he - ht->p.head_offset; - } - - return NULL; + return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg); } EXPORT_SYMBOL_GPL(rhashtable_lookup); /** * rhashtable_lookup_compare - search hash table with compare function * @ht: hash table - * @hash: hash value of desired entry + * @key: the pointer to the key * @compare: compare function, must return true on match * @arg: argument passed on to compare function * * Traverses the bucket chain behind the provided hash value and calls the * specified compare function for each entry. * - * Lookups may occur in parallel with hash mutations as long as the lookup is - * guarded by rcu_read_lock(). The caller must take care of this. + * Lookups may occur in parallel with hashtable mutations and resizing. * * Returns the first entry on which the compare function returned true. 
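A lookup sketch against the fixed-key API above, reusing the hypothetical ex_entry type from the insertion sketch; as in the updated self-test, a pointer to the key value is passed directly.

#include <linux/rcupdate.h>

static bool ex_key_present(struct rhashtable *ht, u32 key)
{
	bool found;

	rcu_read_lock();
	found = rhashtable_lookup(ht, &key) != NULL;
	rcu_read_unlock();

	return found;
}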
*/ -void *rhashtable_lookup_compare(const struct rhashtable *ht, u32 hash, +void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, bool (*compare)(void *, void *), void *arg) { - const struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); + const struct bucket_table *tbl, *old_tbl; struct rhash_head *he; + u32 hash; - if (unlikely(hash >= tbl->size)) - return NULL; + rcu_read_lock(); - rht_for_each_rcu(he, tbl->buckets[hash], ht) { + old_tbl = rht_dereference_rcu(ht->tbl, ht); + tbl = rht_dereference_rcu(ht->future_tbl, ht); + hash = key_hashfn(ht, key, ht->p.key_len); +restart: + rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) { if (!compare(rht_obj(ht, he), arg)) continue; - return (void *) he - ht->p.head_offset; + rcu_read_unlock(); + return rht_obj(ht, he); + } + + if (unlikely(tbl != old_tbl)) { + tbl = old_tbl; + goto restart; } + rcu_read_unlock(); return NULL; } EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); +/** + * rhashtable_lookup_insert - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * This lookup function may only be used for fixed key hash table (key_len + * parameter set). It will BUG() if used inappropriately. + * + * It is safe to call this function from atomic context. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). + */ +bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) +{ + struct rhashtable_compare_arg arg = { + .ht = ht, + .key = rht_obj(ht, obj) + ht->p.key_offset, + }; + + BUG_ON(!ht->p.key_len); + + return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, + &arg); +} +EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); + +/** + * rhashtable_lookup_compare_insert - search and insert object to hash table + * with compare function + * @ht: hash table + * @obj: pointer to hash head inside object + * @compare: compare function, must return true on match + * @arg: argument passed on to compare function + * + * Locks down the bucket chain in both the old and new table if a resize + * is in progress to ensure that writers can't remove from the old table + * and can't insert to the new table during the atomic operation of search + * and insertion. Searches for duplicates in both the old and new table if + * a resize is in progress. + * + * Lookups may occur in parallel with hashtable mutations and resizing. + * + * Will trigger an automatic deferred table resizing if the size grows + * beyond the watermark indicated by grow_decision() which can be passed + * to rhashtable_init(). 
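And a sketch of duplicate-free insertion with the lookup_insert variant documented above, again using the made-up ex_entry type:

static int ex_add_unique(struct rhashtable *ht, struct ex_entry *e)
{
	/* Both the old and any future table are searched under the bucket
	 * locks, so the check and the insertion are atomic as a pair. */
	if (!rhashtable_lookup_insert(ht, &e->node))
		return -EEXIST;		/* an entry with this key already exists */

	return 0;
}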
+ */ +bool rhashtable_lookup_compare_insert(struct rhashtable *ht, + struct rhash_head *obj, + bool (*compare)(void *, void *), + void *arg) +{ + struct bucket_table *new_tbl, *old_tbl; + u32 new_hash; + bool success = true; + + BUG_ON(!ht->p.key_len); + + rcu_read_lock(); + old_tbl = rht_dereference_rcu(ht->tbl, ht); + new_tbl = rht_dereference_rcu(ht->future_tbl, ht); + new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj)); + + lock_buckets(new_tbl, old_tbl, new_hash); + + if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset, + compare, arg)) { + success = false; + goto exit; + } + + __rhashtable_insert(ht, obj, new_tbl, old_tbl, new_hash); + +exit: + unlock_buckets(new_tbl, old_tbl, new_hash); + rcu_read_unlock(); + + return success; +} +EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); + +/** + * rhashtable_walk_init - Initialise an iterator + * @ht: Table to walk over + * @iter: Hash table Iterator + * + * This function prepares a hash table walk. + * + * Note that if you restart a walk after rhashtable_walk_stop you + * may see the same object twice. Also, you may miss objects if + * there are removals in between rhashtable_walk_stop and the next + * call to rhashtable_walk_start. + * + * For a completely stable walk you should construct your own data + * structure outside the hash table. + * + * This function may sleep so you must not call it from interrupt + * context or with spin locks held. + * + * You must call rhashtable_walk_exit if this function returns + * successfully. + */ +int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) +{ + iter->ht = ht; + iter->p = NULL; + iter->slot = 0; + iter->skip = 0; + + iter->walker = kmalloc(sizeof(*iter->walker), GFP_KERNEL); + if (!iter->walker) + return -ENOMEM; + + INIT_LIST_HEAD(&iter->walker->list); + iter->walker->resize = false; + + mutex_lock(&ht->mutex); + list_add(&iter->walker->list, &ht->walkers); + mutex_unlock(&ht->mutex); + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_init); + +/** + * rhashtable_walk_exit - Free an iterator + * @iter: Hash table Iterator + * + * This function frees resources allocated by rhashtable_walk_init. + */ +void rhashtable_walk_exit(struct rhashtable_iter *iter) +{ + mutex_lock(&iter->ht->mutex); + list_del(&iter->walker->list); + mutex_unlock(&iter->ht->mutex); + kfree(iter->walker); +} +EXPORT_SYMBOL_GPL(rhashtable_walk_exit); + +/** + * rhashtable_walk_start - Start a hash table walk + * @iter: Hash table iterator + * + * Start a hash table walk. Note that we take the RCU lock in all + * cases including when we return an error. So you must always call + * rhashtable_walk_stop to clean up. + * + * Returns zero if successful. + * + * Returns -EAGAIN if resize event occured. Note that the iterator + * will rewind back to the beginning and you may use it immediately + * by calling rhashtable_walk_next. + */ +int rhashtable_walk_start(struct rhashtable_iter *iter) +{ + rcu_read_lock(); + + if (iter->walker->resize) { + iter->slot = 0; + iter->skip = 0; + iter->walker->resize = false; + return -EAGAIN; + } + + return 0; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_start); + +/** + * rhashtable_walk_next - Return the next object and advance the iterator + * @iter: Hash table iterator + * + * Note that you must call rhashtable_walk_stop when you are finished + * with the walk. + * + * Returns the next object or NULL when the end of the table is reached. + * + * Returns -EAGAIN if resize event occured. 
Note that the iterator + * will rewind back to the beginning and you may continue to use it. + */ +void *rhashtable_walk_next(struct rhashtable_iter *iter) +{ + const struct bucket_table *tbl; + struct rhashtable *ht = iter->ht; + struct rhash_head *p = iter->p; + void *obj = NULL; + + tbl = rht_dereference_rcu(ht->tbl, ht); + + if (p) { + p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); + goto next; + } + + for (; iter->slot < tbl->size; iter->slot++) { + int skip = iter->skip; + + rht_for_each_rcu(p, tbl, iter->slot) { + if (!skip) + break; + skip--; + } + +next: + if (!rht_is_a_nulls(p)) { + iter->skip++; + iter->p = p; + obj = rht_obj(ht, p); + goto out; + } + + iter->skip = 0; + } + + iter->p = NULL; + +out: + if (iter->walker->resize) { + iter->p = NULL; + iter->slot = 0; + iter->skip = 0; + iter->walker->resize = false; + return ERR_PTR(-EAGAIN); + } + + return obj; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_next); + +/** + * rhashtable_walk_stop - Finish a hash table walk + * @iter: Hash table iterator + * + * Finish a hash table walk. + */ +void rhashtable_walk_stop(struct rhashtable_iter *iter) +{ + rcu_read_unlock(); + iter->p = NULL; +} +EXPORT_SYMBOL_GPL(rhashtable_walk_stop); + static size_t rounded_hashtable_size(struct rhashtable_params *params) { return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), @@ -525,9 +1043,7 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), * .hashfn = jhash, - * #ifdef CONFIG_PROVE_LOCKING - * .mutex_is_held = &my_mutex_is_held, - * #endif + * .nulls_base = (1U << RHT_BASE_SHIFT), * }; * * Configuration Example 2: Variable length keys @@ -547,9 +1063,6 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .head_offset = offsetof(struct test_obj, node), * .hashfn = jhash, * .obj_hashfn = my_hash_fn, - * #ifdef CONFIG_PROVE_LOCKING - * .mutex_is_held = &my_mutex_is_held, - * #endif * }; */ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) @@ -563,24 +1076,39 @@ int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) (!params->key_len && !params->obj_hashfn)) return -EINVAL; + if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) + return -EINVAL; + params->min_shift = max_t(size_t, params->min_shift, ilog2(HASH_MIN_SIZE)); if (params->nelem_hint) size = rounded_hashtable_size(params); - tbl = bucket_table_alloc(size); + memset(ht, 0, sizeof(*ht)); + mutex_init(&ht->mutex); + memcpy(&ht->p, params, sizeof(*params)); + INIT_LIST_HEAD(&ht->walkers); + + if (params->locks_mul) + ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); + else + ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; + + tbl = bucket_table_alloc(ht, size); if (tbl == NULL) return -ENOMEM; - memset(ht, 0, sizeof(*ht)); - ht->shift = ilog2(tbl->size); - memcpy(&ht->p, params, sizeof(*params)); + atomic_set(&ht->nelems, 0); + atomic_set(&ht->shift, ilog2(tbl->size)); RCU_INIT_POINTER(ht->tbl, tbl); + RCU_INIT_POINTER(ht->future_tbl, tbl); if (!ht->p.hash_rnd) get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); + INIT_WORK(&ht->run_work, rht_deferred_worker); + return 0; } EXPORT_SYMBOL_GPL(rhashtable_init); @@ -593,216 +1121,14 @@ EXPORT_SYMBOL_GPL(rhashtable_init); * has to make sure that no resizing may happen by unpublishing the hashtable * and waiting for the quiescent cycle before releasing the bucket array. 
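Putting the new walker API above together, a minimal iteration sketch (again over the hypothetical ex_entry objects); a resize during the walk surfaces as -EAGAIN and the rewound iterator can simply be reused.

#include <linux/err.h>

static void ex_walk(struct rhashtable *ht)
{
	struct rhashtable_iter iter;
	struct ex_entry *e;

	if (rhashtable_walk_init(ht, &iter))	/* may sleep */
		return;

	rhashtable_walk_start(&iter);		/* enters an RCU read section */

	while ((e = rhashtable_walk_next(&iter)) != NULL) {
		if (IS_ERR(e))			/* resize: iterator was rewound */
			continue;
		pr_info("key %u\n", e->key);
	}

	rhashtable_walk_stop(&iter);
	rhashtable_walk_exit(&iter);
}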
*/ -void rhashtable_destroy(const struct rhashtable *ht) -{ - bucket_table_free(ht->tbl); -} -EXPORT_SYMBOL_GPL(rhashtable_destroy); - -/************************************************************************** - * Self Test - **************************************************************************/ - -#ifdef CONFIG_TEST_RHASHTABLE - -#define TEST_HT_SIZE 8 -#define TEST_ENTRIES 2048 -#define TEST_PTR ((void *) 0xdeadbeef) -#define TEST_NEXPANDS 4 - -#ifdef CONFIG_PROVE_LOCKING -static int test_mutex_is_held(void *parent) -{ - return 1; -} -#endif - -struct test_obj { - void *ptr; - int value; - struct rhash_head node; -}; - -static int __init test_rht_lookup(struct rhashtable *ht) -{ - unsigned int i; - - for (i = 0; i < TEST_ENTRIES * 2; i++) { - struct test_obj *obj; - bool expected = !(i % 2); - u32 key = i; - - obj = rhashtable_lookup(ht, &key); - - if (expected && !obj) { - pr_warn("Test failed: Could not find key %u\n", key); - return -ENOENT; - } else if (!expected && obj) { - pr_warn("Test failed: Unexpected entry found for key %u\n", - key); - return -EEXIST; - } else if (expected && obj) { - if (obj->ptr != TEST_PTR || obj->value != i) { - pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", - obj->ptr, TEST_PTR, obj->value, i); - return -EINVAL; - } - } - } - - return 0; -} - -static void test_bucket_stats(struct rhashtable *ht, bool quiet) -{ - unsigned int cnt, rcu_cnt, i, total = 0; - struct test_obj *obj; - struct bucket_table *tbl; - - tbl = rht_dereference_rcu(ht->tbl, ht); - for (i = 0; i < tbl->size; i++) { - rcu_cnt = cnt = 0; - - if (!quiet) - pr_info(" [%#4x/%zu]", i, tbl->size); - - rht_for_each_entry_rcu(obj, tbl->buckets[i], node) { - cnt++; - total++; - if (!quiet) - pr_cont(" [%p],", obj); - } - - rht_for_each_entry_rcu(obj, tbl->buckets[i], node) - rcu_cnt++; - - if (rcu_cnt != cnt) - pr_warn("Test failed: Chain count mismach %d != %d", - cnt, rcu_cnt); - - if (!quiet) - pr_cont("\n [%#x] first element: %p, chain length: %u\n", - i, tbl->buckets[i], cnt); - } - - pr_info(" Traversal complete: counted=%u, nelems=%zu, entries=%d\n", - total, ht->nelems, TEST_ENTRIES); - - if (total != ht->nelems || total != TEST_ENTRIES) - pr_warn("Test failed: Total count mismatch ^^^"); -} - -static int __init test_rhashtable(struct rhashtable *ht) -{ - struct bucket_table *tbl; - struct test_obj *obj, *next; - int err; - unsigned int i; - - /* - * Insertion Test: - * Insert TEST_ENTRIES into table with all keys even numbers - */ - pr_info(" Adding %d keys\n", TEST_ENTRIES); - for (i = 0; i < TEST_ENTRIES; i++) { - struct test_obj *obj; - - obj = kzalloc(sizeof(*obj), GFP_KERNEL); - if (!obj) { - err = -ENOMEM; - goto error; - } - - obj->ptr = TEST_PTR; - obj->value = i * 2; - - rhashtable_insert(ht, &obj->node); - } - - rcu_read_lock(); - test_bucket_stats(ht, true); - test_rht_lookup(ht); - rcu_read_unlock(); - - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table expansion iteration %u...\n", i); - rhashtable_expand(ht); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table shrinkage iteration %u...\n", i); - rhashtable_shrink(ht); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - - rcu_read_lock(); - test_bucket_stats(ht, true); - rcu_read_unlock(); - - pr_info(" Deleting %d keys\n", TEST_ENTRIES); - for (i = 0; i < TEST_ENTRIES; i++) { - u32 key = i * 2; - - obj = rhashtable_lookup(ht, 
&key); - BUG_ON(!obj); - - rhashtable_remove(ht, &obj->node); - kfree(obj); - } - - return 0; - -error: - tbl = rht_dereference_rcu(ht->tbl, ht); - for (i = 0; i < tbl->size; i++) - rht_for_each_entry_safe(obj, next, tbl->buckets[i], ht, node) - kfree(obj); - - return err; -} - -static int __init test_rht_init(void) +void rhashtable_destroy(struct rhashtable *ht) { - struct rhashtable ht; - struct rhashtable_params params = { - .nelem_hint = TEST_HT_SIZE, - .head_offset = offsetof(struct test_obj, node), - .key_offset = offsetof(struct test_obj, value), - .key_len = sizeof(int), - .hashfn = jhash, -#ifdef CONFIG_PROVE_LOCKING - .mutex_is_held = &test_mutex_is_held, -#endif - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, - }; - int err; - - pr_info("Running resizable hashtable tests...\n"); + ht->being_destroyed = true; - err = rhashtable_init(&ht, ¶ms); - if (err < 0) { - pr_warn("Test failed: Unable to initialize hashtable: %d\n", - err); - return err; - } - - err = test_rhashtable(&ht); - - rhashtable_destroy(&ht); + cancel_work_sync(&ht->run_work); - return err; + mutex_lock(&ht->mutex); + bucket_table_free(rht_dereference(ht->tbl, ht)); + mutex_unlock(&ht->mutex); } - -subsys_initcall(test_rht_init); - -#endif /* CONFIG_TEST_RHASHTABLE */ +EXPORT_SYMBOL_GPL(rhashtable_destroy); diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 4eedfedb9e31..5c94e1012a91 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -61,7 +61,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) if (s->len < s->size) { len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args); - if (seq_buf_can_fit(s, len)) { + if (s->len + len < s->size) { s->len += len; return 0; } @@ -91,42 +91,6 @@ int seq_buf_printf(struct seq_buf *s, const char *fmt, ...) return ret; } -/** - * seq_buf_bitmask - write a bitmask array in its ASCII representation - * @s: seq_buf descriptor - * @maskp: points to an array of unsigned longs that represent a bitmask - * @nmaskbits: The number of bits that are valid in @maskp - * - * Writes a ASCII representation of a bitmask string into @s. - * - * Returns zero on success, -1 on overflow. - */ -int seq_buf_bitmask(struct seq_buf *s, const unsigned long *maskp, - int nmaskbits) -{ - unsigned int len = seq_buf_buffer_left(s); - int ret; - - WARN_ON(s->size == 0); - - /* - * Note, because bitmap_scnprintf() only returns the number of bytes - * written and not the number that would be written, we use the last - * byte of the buffer to let us know if we overflowed. There's a small - * chance that the bitmap could have fit exactly inside the buffer, but - * it's not that critical if that does happen. 
- */ - if (len > 1) { - ret = bitmap_scnprintf(s->buffer + s->len, len, maskp, nmaskbits); - if (ret < len) { - s->len += ret; - return 0; - } - } - seq_buf_set_overflow(s); - return -1; -} - #ifdef CONFIG_BINARY_PRINTF /** * seq_buf_bprintf - Write the printf string from binary arguments @@ -154,7 +118,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) if (s->len < s->size) { ret = bstr_printf(s->buffer + s->len, len, fmt, binary); - if (seq_buf_can_fit(s, ret)) { + if (s->len + ret < s->size) { s->len += ret; return 0; } diff --git a/lib/show_mem.c b/lib/show_mem.c index 7de89f4a36cf..adc98e1825ba 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -6,7 +6,6 @@ */ #include <linux/mm.h> -#include <linux/nmi.h> #include <linux/quicklist.h> #include <linux/cma.h> diff --git a/lib/sort.c b/lib/sort.c index 926d00429ed2..43c9fe73ae2e 100644 --- a/lib/sort.c +++ b/lib/sort.c @@ -4,10 +4,9 @@ * Jan 23 2005 Matt Mackall <mpm@selenic.com> */ -#include <linux/kernel.h> -#include <linux/module.h> +#include <linux/types.h> +#include <linux/export.h> #include <linux/sort.h> -#include <linux/slab.h> static void u32_swap(void *a, void *b, int size) { @@ -85,6 +84,7 @@ void sort(void *base, size_t num, size_t size, EXPORT_SYMBOL(sort); #if 0 +#include <linux/slab.h> /* a simple boot-time regression test */ int cmpint(const void *a, const void *b) diff --git a/lib/stmp_device.c b/lib/stmp_device.c index 8ac9bcc4289a..a904656f4fd7 100644 --- a/lib/stmp_device.c +++ b/lib/stmp_device.c @@ -15,7 +15,8 @@ #include <linux/io.h> #include <linux/errno.h> #include <linux/delay.h> -#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/export.h> #include <linux/stmp_device.h> #define STMP_MODULE_CLKGATE (1 << 30) diff --git a/lib/string.c b/lib/string.c index 10063300b830..ce81aaec3839 100644 --- a/lib/string.c +++ b/lib/string.c @@ -58,14 +58,6 @@ int strncasecmp(const char *s1, const char *s2, size_t len) } EXPORT_SYMBOL(strncasecmp); #endif -#ifndef __HAVE_ARCH_STRNICMP -#undef strnicmp -int strnicmp(const char *s1, const char *s2, size_t len) -{ - return strncasecmp(s1, s2, len); -} -EXPORT_SYMBOL(strnicmp); -#endif #ifndef __HAVE_ARCH_STRCASECMP int strcasecmp(const char *s1, const char *s2) @@ -321,12 +313,12 @@ EXPORT_SYMBOL(strchrnul); */ char *strrchr(const char *s, int c) { - const char *p = s + strlen(s); - do { - if (*p == (char)c) - return (char *)p; - } while (--p >= s); - return NULL; + const char *last = NULL; + do { + if (*s == (char)c) + last = s; + } while (*s++); + return (char *)last; } EXPORT_SYMBOL(strrchr); #endif @@ -604,6 +596,11 @@ EXPORT_SYMBOL(memset); * @s: Pointer to the start of the area. * @count: The size of the area. * + * Note: usually using memset() is just fine (!), but in cases + * where clearing out _local_ data at the end of a scope is + * necessary, memzero_explicit() should be used instead in + * order to prevent the compiler from optimising away zeroing. + * * memzero_explicit() doesn't need an arch-specific version as * it just invokes the one of memset() implicitly. */ diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 58b78ba57439..8f8c4417f228 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -20,19 +20,18 @@ * @len: length of buffer * * This function returns a string formatted to 3 significant figures - * giving the size in the required units. Returns 0 on success or - * error on failure. @buf is always zero terminated. + * giving the size in the required units. 
@buf should have room for + * at least 9 bytes and will always be zero terminated. * */ -int string_get_size(u64 size, const enum string_size_units units, - char *buf, int len) +void string_get_size(u64 size, const enum string_size_units units, + char *buf, int len) { static const char *const units_10[] = { - "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB", NULL + "B", "kB", "MB", "GB", "TB", "PB", "EB" }; static const char *const units_2[] = { - "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB", - NULL + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB" }; static const char *const *const units_str[] = { [STRING_UNITS_10] = units_10, @@ -43,13 +42,13 @@ int string_get_size(u64 size, const enum string_size_units units, [STRING_UNITS_2] = 1024, }; int i, j; - u64 remainder = 0, sf_cap; + u32 remainder = 0, sf_cap; char tmp[8]; tmp[0] = '\0'; i = 0; if (size >= divisor[units]) { - while (size >= divisor[units] && units_str[units][i]) { + while (size >= divisor[units]) { remainder = do_div(size, divisor[units]); i++; } @@ -60,17 +59,14 @@ int string_get_size(u64 size, const enum string_size_units units, if (j) { remainder *= 1000; - do_div(remainder, divisor[units]); - snprintf(tmp, sizeof(tmp), ".%03lld", - (unsigned long long)remainder); + remainder /= divisor[units]; + snprintf(tmp, sizeof(tmp), ".%03u", remainder); tmp[j+1] = '\0'; } } - snprintf(buf, len, "%lld%s %s", (unsigned long long)size, + snprintf(buf, len, "%u%s %s", (u32)size, tmp, units_str[units][i]); - - return 0; } EXPORT_SYMBOL(string_get_size); diff --git a/lib/strncpy_from_user.c b/lib/strncpy_from_user.c index bb2b201d6ad0..e0af6ff73d14 100644 --- a/lib/strncpy_from_user.c +++ b/lib/strncpy_from_user.c @@ -1,4 +1,5 @@ -#include <linux/module.h> +#include <linux/compiler.h> +#include <linux/export.h> #include <linux/uaccess.h> #include <linux/kernel.h> #include <linux/errno.h> diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c new file mode 100644 index 000000000000..daf29a390a89 --- /dev/null +++ b/lib/test-hexdump.c @@ -0,0 +1,180 @@ +/* + * Test cases for lib/hexdump.c module. 
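Stepping back to the string_get_size() rework above, a usage sketch under the new void-returning prototype; the wrapper and the sample value are invented, and per the updated kerneldoc the buffer only needs room for at least 9 bytes.

#include <linux/string_helpers.h>
#include <linux/printk.h>

static void ex_print_size(u64 bytes)
{
	char buf[10];

	string_get_size(bytes, STRING_UNITS_2, buf, sizeof(buf));
	/* 3 significant figures, e.g. 8192 comes out as something like "8.00 KiB" */
	pr_info("%llu bytes = %s\n", (unsigned long long)bytes, buf);
}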
+ */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/random.h> +#include <linux/string.h> + +static const unsigned char data_b[] = { + '\xbe', '\x32', '\xdb', '\x7b', '\x0a', '\x18', '\x93', '\xb2', /* 00 - 07 */ + '\x70', '\xba', '\xc4', '\x24', '\x7d', '\x83', '\x34', '\x9b', /* 08 - 0f */ + '\xa6', '\x9c', '\x31', '\xad', '\x9c', '\x0f', '\xac', '\xe9', /* 10 - 17 */ + '\x4c', '\xd1', '\x19', '\x99', '\x43', '\xb1', '\xaf', '\x0c', /* 18 - 1f */ +}; + +static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C..."; + +static const char *test_data_1_le[] __initconst = { + "be", "32", "db", "7b", "0a", "18", "93", "b2", + "70", "ba", "c4", "24", "7d", "83", "34", "9b", + "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9", + "4c", "d1", "19", "99", "43", "b1", "af", "0c", +}; + +static const char *test_data_2_le[] __initconst = { + "32be", "7bdb", "180a", "b293", + "ba70", "24c4", "837d", "9b34", + "9ca6", "ad31", "0f9c", "e9ac", + "d14c", "9919", "b143", "0caf", +}; + +static const char *test_data_4_le[] __initconst = { + "7bdb32be", "b293180a", "24c4ba70", "9b34837d", + "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143", +}; + +static const char *test_data_8_le[] __initconst = { + "b293180a7bdb32be", "9b34837d24c4ba70", + "e9ac0f9cad319ca6", "0cafb1439919d14c", +}; + +static void __init test_hexdump(size_t len, int rowsize, int groupsize, + bool ascii) +{ + char test[32 * 3 + 2 + 32 + 1]; + char real[32 * 3 + 2 + 32 + 1]; + char *p; + const char **result; + size_t l = len; + int gs = groupsize, rs = rowsize; + unsigned int i; + + hex_dump_to_buffer(data_b, l, rs, gs, real, sizeof(real), ascii); + + if (rs != 16 && rs != 32) + rs = 16; + + if (l > rs) + l = rs; + + if (!is_power_of_2(gs) || gs > 8 || (len % gs != 0)) + gs = 1; + + if (gs == 8) + result = test_data_8_le; + else if (gs == 4) + result = test_data_4_le; + else if (gs == 2) + result = test_data_2_le; + else + result = test_data_1_le; + + memset(test, ' ', sizeof(test)); + + /* hex dump */ + p = test; + for (i = 0; i < l / gs; i++) { + const char *q = *result++; + size_t amount = strlen(q); + + strncpy(p, q, amount); + p += amount + 1; + } + if (i) + p--; + + /* ASCII part */ + if (ascii) { + p = test + rs * 2 + rs / gs + 1; + strncpy(p, data_a, l); + p += l; + } + + *p = '\0'; + + if (strcmp(test, real)) { + pr_err("Len: %zu row: %d group: %d\n", len, rowsize, groupsize); + pr_err("Result: '%s'\n", real); + pr_err("Expect: '%s'\n", test); + } +} + +static void __init test_hexdump_set(int rowsize, bool ascii) +{ + size_t d = min_t(size_t, sizeof(data_b), rowsize); + size_t len = get_random_int() % d + 1; + + test_hexdump(len, rowsize, 4, ascii); + test_hexdump(len, rowsize, 2, ascii); + test_hexdump(len, rowsize, 8, ascii); + test_hexdump(len, rowsize, 1, ascii); +} + +static void __init test_hexdump_overflow(bool ascii) +{ + char buf[56]; + const char *t = test_data_1_le[0]; + size_t l = get_random_int() % sizeof(buf); + bool a; + int e, r; + + memset(buf, ' ', sizeof(buf)); + + r = hex_dump_to_buffer(data_b, 1, 16, 1, buf, l, ascii); + + if (ascii) + e = 50; + else + e = 2; + buf[e + 2] = '\0'; + + if (!l) { + a = r == e && buf[0] == ' '; + } else if (l < 3) { + a = r == e && buf[0] == '\0'; + } else if (l < 4) { + a = r == e && !strcmp(buf, t); + } else if (ascii) { + if (l < 51) + a = r == e && buf[l - 1] == '\0' && buf[l - 2] == ' '; + else + a = r == e && buf[50] == '\0' && buf[49] == '.'; + } else { + a = r == e && buf[e] == 
'\0'; + } + + if (!a) { + pr_err("Len: %zu rc: %u strlen: %zu\n", l, r, strlen(buf)); + pr_err("Result: '%s'\n", buf); + } +} + +static int __init test_hexdump_init(void) +{ + unsigned int i; + int rowsize; + + pr_info("Running tests...\n"); + + rowsize = (get_random_int() % 2 + 1) * 16; + for (i = 0; i < 16; i++) + test_hexdump_set(rowsize, false); + + rowsize = (get_random_int() % 2 + 1) * 16; + for (i = 0; i < 16; i++) + test_hexdump_set(rowsize, true); + + for (i = 0; i < 16; i++) + test_hexdump_overflow(false); + + for (i = 0; i < 16; i++) + test_hexdump_overflow(true); + + return -EINVAL; +} +module_init(test_hexdump_init); +MODULE_LICENSE("Dual BSD/GPL"); diff --git a/lib/test_kasan.c b/lib/test_kasan.c new file mode 100644 index 000000000000..098c08eddfab --- /dev/null +++ b/lib/test_kasan.c @@ -0,0 +1,277 @@ +/* + * + * Copyright (c) 2014 Samsung Electronics Co., Ltd. + * Author: Andrey Ryabinin <a.ryabinin@samsung.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#define pr_fmt(fmt) "kasan test: %s " fmt, __func__ + +#include <linux/kernel.h> +#include <linux/printk.h> +#include <linux/slab.h> +#include <linux/string.h> +#include <linux/module.h> + +static noinline void __init kmalloc_oob_right(void) +{ + char *ptr; + size_t size = 123; + + pr_info("out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 'x'; + kfree(ptr); +} + +static noinline void __init kmalloc_oob_left(void) +{ + char *ptr; + size_t size = 15; + + pr_info("out-of-bounds to left\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + *ptr = *(ptr - 1); + kfree(ptr); +} + +static noinline void __init kmalloc_node_oob_right(void) +{ + char *ptr; + size_t size = 4096; + + pr_info("kmalloc_node(): out-of-bounds to right\n"); + ptr = kmalloc_node(size, GFP_KERNEL, 0); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} + +static noinline void __init kmalloc_large_oob_rigth(void) +{ + char *ptr; + size_t size = KMALLOC_MAX_CACHE_SIZE + 10; + + pr_info("kmalloc large allocation: out-of-bounds to right\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + ptr[size] = 0; + kfree(ptr); +} + +static noinline void __init kmalloc_oob_krealloc_more(void) +{ + char *ptr1, *ptr2; + size_t size1 = 17; + size_t size2 = 19; + + pr_info("out-of-bounds after krealloc more\n"); + ptr1 = kmalloc(size1, GFP_KERNEL); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + return; + } + + ptr2[size2] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_krealloc_less(void) +{ + char *ptr1, *ptr2; + size_t size1 = 17; + size_t size2 = 15; + + pr_info("out-of-bounds after krealloc less\n"); + ptr1 = kmalloc(size1, GFP_KERNEL); + ptr2 = krealloc(ptr1, size2, GFP_KERNEL); + if (!ptr1 || !ptr2) { + pr_err("Allocation failed\n"); + kfree(ptr1); + return; + } + ptr2[size1] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_16(void) +{ + struct { + u64 words[2]; + } *ptr1, *ptr2; + + pr_info("kmalloc out-of-bounds for 16-bytes access\n"); + ptr1 = kmalloc(sizeof(*ptr1) - 3, GFP_KERNEL); + ptr2 = kmalloc(sizeof(*ptr2), GFP_KERNEL); + if (!ptr1 || !ptr2) { + 
pr_err("Allocation failed\n"); + kfree(ptr1); + kfree(ptr2); + return; + } + *ptr1 = *ptr2; + kfree(ptr1); + kfree(ptr2); +} + +static noinline void __init kmalloc_oob_in_memset(void) +{ + char *ptr; + size_t size = 666; + + pr_info("out-of-bounds in memset\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + memset(ptr, 0, size+5); + kfree(ptr); +} + +static noinline void __init kmalloc_uaf(void) +{ + char *ptr; + size_t size = 10; + + pr_info("use-after-free\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + *(ptr + 8) = 'x'; +} + +static noinline void __init kmalloc_uaf_memset(void) +{ + char *ptr; + size_t size = 33; + + pr_info("use-after-free in memset\n"); + ptr = kmalloc(size, GFP_KERNEL); + if (!ptr) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr); + memset(ptr, 0, size); +} + +static noinline void __init kmalloc_uaf2(void) +{ + char *ptr1, *ptr2; + size_t size = 43; + + pr_info("use-after-free after another kmalloc\n"); + ptr1 = kmalloc(size, GFP_KERNEL); + if (!ptr1) { + pr_err("Allocation failed\n"); + return; + } + + kfree(ptr1); + ptr2 = kmalloc(size, GFP_KERNEL); + if (!ptr2) { + pr_err("Allocation failed\n"); + return; + } + + ptr1[40] = 'x'; + kfree(ptr2); +} + +static noinline void __init kmem_cache_oob(void) +{ + char *p; + size_t size = 200; + struct kmem_cache *cache = kmem_cache_create("test_cache", + size, 0, + 0, NULL); + if (!cache) { + pr_err("Cache allocation failed\n"); + return; + } + pr_info("out-of-bounds in kmem_cache_alloc\n"); + p = kmem_cache_alloc(cache, GFP_KERNEL); + if (!p) { + pr_err("Allocation failed\n"); + kmem_cache_destroy(cache); + return; + } + + *p = p[size]; + kmem_cache_free(cache, p); + kmem_cache_destroy(cache); +} + +static char global_array[10]; + +static noinline void __init kasan_global_oob(void) +{ + volatile int i = 3; + char *p = &global_array[ARRAY_SIZE(global_array) + i]; + + pr_info("out-of-bounds global variable\n"); + *(volatile char *)p; +} + +static noinline void __init kasan_stack_oob(void) +{ + char stack_array[10]; + volatile int i = 0; + char *p = &stack_array[ARRAY_SIZE(stack_array) + i]; + + pr_info("out-of-bounds on stack\n"); + *(volatile char *)p; +} + +static int __init kmalloc_tests_init(void) +{ + kmalloc_oob_right(); + kmalloc_oob_left(); + kmalloc_node_oob_right(); + kmalloc_large_oob_rigth(); + kmalloc_oob_krealloc_more(); + kmalloc_oob_krealloc_less(); + kmalloc_oob_16(); + kmalloc_oob_in_memset(); + kmalloc_uaf(); + kmalloc_uaf_memset(); + kmalloc_uaf2(); + kmem_cache_oob(); + kasan_stack_oob(); + kasan_global_oob(); + return -EAGAIN; +} + +module_init(kmalloc_tests_init); +MODULE_LICENSE("GPL"); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c new file mode 100644 index 000000000000..67c7593d1dd6 --- /dev/null +++ b/lib/test_rhashtable.c @@ -0,0 +1,232 @@ +/* + * Resizable, Scalable, Concurrent Hash Table + * + * Copyright (c) 2014 Thomas Graf <tgraf@suug.ch> + * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> + * + * Based on the following paper: + * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf + * + * Code partially derived from nft_hash + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +/************************************************************************** + * Self Test + **************************************************************************/ + +#include <linux/init.h> +#include <linux/jhash.h> +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/rcupdate.h> +#include <linux/rhashtable.h> +#include <linux/slab.h> + + +#define TEST_HT_SIZE 8 +#define TEST_ENTRIES 2048 +#define TEST_PTR ((void *) 0xdeadbeef) +#define TEST_NEXPANDS 4 + +struct test_obj { + void *ptr; + int value; + struct rhash_head node; +}; + +static int __init test_rht_lookup(struct rhashtable *ht) +{ + unsigned int i; + + for (i = 0; i < TEST_ENTRIES * 2; i++) { + struct test_obj *obj; + bool expected = !(i % 2); + u32 key = i; + + obj = rhashtable_lookup(ht, &key); + + if (expected && !obj) { + pr_warn("Test failed: Could not find key %u\n", key); + return -ENOENT; + } else if (!expected && obj) { + pr_warn("Test failed: Unexpected entry found for key %u\n", + key); + return -EEXIST; + } else if (expected && obj) { + if (obj->ptr != TEST_PTR || obj->value != i) { + pr_warn("Test failed: Lookup value mismatch %p!=%p, %u!=%u\n", + obj->ptr, TEST_PTR, obj->value, i); + return -EINVAL; + } + } + } + + return 0; +} + +static void test_bucket_stats(struct rhashtable *ht, bool quiet) +{ + unsigned int cnt, rcu_cnt, i, total = 0; + struct rhash_head *pos; + struct test_obj *obj; + struct bucket_table *tbl; + + tbl = rht_dereference_rcu(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) { + rcu_cnt = cnt = 0; + + if (!quiet) + pr_info(" [%#4x/%zu]", i, tbl->size); + + rht_for_each_entry_rcu(obj, pos, tbl, i, node) { + cnt++; + total++; + if (!quiet) + pr_cont(" [%p],", obj); + } + + rht_for_each_entry_rcu(obj, pos, tbl, i, node) + rcu_cnt++; + + if (rcu_cnt != cnt) + pr_warn("Test failed: Chain count mismach %d != %d", + cnt, rcu_cnt); + + if (!quiet) + pr_cont("\n [%#x] first element: %p, chain length: %u\n", + i, tbl->buckets[i], cnt); + } + + pr_info(" Traversal complete: counted=%u, nelems=%u, entries=%d\n", + total, atomic_read(&ht->nelems), TEST_ENTRIES); + + if (total != atomic_read(&ht->nelems) || total != TEST_ENTRIES) + pr_warn("Test failed: Total count mismatch ^^^"); +} + +static int __init test_rhashtable(struct rhashtable *ht) +{ + struct bucket_table *tbl; + struct test_obj *obj; + struct rhash_head *pos, *next; + int err; + unsigned int i; + + /* + * Insertion Test: + * Insert TEST_ENTRIES into table with all keys even numbers + */ + pr_info(" Adding %d keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + struct test_obj *obj; + + obj = kzalloc(sizeof(*obj), GFP_KERNEL); + if (!obj) { + err = -ENOMEM; + goto error; + } + + obj->ptr = TEST_PTR; + obj->value = i * 2; + + rhashtable_insert(ht, &obj->node); + } + + rcu_read_lock(); + test_bucket_stats(ht, true); + test_rht_lookup(ht); + rcu_read_unlock(); + + for (i = 0; i < TEST_NEXPANDS; i++) { + pr_info(" Table expansion iteration %u...\n", i); + mutex_lock(&ht->mutex); + rhashtable_expand(ht); + mutex_unlock(&ht->mutex); + + rcu_read_lock(); + pr_info(" Verifying lookups...\n"); + test_rht_lookup(ht); + rcu_read_unlock(); + } + + for (i = 0; i < TEST_NEXPANDS; i++) { + pr_info(" Table shrinkage iteration %u...\n", i); + mutex_lock(&ht->mutex); + rhashtable_shrink(ht); + mutex_unlock(&ht->mutex); + + rcu_read_lock(); + pr_info(" Verifying lookups...\n"); + test_rht_lookup(ht); + rcu_read_unlock(); + } + + rcu_read_lock(); + test_bucket_stats(ht, true); + rcu_read_unlock(); + + pr_info(" Deleting %d 
keys\n", TEST_ENTRIES); + for (i = 0; i < TEST_ENTRIES; i++) { + u32 key = i * 2; + + obj = rhashtable_lookup(ht, &key); + BUG_ON(!obj); + + rhashtable_remove(ht, &obj->node); + kfree(obj); + } + + return 0; + +error: + tbl = rht_dereference_rcu(ht->tbl, ht); + for (i = 0; i < tbl->size; i++) + rht_for_each_entry_safe(obj, pos, next, tbl, i, node) + kfree(obj); + + return err; +} + +static struct rhashtable ht; + +static int __init test_rht_init(void) +{ + struct rhashtable_params params = { + .nelem_hint = TEST_HT_SIZE, + .head_offset = offsetof(struct test_obj, node), + .key_offset = offsetof(struct test_obj, value), + .key_len = sizeof(int), + .hashfn = jhash, + .max_shift = 1, /* we expand/shrink manually here */ + .nulls_base = (3U << RHT_BASE_SHIFT), + }; + int err; + + pr_info("Running resizable hashtable tests...\n"); + + err = rhashtable_init(&ht, ¶ms); + if (err < 0) { + pr_warn("Test failed: Unable to initialize hashtable: %d\n", + err); + return err; + } + + err = test_rhashtable(&ht); + + rhashtable_destroy(&ht); + + return err; +} + +static void __exit test_rht_exit(void) +{ +} + +module_init(test_rht_init); +module_exit(test_rht_exit); + +MODULE_LICENSE("GPL v2"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index ec337f64f52d..b235c96167d3 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -114,8 +114,9 @@ int skip_atoi(const char **s) { int i = 0; - while (isdigit(**s)) + do { i = i*10 + *((*s)++) - '0'; + } while (isdigit(**s)); return i; } @@ -793,6 +794,87 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, } static noinline_for_stack +char *bitmap_string(char *buf, char *end, unsigned long *bitmap, + struct printf_spec spec, const char *fmt) +{ + const int CHUNKSZ = 32; + int nr_bits = max_t(int, spec.field_width, 0); + int i, chunksz; + bool first = true; + + /* reused to print numbers */ + spec = (struct printf_spec){ .flags = SMALL | ZEROPAD, .base = 16 }; + + chunksz = nr_bits & (CHUNKSZ - 1); + if (chunksz == 0) + chunksz = CHUNKSZ; + + i = ALIGN(nr_bits, CHUNKSZ) - CHUNKSZ; + for (; i >= 0; i -= CHUNKSZ) { + u32 chunkmask, val; + int word, bit; + + chunkmask = ((1ULL << chunksz) - 1); + word = i / BITS_PER_LONG; + bit = i % BITS_PER_LONG; + val = (bitmap[word] >> bit) & chunkmask; + + if (!first) { + if (buf < end) + *buf = ','; + buf++; + } + first = false; + + spec.field_width = DIV_ROUND_UP(chunksz, 4); + buf = number(buf, end, val, spec); + + chunksz = CHUNKSZ; + } + return buf; +} + +static noinline_for_stack +char *bitmap_list_string(char *buf, char *end, unsigned long *bitmap, + struct printf_spec spec, const char *fmt) +{ + int nr_bits = max_t(int, spec.field_width, 0); + /* current bit is 'cur', most recently seen range is [rbot, rtop] */ + int cur, rbot, rtop; + bool first = true; + + /* reused to print numbers */ + spec = (struct printf_spec){ .base = 10 }; + + rbot = cur = find_first_bit(bitmap, nr_bits); + while (cur < nr_bits) { + rtop = cur; + cur = find_next_bit(bitmap, nr_bits, cur + 1); + if (cur < nr_bits && cur <= rtop + 1) + continue; + + if (!first) { + if (buf < end) + *buf = ','; + buf++; + } + first = false; + + buf = number(buf, end, rbot, spec); + if (rbot < rtop) { + if (buf < end) + *buf = '-'; + buf++; + + buf = number(buf, end, rtop, spec); + } + + rbot = cur; + } + return buf; +} + +static noinline_for_stack char *mac_address_string(char *buf, char *end, u8 *addr, struct printf_spec spec, const char *fmt) { @@ -1257,6 +1339,10 @@ int kptr_restrict __read_mostly; * - 'B' For backtraced symbolic direct 
pointers with offset
  * - 'R' For decoded struct resource, e.g., [mem 0x0-0x1f 64bit pref]
  * - 'r' For raw struct resource, e.g., [mem 0x0-0x1f flags 0x201]
+ * - 'b[l]' For a bitmap, the number of bits is determined by the field
+ *       width which must be explicitly specified either as part of the
+ *       format string '%32b[l]' or through '%*b[l]', [l] selects
+ *       range-list format instead of hex format
  * - 'M' For a 6-byte MAC address, it prints the address in the
  *       usual colon-separated hex notation
  * - 'm' For a 6-byte MAC address, it prints the hex address without colons
@@ -1353,6 +1439,13 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr,
 		return resource_string(buf, end, ptr, spec, fmt);
 	case 'h':
 		return hex_string(buf, end, ptr, spec, fmt);
+	case 'b':
+		switch (fmt[1]) {
+		case 'l':
+			return bitmap_list_string(buf, end, ptr, spec, fmt);
+		default:
+			return bitmap_string(buf, end, ptr, spec, fmt);
+		}
 	case 'M':			/* Colon separated: 00:01:02:03:04:05 */
 	case 'm':			/* Contiguous: 000102030405 */
 					/* [mM]F (FDDI) */
@@ -1604,8 +1697,7 @@ qualifier:
 
 	case 'p':
 		spec->type = FORMAT_TYPE_PTR;
-		return fmt - start;
-		/* skip alnum */
+		return ++fmt - start;
 
 	case '%':
 		spec->type = FORMAT_TYPE_PERCENT_CHAR;
@@ -1689,6 +1781,8 @@ qualifier:
  * %pB output the name of a backtrace symbol with its offset
  * %pR output the address range in a struct resource with decoded flags
  * %pr output the address range in a struct resource with raw flags
+ * %pb output the bitmap with field width as the number of bits
+ * %pbl output the bitmap as range list with field width as the number of bits
  * %pM output a 6-byte MAC address with colons
  * %pMR output a 6-byte MAC address with colons in reversed order
  * %pMF output a 6-byte MAC address with dashes
@@ -1728,7 +1822,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 
 	/* Reject out-of-range values early. Large positive sizes are
 	   used for unknown buffer sizes. */
-	if (WARN_ON_ONCE((int) size < 0))
+	if (WARN_ON_ONCE(size > INT_MAX))
 		return 0;
 
 	str = buf;
@@ -1794,7 +1888,7 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args)
 			break;
 
 		case FORMAT_TYPE_PTR:
-			str = pointer(fmt+1, str, end, va_arg(args, void *),
+			str = pointer(fmt, str, end, va_arg(args, void *),
 				      spec);
			while (isalnum(*fmt))
				fmt++;
@@ -2232,7 +2326,7 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf)
 		}
 
 		case FORMAT_TYPE_PTR:
-			str = pointer(fmt+1, str, end, get_arg(void *), spec);
+			str = pointer(fmt, str, end, get_arg(void *), spec);
 			while (isalnum(*fmt))
 				fmt++;
 			break;
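
As a usage illustration of the '%pb'/'%pbl' extension added to lib/vsprintf.c above, a minimal caller could look like the sketch below. This is not part of the patch: the module name, bitmap size and bit ranges are invented, and the expected output in the comments assumes the bitmap_string()/bitmap_list_string() behaviour introduced here (32-bit hex chunks, most significant chunk first, and a dash-separated range list).

/*
 * Illustration only -- not part of the patch above. Assumes the %pb/%pbl
 * support from lib/vsprintf.c; names and example values are made up.
 */
#include <linux/bitmap.h>
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>

#define EXAMPLE_NBITS	64

static int __init pb_example_init(void)
{
	DECLARE_BITMAP(mask, EXAMPLE_NBITS);

	bitmap_zero(mask, EXAMPLE_NBITS);
	bitmap_set(mask, 0, 4);		/* bits 0-3 */
	bitmap_set(mask, 16, 2);	/* bits 16-17 */

	/*
	 * The field width carries the number of valid bits, given either
	 * inline ("%64pb") or through '*' as here.
	 */
	pr_info("hex:  %*pb\n", EXAMPLE_NBITS, mask);	/* e.g. 00000000,0003000f */
	pr_info("list: %*pbl\n", EXAMPLE_NBITS, mask);	/* e.g. 0-3,16-17 */

	return 0;
}
module_init(pb_example_init);

MODULE_LICENSE("GPL");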
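
Similarly, the new lib/test_hexdump.c module above checks hex_dump_to_buffer() against precomputed strings. The sketch below (again illustrative, not part of the patch; buffer size and sample bytes are arbitrary) shows the call shape being exercised: 'len' bytes rendered into one row of at most 'rowsize' bytes, combined into little-endian groups of 'groupsize' bytes, with an optional ASCII column appended.

/*
 * Illustration only -- not part of the patch; data and sizes are arbitrary.
 */
#include <linux/init.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/printk.h>

static int __init hexdump_example_init(void)
{
	static const u8 data[8] = {
		0xbe, 0x32, 0xdb, 0x7b, 0x0a, 0x18, 0x93, 0xb2,
	};
	char line[100];

	/*
	 * 8 bytes, 16-byte row, 4-byte little-endian groups, ASCII column.
	 * Roughly: "7bdb32be b293180a" padded out, then ".2.{....".
	 */
	hex_dump_to_buffer(data, sizeof(data), 16, 4, line, sizeof(line), true);
	pr_info("%s\n", line);

	return 0;
}
module_init(hexdump_example_init);

MODULE_LICENSE("GPL");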