diff options
Diffstat (limited to 'lib')
39 files changed, 2596 insertions, 1459 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 87da53bb1fef..601965a948e8 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -18,9 +18,8 @@ config HAVE_ARCH_BITREVERSE default n depends on BITREVERSE help - This option provides an config for the architecture which have instruction - can do bitreverse operation, we use the hardware instruction if the architecture - have this capability. + This option enables the use of hardware bit-reversal instructions on + architectures which support such operations. config RATIONAL bool @@ -397,10 +396,6 @@ config CPUMASK_OFFSTACK them on the stack. This is a bit more expensive, but avoids stack overflow. -config DISABLE_OBSOLETE_CPUMASK_FUNCTIONS - bool "Disable obsolete cpumask functions" if DEBUG_PER_CPU_MAPS - depends on BROKEN - config CPU_RMAP bool depends on SMP diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index c5cefb3c009c..17670573dda8 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -865,6 +865,19 @@ config SCHED_STACK_END_CHECK data corruption or a sporadic crash at a later stage once the region is examined. The runtime overhead introduced is minimal. +config DEBUG_TIMEKEEPING + bool "Enable extra timekeeping sanity checking" + help + This option will enable additional timekeeping sanity checks + which may be helpful when diagnosing issues where timekeeping + problems are suspected. + + This may include checks in the timekeeping hotpaths, so this + option may have a (very small) performance impact to some + workloads. + + If unsure, say N. + config TIMER_STATS bool "Collect kernel timers statistics" depends on DEBUG_KERNEL && PROC_FS @@ -1180,16 +1193,7 @@ config DEBUG_CREDENTIALS menu "RCU Debugging" config PROVE_RCU - bool "RCU debugging: prove RCU correctness" - depends on PROVE_LOCKING - default n - help - This feature enables lockdep extensions that check for correct - use of RCU APIs. This is currently under development. Say Y - if you want to debug RCU usage or help work on the PROVE_RCU - feature. - - Say N if you are unsure. + def_bool PROVE_LOCKING config PROVE_RCU_REPEATEDLY bool "RCU debugging: don't disable PROVE_RCU on first splat" @@ -1257,6 +1261,30 @@ config RCU_TORTURE_TEST_RUNNABLE Say N here if you want the RCU torture tests to start only after being manually enabled via /proc. +config RCU_TORTURE_TEST_SLOW_INIT + bool "Slow down RCU grace-period initialization to expose races" + depends on RCU_TORTURE_TEST + help + This option makes grace-period initialization block for a + few jiffies between initializing each pair of consecutive + rcu_node structures. This helps to expose races involving + grace-period initialization, in other words, it makes your + kernel less stable. It can also greatly increase grace-period + latency, especially on systems with large numbers of CPUs. + This is useful when torture-testing RCU, but in almost no + other circumstance. + + Say Y here if you want your system to crash and hang more often. + Say N if you want a sane system. + +config RCU_TORTURE_TEST_SLOW_INIT_DELAY + int "How much to slow down RCU grace-period initialization" + range 0 5 + default 3 + help + This option specifies the number of jiffies to wait between + each rcu_node structure initialization. + config RCU_CPU_STALL_TIMEOUT int "RCU CPU stall timeout in seconds" depends on RCU_STALL_COMMON @@ -1732,6 +1760,18 @@ config TEST_UDELAY If unsure, say N. +config MEMTEST + bool "Memtest" + depends on HAVE_MEMBLOCK + ---help--- + This option adds a kernel parameter 'memtest', which allows memtest + to be set. + memtest=0, mean disabled; -- default + memtest=1, mean do 1 test pattern; + ... + memtest=17, mean do 17 test patterns. + If you are unsure how to answer this question, answer N. + source "samples/Kconfig" source "lib/Kconfig.kgdb" diff --git a/lib/Makefile b/lib/Makefile index 87eb3bffc283..6c37933336a0 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -24,8 +24,8 @@ obj-y += lockref.o obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ - gcd.o lcm.o list_sort.o uuid.o flex_array.o clz_ctz.o \ - bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ + gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ + bsearch.o find_bit.o llist.o memweight.o kfifo.o \ percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o @@ -106,7 +106,7 @@ obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o obj-$(CONFIG_SWIOTLB) += swiotlb.o -obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o +obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o iommu-common.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o obj-$(CONFIG_CPU_NOTIFIER_ERROR_INJECT) += cpu-notifier-error-inject.o diff --git a/lib/bitmap.c b/lib/bitmap.c index d456f4c15a9f..64c0926f5dd8 100644 --- a/lib/bitmap.c +++ b/lib/bitmap.c @@ -42,36 +42,6 @@ * for the best explanations of this ordering. */ -int __bitmap_empty(const unsigned long *bitmap, unsigned int bits) -{ - unsigned int k, lim = bits/BITS_PER_LONG; - for (k = 0; k < lim; ++k) - if (bitmap[k]) - return 0; - - if (bits % BITS_PER_LONG) - if (bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) - return 0; - - return 1; -} -EXPORT_SYMBOL(__bitmap_empty); - -int __bitmap_full(const unsigned long *bitmap, unsigned int bits) -{ - unsigned int k, lim = bits/BITS_PER_LONG; - for (k = 0; k < lim; ++k) - if (~bitmap[k]) - return 0; - - if (bits % BITS_PER_LONG) - if (~bitmap[k] & BITMAP_LAST_WORD_MASK(bits)) - return 0; - - return 1; -} -EXPORT_SYMBOL(__bitmap_full); - int __bitmap_equal(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits) { diff --git a/lib/cpumask.c b/lib/cpumask.c index b6513a9f2892..830dd5dec40f 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -5,27 +5,6 @@ #include <linux/export.h> #include <linux/bootmem.h> -int __first_cpu(const cpumask_t *srcp) -{ - return min_t(int, NR_CPUS, find_first_bit(srcp->bits, NR_CPUS)); -} -EXPORT_SYMBOL(__first_cpu); - -int __next_cpu(int n, const cpumask_t *srcp) -{ - return min_t(int, NR_CPUS, find_next_bit(srcp->bits, NR_CPUS, n+1)); -} -EXPORT_SYMBOL(__next_cpu); - -#if NR_CPUS > 64 -int __next_cpu_nr(int n, const cpumask_t *srcp) -{ - return min_t(int, nr_cpu_ids, - find_next_bit(srcp->bits, nr_cpu_ids, n+1)); -} -EXPORT_SYMBOL(__next_cpu_nr); -#endif - /** * cpumask_next_and - get the next cpu in *src1p & *src2p * @n: the cpu prior to the place to search (ie. return will be > @n) @@ -37,10 +16,11 @@ EXPORT_SYMBOL(__next_cpu_nr); int cpumask_next_and(int n, const struct cpumask *src1p, const struct cpumask *src2p) { - while ((n = cpumask_next(n, src1p)) < nr_cpu_ids) - if (cpumask_test_cpu(n, src2p)) - break; - return n; + struct cpumask tmp; + + if (cpumask_and(&tmp, src1p, src2p)) + return cpumask_next(n, &tmp); + return nr_cpu_ids; } EXPORT_SYMBOL(cpumask_next_and); @@ -89,13 +69,6 @@ bool alloc_cpumask_var_node(cpumask_var_t *mask, gfp_t flags, int node) dump_stack(); } #endif - /* FIXME: Bandaid to save us from old primitives which go to NR_CPUS. */ - if (*mask) { - unsigned char *ptr = (unsigned char *)cpumask_bits(*mask); - unsigned int tail; - tail = BITS_TO_LONGS(NR_CPUS - nr_cpumask_bits) * sizeof(long); - memset(ptr + cpumask_size() - tail, 0, tail); - } return *mask != NULL; } diff --git a/lib/devres.c b/lib/devres.c index 0f1dd2e9d2c1..fbe2aac522e6 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -72,6 +72,34 @@ void __iomem *devm_ioremap_nocache(struct device *dev, resource_size_t offset, EXPORT_SYMBOL(devm_ioremap_nocache); /** + * devm_ioremap_wc - Managed ioremap_wc() + * @dev: Generic device to remap IO address for + * @offset: BUS offset to map + * @size: Size of map + * + * Managed ioremap_wc(). Map is automatically unmapped on driver detach. + */ +void __iomem *devm_ioremap_wc(struct device *dev, resource_size_t offset, + resource_size_t size) +{ + void __iomem **ptr, *addr; + + ptr = devres_alloc(devm_ioremap_release, sizeof(*ptr), GFP_KERNEL); + if (!ptr) + return NULL; + + addr = ioremap_wc(offset, size); + if (addr) { + *ptr = addr; + devres_add(dev, ptr); + } else + devres_free(ptr); + + return addr; +} +EXPORT_SYMBOL(devm_ioremap_wc); + +/** * devm_iounmap - Managed iounmap() * @dev: Generic device to unmap for * @addr: Address to unmap diff --git a/lib/div64.c b/lib/div64.c index 4382ad77777e..19ea7ed4b948 100644 --- a/lib/div64.c +++ b/lib/div64.c @@ -127,7 +127,7 @@ EXPORT_SYMBOL(div64_u64_rem); * by the book 'Hacker's Delight'. The original source and full proof * can be found here and is available for use without restriction. * - * 'http://www.hackersdelight.org/HDcode/newCode/divDouble.c.txt' + * 'http://www.hackersdelight.org/hdcodetxt/divDouble.c.txt' */ #ifndef div64_u64 u64 div64_u64(u64 dividend, u64 divisor) diff --git a/lib/dma-debug.c b/lib/dma-debug.c index 9722bd2dbc9b..ae4b65e17e64 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -361,7 +361,7 @@ static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, unsigned int range = 0; while (range <= max_range) { - entry = __hash_bucket_find(*bucket, &index, containing_match); + entry = __hash_bucket_find(*bucket, ref, containing_match); if (entry) return entry; diff --git a/lib/find_bit.c b/lib/find_bit.c new file mode 100644 index 000000000000..18072ea9c20e --- /dev/null +++ b/lib/find_bit.c @@ -0,0 +1,193 @@ +/* bit search implementation + * + * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * Copyright (C) 2008 IBM Corporation + * 'find_last_bit' is written by Rusty Russell <rusty@rustcorp.com.au> + * (Inspired by David Howell's find_next_bit implementation) + * + * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease + * size and improve performance, 2015. + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * as published by the Free Software Foundation; either version + * 2 of the License, or (at your option) any later version. + */ + +#include <linux/bitops.h> +#include <linux/bitmap.h> +#include <linux/export.h> +#include <linux/kernel.h> + +#if !defined(find_next_bit) || !defined(find_next_zero_bit) + +/* + * This is a common helper function for find_next_bit and + * find_next_zero_bit. The difference is the "invert" argument, which + * is XORed with each fetched word before searching it for one bits. + */ +static unsigned long _find_next_bit(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) +{ + unsigned long tmp; + + if (!nbits || start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + + /* Handle 1st word. */ + tmp &= BITMAP_FIRST_WORD_MASK(start); + start = round_down(start, BITS_PER_LONG); + + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + } + + return min(start + __ffs(tmp), nbits); +} +#endif + +#ifndef find_next_bit +/* + * Find the next set bit in a memory region. + */ +unsigned long find_next_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, size, offset, 0UL); +} +EXPORT_SYMBOL(find_next_bit); +#endif + +#ifndef find_next_zero_bit +unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, + unsigned long offset) +{ + return _find_next_bit(addr, size, offset, ~0UL); +} +EXPORT_SYMBOL(find_next_zero_bit); +#endif + +#ifndef find_first_bit +/* + * Find the first set bit in a memory region. + */ +unsigned long find_first_bit(const unsigned long *addr, unsigned long size) +{ + unsigned long idx; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx]) + return min(idx * BITS_PER_LONG + __ffs(addr[idx]), size); + } + + return size; +} +EXPORT_SYMBOL(find_first_bit); +#endif + +#ifndef find_first_zero_bit +/* + * Find the first cleared bit in a memory region. + */ +unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) +{ + unsigned long idx; + + for (idx = 0; idx * BITS_PER_LONG < size; idx++) { + if (addr[idx] != ~0UL) + return min(idx * BITS_PER_LONG + ffz(addr[idx]), size); + } + + return size; +} +EXPORT_SYMBOL(find_first_zero_bit); +#endif + +#ifndef find_last_bit +unsigned long find_last_bit(const unsigned long *addr, unsigned long size) +{ + if (size) { + unsigned long val = BITMAP_LAST_WORD_MASK(size); + unsigned long idx = (size-1) / BITS_PER_LONG; + + do { + val &= addr[idx]; + if (val) + return idx * BITS_PER_LONG + __fls(val); + + val = ~0ul; + } while (idx--); + } + return size; +} +EXPORT_SYMBOL(find_last_bit); +#endif + +#ifdef __BIG_ENDIAN + +/* include/linux/byteorder does not support "unsigned long" type */ +static inline unsigned long ext2_swab(const unsigned long y) +{ +#if BITS_PER_LONG == 64 + return (unsigned long) __swab64((u64) y); +#elif BITS_PER_LONG == 32 + return (unsigned long) __swab32((u32) y); +#else +#error BITS_PER_LONG not defined +#endif +} + +#if !defined(find_next_bit_le) || !defined(find_next_zero_bit_le) +static unsigned long _find_next_bit_le(const unsigned long *addr, + unsigned long nbits, unsigned long start, unsigned long invert) +{ + unsigned long tmp; + + if (!nbits || start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + + /* Handle 1st word. */ + tmp &= ext2_swab(BITMAP_FIRST_WORD_MASK(start)); + start = round_down(start, BITS_PER_LONG); + + while (!tmp) { + start += BITS_PER_LONG; + if (start >= nbits) + return nbits; + + tmp = addr[start / BITS_PER_LONG] ^ invert; + } + + return min(start + __ffs(ext2_swab(tmp)), nbits); +} +#endif + +#ifndef find_next_zero_bit_le +unsigned long find_next_zero_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + return _find_next_bit_le(addr, size, offset, ~0UL); +} +EXPORT_SYMBOL(find_next_zero_bit_le); +#endif + +#ifndef find_next_bit_le +unsigned long find_next_bit_le(const void *addr, unsigned + long size, unsigned long offset) +{ + return _find_next_bit_le(addr, size, offset, 0UL); +} +EXPORT_SYMBOL(find_next_bit_le); +#endif + +#endif /* __BIG_ENDIAN */ diff --git a/lib/find_last_bit.c b/lib/find_last_bit.c index 91ca09fbf6f9..3e3be40c6a6e 100644 --- a/lib/find_last_bit.c +++ b/lib/find_last_bit.c @@ -4,6 +4,9 @@ * Written by Rusty Russell <rusty@rustcorp.com.au> * (Inspired by David Howell's find_next_bit implementation) * + * Rewritten by Yury Norov <yury.norov@gmail.com> to decrease + * size and improve performance, 2015. + * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License * as published by the Free Software Foundation; either version @@ -11,37 +14,26 @@ */ #include <linux/bitops.h> +#include <linux/bitmap.h> #include <linux/export.h> -#include <asm/types.h> -#include <asm/byteorder.h> +#include <linux/kernel.h> #ifndef find_last_bit unsigned long find_last_bit(const unsigned long *addr, unsigned long size) { - unsigned long words; - unsigned long tmp; - - /* Start at final word. */ - words = size / BITS_PER_LONG; + if (size) { + unsigned long val = BITMAP_LAST_WORD_MASK(size); + unsigned long idx = (size-1) / BITS_PER_LONG; - /* Partial final word? */ - if (size & (BITS_PER_LONG-1)) { - tmp = (addr[words] & (~0UL >> (BITS_PER_LONG - - (size & (BITS_PER_LONG-1))))); - if (tmp) - goto found; - } + do { + val &= addr[idx]; + if (val) + return idx * BITS_PER_LONG + __fls(val); - while (words) { - tmp = addr[--words]; - if (tmp) { -found: - return words * BITS_PER_LONG + __fls(tmp); - } + val = ~0ul; + } while (idx--); } - - /* Not found */ return size; } EXPORT_SYMBOL(find_last_bit); diff --git a/lib/find_next_bit.c b/lib/find_next_bit.c deleted file mode 100644 index 0cbfc0b4398f..000000000000 --- a/lib/find_next_bit.c +++ /dev/null @@ -1,285 +0,0 @@ -/* find_next_bit.c: fallback find next bit implementation - * - * Copyright (C) 2004 Red Hat, Inc. All Rights Reserved. - * Written by David Howells (dhowells@redhat.com) - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - */ - -#include <linux/bitops.h> -#include <linux/export.h> -#include <asm/types.h> -#include <asm/byteorder.h> - -#define BITOP_WORD(nr) ((nr) / BITS_PER_LONG) - -#ifndef find_next_bit -/* - * Find the next set bit in a memory region. - */ -unsigned long find_next_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); -} -EXPORT_SYMBOL(find_next_bit); -#endif - -#ifndef find_next_zero_bit -/* - * This implementation of find_{first,next}_zero_bit was stolen from - * Linus' asm-alpha/bitops.h. - */ -unsigned long find_next_zero_bit(const unsigned long *addr, unsigned long size, - unsigned long offset) -{ - const unsigned long *p = addr + BITOP_WORD(offset); - unsigned long result = offset & ~(BITS_PER_LONG-1); - unsigned long tmp; - - if (offset >= size) - return size; - size -= result; - offset %= BITS_PER_LONG; - if (offset) { - tmp = *(p++); - tmp |= ~0UL >> (BITS_PER_LONG - offset); - if (size < BITS_PER_LONG) - goto found_first; - if (~tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - while (size & ~(BITS_PER_LONG-1)) { - if (~(tmp = *(p++))) - goto found_middle; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = *p; - -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found_middle: - return result + ffz(tmp); -} -EXPORT_SYMBOL(find_next_zero_bit); -#endif - -#ifndef find_first_bit -/* - * Find the first set bit in a memory region. - */ -unsigned long find_first_bit(const unsigned long *addr, unsigned long size) -{ - const unsigned long *p = addr; - unsigned long result = 0; - unsigned long tmp; - - while (size & ~(BITS_PER_LONG-1)) { - if ((tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - - tmp = (*p) & (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found: - return result + __ffs(tmp); -} -EXPORT_SYMBOL(find_first_bit); -#endif - -#ifndef find_first_zero_bit -/* - * Find the first cleared bit in a memory region. - */ -unsigned long find_first_zero_bit(const unsigned long *addr, unsigned long size) -{ - const unsigned long *p = addr; - unsigned long result = 0; - unsigned long tmp; - - while (size & ~(BITS_PER_LONG-1)) { - if (~(tmp = *(p++))) - goto found; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - - tmp = (*p) | (~0UL << size); - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. */ -found: - return result + ffz(tmp); -} -EXPORT_SYMBOL(find_first_zero_bit); -#endif - -#ifdef __BIG_ENDIAN - -/* include/linux/byteorder does not support "unsigned long" type */ -static inline unsigned long ext2_swabp(const unsigned long * x) -{ -#if BITS_PER_LONG == 64 - return (unsigned long) __swab64p((u64 *) x); -#elif BITS_PER_LONG == 32 - return (unsigned long) __swab32p((u32 *) x); -#else -#error BITS_PER_LONG not defined -#endif -} - -/* include/linux/byteorder doesn't support "unsigned long" type */ -static inline unsigned long ext2_swab(const unsigned long y) -{ -#if BITS_PER_LONG == 64 - return (unsigned long) __swab64((u64) y); -#elif BITS_PER_LONG == 32 - return (unsigned long) __swab32((u32) y); -#else -#error BITS_PER_LONG not defined -#endif -} - -#ifndef find_next_zero_bit_le -unsigned long find_next_zero_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - const unsigned long *p = addr; - unsigned long result = offset & ~(BITS_PER_LONG - 1); - unsigned long tmp; - - if (offset >= size) - return size; - p += BITOP_WORD(offset); - size -= result; - offset &= (BITS_PER_LONG - 1UL); - if (offset) { - tmp = ext2_swabp(p++); - tmp |= (~0UL >> (BITS_PER_LONG - offset)); - if (size < BITS_PER_LONG) - goto found_first; - if (~tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - - while (size & ~(BITS_PER_LONG - 1)) { - if (~(tmp = *(p++))) - goto found_middle_swap; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = ext2_swabp(p); -found_first: - tmp |= ~0UL << size; - if (tmp == ~0UL) /* Are any bits zero? */ - return result + size; /* Nope. Skip ffz */ -found_middle: - return result + ffz(tmp); - -found_middle_swap: - return result + ffz(ext2_swab(tmp)); -} -EXPORT_SYMBOL(find_next_zero_bit_le); -#endif - -#ifndef find_next_bit_le -unsigned long find_next_bit_le(const void *addr, unsigned - long size, unsigned long offset) -{ - const unsigned long *p = addr; - unsigned long result = offset & ~(BITS_PER_LONG - 1); - unsigned long tmp; - - if (offset >= size) - return size; - p += BITOP_WORD(offset); - size -= result; - offset &= (BITS_PER_LONG - 1UL); - if (offset) { - tmp = ext2_swabp(p++); - tmp &= (~0UL << offset); - if (size < BITS_PER_LONG) - goto found_first; - if (tmp) - goto found_middle; - size -= BITS_PER_LONG; - result += BITS_PER_LONG; - } - - while (size & ~(BITS_PER_LONG - 1)) { - tmp = *(p++); - if (tmp) - goto found_middle_swap; - result += BITS_PER_LONG; - size -= BITS_PER_LONG; - } - if (!size) - return result; - tmp = ext2_swabp(p); -found_first: - tmp &= (~0UL >> (BITS_PER_LONG - size)); - if (tmp == 0UL) /* Are any bits set? */ - return result + size; /* Nope. */ -found_middle: - return result + __ffs(tmp); - -found_middle_swap: - return result + __ffs(ext2_swab(tmp)); -} -EXPORT_SYMBOL(find_next_bit_le); -#endif - -#endif /* __BIG_ENDIAN */ diff --git a/lib/iommu-common.c b/lib/iommu-common.c new file mode 100644 index 000000000000..df30632f0bef --- /dev/null +++ b/lib/iommu-common.c @@ -0,0 +1,270 @@ +/* + * IOMMU mmap management and range allocation functions. + * Based almost entirely upon the powerpc iommu allocator. + */ + +#include <linux/export.h> +#include <linux/bitmap.h> +#include <linux/bug.h> +#include <linux/iommu-helper.h> +#include <linux/iommu-common.h> +#include <linux/dma-mapping.h> +#include <linux/hash.h> + +#ifndef DMA_ERROR_CODE +#define DMA_ERROR_CODE (~(dma_addr_t)0x0) +#endif + +static unsigned long iommu_large_alloc = 15; + +static DEFINE_PER_CPU(unsigned int, iommu_hash_common); + +static inline bool need_flush(struct iommu_map_table *iommu) +{ + return (iommu->lazy_flush != NULL && + (iommu->flags & IOMMU_NEED_FLUSH) != 0); +} + +static inline void set_flush(struct iommu_map_table *iommu) +{ + iommu->flags |= IOMMU_NEED_FLUSH; +} + +static inline void clear_flush(struct iommu_map_table *iommu) +{ + iommu->flags &= ~IOMMU_NEED_FLUSH; +} + +static void setup_iommu_pool_hash(void) +{ + unsigned int i; + static bool do_once; + + if (do_once) + return; + do_once = true; + for_each_possible_cpu(i) + per_cpu(iommu_hash_common, i) = hash_32(i, IOMMU_POOL_HASHBITS); +} + +/* + * Initialize iommu_pool entries for the iommu_map_table. `num_entries' + * is the number of table entries. If `large_pool' is set to true, + * the top 1/4 of the table will be set aside for pool allocations + * of more than iommu_large_alloc pages. + */ +void iommu_tbl_pool_init(struct iommu_map_table *iommu, + unsigned long num_entries, + u32 table_shift, + void (*lazy_flush)(struct iommu_map_table *), + bool large_pool, u32 npools, + bool skip_span_boundary_check) +{ + unsigned int start, i; + struct iommu_pool *p = &(iommu->large_pool); + + setup_iommu_pool_hash(); + if (npools == 0) + iommu->nr_pools = IOMMU_NR_POOLS; + else + iommu->nr_pools = npools; + BUG_ON(npools > IOMMU_NR_POOLS); + + iommu->table_shift = table_shift; + iommu->lazy_flush = lazy_flush; + start = 0; + if (skip_span_boundary_check) + iommu->flags |= IOMMU_NO_SPAN_BOUND; + if (large_pool) + iommu->flags |= IOMMU_HAS_LARGE_POOL; + + if (!large_pool) + iommu->poolsize = num_entries/iommu->nr_pools; + else + iommu->poolsize = (num_entries * 3 / 4)/iommu->nr_pools; + for (i = 0; i < iommu->nr_pools; i++) { + spin_lock_init(&(iommu->pools[i].lock)); + iommu->pools[i].start = start; + iommu->pools[i].hint = start; + start += iommu->poolsize; /* start for next pool */ + iommu->pools[i].end = start - 1; + } + if (!large_pool) + return; + /* initialize large_pool */ + spin_lock_init(&(p->lock)); + p->start = start; + p->hint = p->start; + p->end = num_entries; +} +EXPORT_SYMBOL(iommu_tbl_pool_init); + +unsigned long iommu_tbl_range_alloc(struct device *dev, + struct iommu_map_table *iommu, + unsigned long npages, + unsigned long *handle, + unsigned long mask, + unsigned int align_order) +{ + unsigned int pool_hash = __this_cpu_read(iommu_hash_common); + unsigned long n, end, start, limit, boundary_size; + struct iommu_pool *pool; + int pass = 0; + unsigned int pool_nr; + unsigned int npools = iommu->nr_pools; + unsigned long flags; + bool large_pool = ((iommu->flags & IOMMU_HAS_LARGE_POOL) != 0); + bool largealloc = (large_pool && npages > iommu_large_alloc); + unsigned long shift; + unsigned long align_mask = 0; + + if (align_order > 0) + align_mask = 0xffffffffffffffffl >> (64 - align_order); + + /* Sanity check */ + if (unlikely(npages == 0)) { + WARN_ON_ONCE(1); + return DMA_ERROR_CODE; + } + + if (largealloc) { + pool = &(iommu->large_pool); + pool_nr = 0; /* to keep compiler happy */ + } else { + /* pick out pool_nr */ + pool_nr = pool_hash & (npools - 1); + pool = &(iommu->pools[pool_nr]); + } + spin_lock_irqsave(&pool->lock, flags); + + again: + if (pass == 0 && handle && *handle && + (*handle >= pool->start) && (*handle < pool->end)) + start = *handle; + else + start = pool->hint; + + limit = pool->end; + + /* The case below can happen if we have a small segment appended + * to a large, or when the previous alloc was at the very end of + * the available space. If so, go back to the beginning. If a + * flush is needed, it will get done based on the return value + * from iommu_area_alloc() below. + */ + if (start >= limit) + start = pool->start; + shift = iommu->table_map_base >> iommu->table_shift; + if (limit + shift > mask) { + limit = mask - shift + 1; + /* If we're constrained on address range, first try + * at the masked hint to avoid O(n) search complexity, + * but on second pass, start at 0 in pool 0. + */ + if ((start & mask) >= limit || pass > 0) { + spin_unlock(&(pool->lock)); + pool = &(iommu->pools[0]); + spin_lock(&(pool->lock)); + start = pool->start; + } else { + start &= mask; + } + } + + if (dev) + boundary_size = ALIGN(dma_get_seg_boundary(dev) + 1, + 1 << iommu->table_shift); + else + boundary_size = ALIGN(1ULL << 32, 1 << iommu->table_shift); + + boundary_size = boundary_size >> iommu->table_shift; + /* + * if the skip_span_boundary_check had been set during init, we set + * things up so that iommu_is_span_boundary() merely checks if the + * (index + npages) < num_tsb_entries + */ + if ((iommu->flags & IOMMU_NO_SPAN_BOUND) != 0) { + shift = 0; + boundary_size = iommu->poolsize * iommu->nr_pools; + } + n = iommu_area_alloc(iommu->map, limit, start, npages, shift, + boundary_size, align_mask); + if (n == -1) { + if (likely(pass == 0)) { + /* First failure, rescan from the beginning. */ + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else if (!largealloc && pass <= iommu->nr_pools) { + spin_unlock(&(pool->lock)); + pool_nr = (pool_nr + 1) & (iommu->nr_pools - 1); + pool = &(iommu->pools[pool_nr]); + spin_lock(&(pool->lock)); + pool->hint = pool->start; + set_flush(iommu); + pass++; + goto again; + } else { + /* give up */ + n = DMA_ERROR_CODE; + goto bail; + } + } + if (n < pool->hint || need_flush(iommu)) { + clear_flush(iommu); + iommu->lazy_flush(iommu); + } + + end = n + npages; + pool->hint = end; + + /* Update handle for SG allocations */ + if (handle) + *handle = end; +bail: + spin_unlock_irqrestore(&(pool->lock), flags); + + return n; +} +EXPORT_SYMBOL(iommu_tbl_range_alloc); + +static struct iommu_pool *get_pool(struct iommu_map_table *tbl, + unsigned long entry) +{ + struct iommu_pool *p; + unsigned long largepool_start = tbl->large_pool.start; + bool large_pool = ((tbl->flags & IOMMU_HAS_LARGE_POOL) != 0); + + /* The large pool is the last pool at the top of the table */ + if (large_pool && entry >= largepool_start) { + p = &tbl->large_pool; + } else { + unsigned int pool_nr = entry / tbl->poolsize; + + BUG_ON(pool_nr >= tbl->nr_pools); + p = &tbl->pools[pool_nr]; + } + return p; +} + +/* Caller supplies the index of the entry into the iommu map table + * itself when the mapping from dma_addr to the entry is not the + * default addr->entry mapping below. + */ +void iommu_tbl_range_free(struct iommu_map_table *iommu, u64 dma_addr, + unsigned long npages, unsigned long entry) +{ + struct iommu_pool *pool; + unsigned long flags; + unsigned long shift = iommu->table_shift; + + if (entry == DMA_ERROR_CODE) /* use default addr->entry mapping */ + entry = (dma_addr - iommu->table_map_base) >> shift; + pool = get_pool(iommu, entry); + + spin_lock_irqsave(&(pool->lock), flags); + bitmap_clear(iommu->map, entry, npages); + spin_unlock_irqrestore(&(pool->lock), flags); +} +EXPORT_SYMBOL(iommu_tbl_range_free); diff --git a/lib/ioremap.c b/lib/ioremap.c index 0c9216c48762..86c8911b0e3a 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -13,6 +13,43 @@ #include <asm/cacheflush.h> #include <asm/pgtable.h> +#ifdef CONFIG_HAVE_ARCH_HUGE_VMAP +static int __read_mostly ioremap_pud_capable; +static int __read_mostly ioremap_pmd_capable; +static int __read_mostly ioremap_huge_disabled; + +static int __init set_nohugeiomap(char *str) +{ + ioremap_huge_disabled = 1; + return 0; +} +early_param("nohugeiomap", set_nohugeiomap); + +void __init ioremap_huge_init(void) +{ + if (!ioremap_huge_disabled) { + if (arch_ioremap_pud_supported()) + ioremap_pud_capable = 1; + if (arch_ioremap_pmd_supported()) + ioremap_pmd_capable = 1; + } +} + +static inline int ioremap_pud_enabled(void) +{ + return ioremap_pud_capable; +} + +static inline int ioremap_pmd_enabled(void) +{ + return ioremap_pmd_capable; +} + +#else /* !CONFIG_HAVE_ARCH_HUGE_VMAP */ +static inline int ioremap_pud_enabled(void) { return 0; } +static inline int ioremap_pmd_enabled(void) { return 0; } +#endif /* CONFIG_HAVE_ARCH_HUGE_VMAP */ + static int ioremap_pte_range(pmd_t *pmd, unsigned long addr, unsigned long end, phys_addr_t phys_addr, pgprot_t prot) { @@ -43,6 +80,14 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, return -ENOMEM; do { next = pmd_addr_end(addr, end); + + if (ioremap_pmd_enabled() && + ((next - addr) == PMD_SIZE) && + IS_ALIGNED(phys_addr + addr, PMD_SIZE)) { + if (pmd_set_huge(pmd, phys_addr + addr, prot)) + continue; + } + if (ioremap_pte_range(pmd, addr, next, phys_addr + addr, prot)) return -ENOMEM; } while (pmd++, addr = next, addr != end); @@ -61,6 +106,14 @@ static inline int ioremap_pud_range(pgd_t *pgd, unsigned long addr, return -ENOMEM; do { next = pud_addr_end(addr, end); + + if (ioremap_pud_enabled() && + ((next - addr) == PUD_SIZE) && + IS_ALIGNED(phys_addr + addr, PUD_SIZE)) { + if (pud_set_huge(pud, phys_addr + addr, prot)) + continue; + } + if (ioremap_pmd_range(pud, addr, next, phys_addr + addr, prot)) return -ENOMEM; } while (pud++, addr = next, addr != end); diff --git a/lib/iov_iter.c b/lib/iov_iter.c new file mode 100644 index 000000000000..75232ad0a5e7 --- /dev/null +++ b/lib/iov_iter.c @@ -0,0 +1,851 @@ +#include <linux/export.h> +#include <linux/uio.h> +#include <linux/pagemap.h> +#include <linux/slab.h> +#include <linux/vmalloc.h> +#include <net/checksum.h> + +#define iterate_iovec(i, n, __v, __p, skip, STEP) { \ + size_t left; \ + size_t wanted = n; \ + __p = i->iov; \ + __v.iov_len = min(n, __p->iov_len - skip); \ + if (likely(__v.iov_len)) { \ + __v.iov_base = __p->iov_base + skip; \ + left = (STEP); \ + __v.iov_len -= left; \ + skip += __v.iov_len; \ + n -= __v.iov_len; \ + } else { \ + left = 0; \ + } \ + while (unlikely(!left && n)) { \ + __p++; \ + __v.iov_len = min(n, __p->iov_len); \ + if (unlikely(!__v.iov_len)) \ + continue; \ + __v.iov_base = __p->iov_base; \ + left = (STEP); \ + __v.iov_len -= left; \ + skip = __v.iov_len; \ + n -= __v.iov_len; \ + } \ + n = wanted - n; \ +} + +#define iterate_kvec(i, n, __v, __p, skip, STEP) { \ + size_t wanted = n; \ + __p = i->kvec; \ + __v.iov_len = min(n, __p->iov_len - skip); \ + if (likely(__v.iov_len)) { \ + __v.iov_base = __p->iov_base + skip; \ + (void)(STEP); \ + skip += __v.iov_len; \ + n -= __v.iov_len; \ + } \ + while (unlikely(n)) { \ + __p++; \ + __v.iov_len = min(n, __p->iov_len); \ + if (unlikely(!__v.iov_len)) \ + continue; \ + __v.iov_base = __p->iov_base; \ + (void)(STEP); \ + skip = __v.iov_len; \ + n -= __v.iov_len; \ + } \ + n = wanted; \ +} + +#define iterate_bvec(i, n, __v, __p, skip, STEP) { \ + size_t wanted = n; \ + __p = i->bvec; \ + __v.bv_len = min_t(size_t, n, __p->bv_len - skip); \ + if (likely(__v.bv_len)) { \ + __v.bv_page = __p->bv_page; \ + __v.bv_offset = __p->bv_offset + skip; \ + (void)(STEP); \ + skip += __v.bv_len; \ + n -= __v.bv_len; \ + } \ + while (unlikely(n)) { \ + __p++; \ + __v.bv_len = min_t(size_t, n, __p->bv_len); \ + if (unlikely(!__v.bv_len)) \ + continue; \ + __v.bv_page = __p->bv_page; \ + __v.bv_offset = __p->bv_offset; \ + (void)(STEP); \ + skip = __v.bv_len; \ + n -= __v.bv_len; \ + } \ + n = wanted; \ +} + +#define iterate_all_kinds(i, n, v, I, B, K) { \ + size_t skip = i->iov_offset; \ + if (unlikely(i->type & ITER_BVEC)) { \ + const struct bio_vec *bvec; \ + struct bio_vec v; \ + iterate_bvec(i, n, v, bvec, skip, (B)) \ + } else if (unlikely(i->type & ITER_KVEC)) { \ + const struct kvec *kvec; \ + struct kvec v; \ + iterate_kvec(i, n, v, kvec, skip, (K)) \ + } else { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + } \ +} + +#define iterate_and_advance(i, n, v, I, B, K) { \ + size_t skip = i->iov_offset; \ + if (unlikely(i->type & ITER_BVEC)) { \ + const struct bio_vec *bvec; \ + struct bio_vec v; \ + iterate_bvec(i, n, v, bvec, skip, (B)) \ + if (skip == bvec->bv_len) { \ + bvec++; \ + skip = 0; \ + } \ + i->nr_segs -= bvec - i->bvec; \ + i->bvec = bvec; \ + } else if (unlikely(i->type & ITER_KVEC)) { \ + const struct kvec *kvec; \ + struct kvec v; \ + iterate_kvec(i, n, v, kvec, skip, (K)) \ + if (skip == kvec->iov_len) { \ + kvec++; \ + skip = 0; \ + } \ + i->nr_segs -= kvec - i->kvec; \ + i->kvec = kvec; \ + } else { \ + const struct iovec *iov; \ + struct iovec v; \ + iterate_iovec(i, n, v, iov, skip, (I)) \ + if (skip == iov->iov_len) { \ + iov++; \ + skip = 0; \ + } \ + i->nr_segs -= iov - i->iov; \ + i->iov = iov; \ + } \ + i->count -= n; \ + i->iov_offset = skip; \ +} + +static size_t copy_page_to_iter_iovec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *from; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_writeable(buf, copy)) { + kaddr = kmap_atomic(page); + from = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user_inatomic(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = from - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too bad - revert to non-atomic kmap */ + kaddr = kmap(page); + from = kaddr + offset; + left = __copy_to_user(buf, from, copy); + copy -= left; + skip += copy; + from += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_to_user(buf, from, copy); + copy -= left; + skip = copy; + from += copy; + bytes -= copy; + } + kunmap(page); +done: + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + +static size_t copy_page_from_iter_iovec(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + size_t skip, copy, left, wanted; + const struct iovec *iov; + char __user *buf; + void *kaddr, *to; + + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + wanted = bytes; + iov = i->iov; + skip = i->iov_offset; + buf = iov->iov_base + skip; + copy = min(bytes, iov->iov_len - skip); + + if (!fault_in_pages_readable(buf, copy)) { + kaddr = kmap_atomic(page); + to = kaddr + offset; + + /* first chunk, usually the only one */ + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user_inatomic(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + if (likely(!bytes)) { + kunmap_atomic(kaddr); + goto done; + } + offset = to - kaddr; + buf += copy; + kunmap_atomic(kaddr); + copy = min(bytes, iov->iov_len - skip); + } + /* Too bad - revert to non-atomic kmap */ + kaddr = kmap(page); + to = kaddr + offset; + left = __copy_from_user(to, buf, copy); + copy -= left; + skip += copy; + to += copy; + bytes -= copy; + while (unlikely(!left && bytes)) { + iov++; + buf = iov->iov_base; + copy = min(bytes, iov->iov_len); + left = __copy_from_user(to, buf, copy); + copy -= left; + skip = copy; + to += copy; + bytes -= copy; + } + kunmap(page); +done: + if (skip == iov->iov_len) { + iov++; + skip = 0; + } + i->count -= wanted - bytes; + i->nr_segs -= iov - i->iov; + i->iov = iov; + i->iov_offset = skip; + return wanted - bytes; +} + +/* + * Fault in the first iovec of the given iov_iter, to a maximum length + * of bytes. Returns 0 on success, or non-zero if the memory could not be + * accessed (ie. because it is an invalid address). + * + * writev-intensive code may want this to prefault several iovecs -- that + * would be possible (callers must not rely on the fact that _only_ the + * first iovec will be faulted with the current implementation). + */ +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes) +{ + if (!(i->type & (ITER_BVEC|ITER_KVEC))) { + char __user *buf = i->iov->iov_base + i->iov_offset; + bytes = min(bytes, i->iov->iov_len - i->iov_offset); + return fault_in_pages_readable(buf, bytes); + } + return 0; +} +EXPORT_SYMBOL(iov_iter_fault_in_readable); + +/* + * Fault in one or more iovecs of the given iov_iter, to a maximum length of + * bytes. For each iovec, fault in each page that constitutes the iovec. + * + * Return 0 on success, or non-zero if the memory could not be accessed (i.e. + * because it is an invalid address). + */ +int iov_iter_fault_in_multipages_readable(struct iov_iter *i, size_t bytes) +{ + size_t skip = i->iov_offset; + const struct iovec *iov; + int err; + struct iovec v; + + if (!(i->type & (ITER_BVEC|ITER_KVEC))) { + iterate_iovec(i, bytes, v, iov, skip, ({ + err = fault_in_multipages_readable(v.iov_base, + v.iov_len); + if (unlikely(err)) + return err; + 0;})) + } + return 0; +} +EXPORT_SYMBOL(iov_iter_fault_in_multipages_readable); + +void iov_iter_init(struct iov_iter *i, int direction, + const struct iovec *iov, unsigned long nr_segs, + size_t count) +{ + /* It will get better. Eventually... */ + if (segment_eq(get_fs(), KERNEL_DS)) { + direction |= ITER_KVEC; + i->type = direction; + i->kvec = (struct kvec *)iov; + } else { + i->type = direction; + i->iov = iov; + } + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_init); + +static void memcpy_from_page(char *to, struct page *page, size_t offset, size_t len) +{ + char *from = kmap_atomic(page); + memcpy(to, from + offset, len); + kunmap_atomic(from); +} + +static void memcpy_to_page(struct page *page, size_t offset, char *from, size_t len) +{ + char *to = kmap_atomic(page); + memcpy(to + offset, from, len); + kunmap_atomic(to); +} + +static void memzero_page(struct page *page, size_t offset, size_t len) +{ + char *addr = kmap_atomic(page); + memset(addr + offset, 0, len); + kunmap_atomic(addr); +} + +size_t copy_to_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + char *from = addr; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __copy_to_user(v.iov_base, (from += v.iov_len) - v.iov_len, + v.iov_len), + memcpy_to_page(v.bv_page, v.bv_offset, + (from += v.bv_len) - v.bv_len, v.bv_len), + memcpy(v.iov_base, (from += v.iov_len) - v.iov_len, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(copy_to_iter); + +size_t copy_from_iter(void *addr, size_t bytes, struct iov_iter *i) +{ + char *to = addr; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __copy_from_user((to += v.iov_len) - v.iov_len, v.iov_base, + v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(copy_from_iter); + +size_t copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) +{ + char *to = addr; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __copy_from_user_nocache((to += v.iov_len) - v.iov_len, + v.iov_base, v.iov_len), + memcpy_from_page((to += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy((to += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(copy_from_iter_nocache); + +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & (ITER_BVEC|ITER_KVEC)) { + void *kaddr = kmap_atomic(page); + size_t wanted = copy_to_iter(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; + } else + return copy_page_to_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_to_iter); + +size_t copy_page_from_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i) +{ + if (i->type & (ITER_BVEC|ITER_KVEC)) { + void *kaddr = kmap_atomic(page); + size_t wanted = copy_from_iter(kaddr + offset, bytes, i); + kunmap_atomic(kaddr); + return wanted; + } else + return copy_page_from_iter_iovec(page, offset, bytes, i); +} +EXPORT_SYMBOL(copy_page_from_iter); + +size_t iov_iter_zero(size_t bytes, struct iov_iter *i) +{ + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + iterate_and_advance(i, bytes, v, + __clear_user(v.iov_base, v.iov_len), + memzero_page(v.bv_page, v.bv_offset, v.bv_len), + memset(v.iov_base, 0, v.iov_len) + ) + + return bytes; +} +EXPORT_SYMBOL(iov_iter_zero); + +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes) +{ + char *kaddr = kmap_atomic(page), *p = kaddr + offset; + iterate_all_kinds(i, bytes, v, + __copy_from_user_inatomic((p += v.iov_len) - v.iov_len, + v.iov_base, v.iov_len), + memcpy_from_page((p += v.bv_len) - v.bv_len, v.bv_page, + v.bv_offset, v.bv_len), + memcpy((p += v.iov_len) - v.iov_len, v.iov_base, v.iov_len) + ) + kunmap_atomic(kaddr); + return bytes; +} +EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); + +void iov_iter_advance(struct iov_iter *i, size_t size) +{ + iterate_and_advance(i, size, v, 0, 0, 0) +} +EXPORT_SYMBOL(iov_iter_advance); + +/* + * Return the count of just the current iov_iter segment. + */ +size_t iov_iter_single_seg_count(const struct iov_iter *i) +{ + if (i->nr_segs == 1) + return i->count; + else if (i->type & ITER_BVEC) + return min(i->count, i->bvec->bv_len - i->iov_offset); + else + return min(i->count, i->iov->iov_len - i->iov_offset); +} +EXPORT_SYMBOL(iov_iter_single_seg_count); + +void iov_iter_kvec(struct iov_iter *i, int direction, + const struct kvec *kvec, unsigned long nr_segs, + size_t count) +{ + BUG_ON(!(direction & ITER_KVEC)); + i->type = direction; + i->kvec = kvec; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_kvec); + +void iov_iter_bvec(struct iov_iter *i, int direction, + const struct bio_vec *bvec, unsigned long nr_segs, + size_t count) +{ + BUG_ON(!(direction & ITER_BVEC)); + i->type = direction; + i->bvec = bvec; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count; +} +EXPORT_SYMBOL(iov_iter_bvec); + +unsigned long iov_iter_alignment(const struct iov_iter *i) +{ + unsigned long res = 0; + size_t size = i->count; + + if (!size) + return 0; + + iterate_all_kinds(i, size, v, + (res |= (unsigned long)v.iov_base | v.iov_len, 0), + res |= v.bv_offset | v.bv_len, + res |= (unsigned long)v.iov_base | v.iov_len + ) + return res; +} +EXPORT_SYMBOL(iov_iter_alignment); + +ssize_t iov_iter_get_pages(struct iov_iter *i, + struct page **pages, size_t maxsize, unsigned maxpages, + size_t *start) +{ + if (maxsize > i->count) + maxsize = i->count; + + if (!maxsize) + return 0; + + iterate_all_kinds(i, maxsize, v, ({ + unsigned long addr = (unsigned long)v.iov_base; + size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); + int n; + int res; + + if (len > maxpages * PAGE_SIZE) + len = maxpages * PAGE_SIZE; + addr &= ~(PAGE_SIZE - 1); + n = DIV_ROUND_UP(len, PAGE_SIZE); + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, pages); + if (unlikely(res < 0)) + return res; + return (res == n ? len : res * PAGE_SIZE) - *start; + 0;}),({ + /* can't be more than PAGE_SIZE */ + *start = v.bv_offset; + get_page(*pages = v.bv_page); + return v.bv_len; + }),({ + return -EFAULT; + }) + ) + return 0; +} +EXPORT_SYMBOL(iov_iter_get_pages); + +static struct page **get_pages_array(size_t n) +{ + struct page **p = kmalloc(n * sizeof(struct page *), GFP_KERNEL); + if (!p) + p = vmalloc(n * sizeof(struct page *)); + return p; +} + +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, + size_t *start) +{ + struct page **p; + + if (maxsize > i->count) + maxsize = i->count; + + if (!maxsize) + return 0; + + iterate_all_kinds(i, maxsize, v, ({ + unsigned long addr = (unsigned long)v.iov_base; + size_t len = v.iov_len + (*start = addr & (PAGE_SIZE - 1)); + int n; + int res; + + addr &= ~(PAGE_SIZE - 1); + n = DIV_ROUND_UP(len, PAGE_SIZE); + p = get_pages_array(n); + if (!p) + return -ENOMEM; + res = get_user_pages_fast(addr, n, (i->type & WRITE) != WRITE, p); + if (unlikely(res < 0)) { + kvfree(p); + return res; + } + *pages = p; + return (res == n ? len : res * PAGE_SIZE) - *start; + 0;}),({ + /* can't be more than PAGE_SIZE */ + *start = v.bv_offset; + *pages = p = get_pages_array(1); + if (!p) + return -ENOMEM; + get_page(*p = v.bv_page); + return v.bv_len; + }),({ + return -EFAULT; + }) + ) + return 0; +} +EXPORT_SYMBOL(iov_iter_get_pages_alloc); + +size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, + struct iov_iter *i) +{ + char *to = addr; + __wsum sum, next; + size_t off = 0; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + sum = *csum; + iterate_and_advance(i, bytes, v, ({ + int err = 0; + next = csum_and_copy_from_user(v.iov_base, + (to += v.iov_len) - v.iov_len, + v.iov_len, 0, &err); + if (!err) { + sum = csum_block_add(sum, next, off); + off += v.iov_len; + } + err ? v.iov_len : 0; + }), ({ + char *p = kmap_atomic(v.bv_page); + next = csum_partial_copy_nocheck(p + v.bv_offset, + (to += v.bv_len) - v.bv_len, + v.bv_len, 0); + kunmap_atomic(p); + sum = csum_block_add(sum, next, off); + off += v.bv_len; + }),({ + next = csum_partial_copy_nocheck(v.iov_base, + (to += v.iov_len) - v.iov_len, + v.iov_len, 0); + sum = csum_block_add(sum, next, off); + off += v.iov_len; + }) + ) + *csum = sum; + return bytes; +} +EXPORT_SYMBOL(csum_and_copy_from_iter); + +size_t csum_and_copy_to_iter(void *addr, size_t bytes, __wsum *csum, + struct iov_iter *i) +{ + char *from = addr; + __wsum sum, next; + size_t off = 0; + if (unlikely(bytes > i->count)) + bytes = i->count; + + if (unlikely(!bytes)) + return 0; + + sum = *csum; + iterate_and_advance(i, bytes, v, ({ + int err = 0; + next = csum_and_copy_to_user((from += v.iov_len) - v.iov_len, + v.iov_base, + v.iov_len, 0, &err); + if (!err) { + sum = csum_block_add(sum, next, off); + off += v.iov_len; + } + err ? v.iov_len : 0; + }), ({ + char *p = kmap_atomic(v.bv_page); + next = csum_partial_copy_nocheck((from += v.bv_len) - v.bv_len, + p + v.bv_offset, + v.bv_len, 0); + kunmap_atomic(p); + sum = csum_block_add(sum, next, off); + off += v.bv_len; + }),({ + next = csum_partial_copy_nocheck((from += v.iov_len) - v.iov_len, + v.iov_base, + v.iov_len, 0); + sum = csum_block_add(sum, next, off); + off += v.iov_len; + }) + ) + *csum = sum; + return bytes; +} +EXPORT_SYMBOL(csum_and_copy_to_iter); + +int iov_iter_npages(const struct iov_iter *i, int maxpages) +{ + size_t size = i->count; + int npages = 0; + + if (!size) + return 0; + + iterate_all_kinds(i, size, v, ({ + unsigned long p = (unsigned long)v.iov_base; + npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) + - p / PAGE_SIZE; + if (npages >= maxpages) + return maxpages; + 0;}),({ + npages++; + if (npages >= maxpages) + return maxpages; + }),({ + unsigned long p = (unsigned long)v.iov_base; + npages += DIV_ROUND_UP(p + v.iov_len, PAGE_SIZE) + - p / PAGE_SIZE; + if (npages >= maxpages) + return maxpages; + }) + ) + return npages; +} +EXPORT_SYMBOL(iov_iter_npages); + +const void *dup_iter(struct iov_iter *new, struct iov_iter *old, gfp_t flags) +{ + *new = *old; + if (new->type & ITER_BVEC) + return new->bvec = kmemdup(new->bvec, + new->nr_segs * sizeof(struct bio_vec), + flags); + else + /* iovec and kvec have identical layout */ + return new->iov = kmemdup(new->iov, + new->nr_segs * sizeof(struct iovec), + flags); +} +EXPORT_SYMBOL(dup_iter); + +int import_iovec(int type, const struct iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i) +{ + ssize_t n; + struct iovec *p; + n = rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, + *iov, &p); + if (n < 0) { + if (p != *iov) + kfree(p); + *iov = NULL; + return n; + } + iov_iter_init(i, type, p, nr_segs, n); + *iov = p == *iov ? NULL : p; + return 0; +} +EXPORT_SYMBOL(import_iovec); + +#ifdef CONFIG_COMPAT +#include <linux/compat.h> + +int compat_import_iovec(int type, const struct compat_iovec __user * uvector, + unsigned nr_segs, unsigned fast_segs, + struct iovec **iov, struct iov_iter *i) +{ + ssize_t n; + struct iovec *p; + n = compat_rw_copy_check_uvector(type, uvector, nr_segs, fast_segs, + *iov, &p); + if (n < 0) { + if (p != *iov) + kfree(p); + *iov = NULL; + return n; + } + iov_iter_init(i, type, p, nr_segs, n); + *iov = p == *iov ? NULL : p; + return 0; +} +#endif + +int import_single_range(int rw, void __user *buf, size_t len, + struct iovec *iov, struct iov_iter *i) +{ + if (len > MAX_RW_COUNT) + len = MAX_RW_COUNT; + if (unlikely(!access_ok(!rw, buf, len))) + return -EFAULT; + + iov->iov_base = buf; + iov->iov_len = len; + iov_iter_init(i, rw, iov, 1, len); + return 0; +} diff --git a/lib/kobject.c b/lib/kobject.c index 03d4ab349fa7..3b841b97fccd 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -576,8 +576,13 @@ void kobject_del(struct kobject *kobj) */ struct kobject *kobject_get(struct kobject *kobj) { - if (kobj) + if (kobj) { + if (!kobj->state_initialized) + WARN(1, KERN_WARNING "kobject: '%s' (%p): is not " + "initialized, yet kobject_get() is being " + "called.\n", kobject_name(kobj), kobj); kref_get(&kobj->kref); + } return kobj; } diff --git a/lib/lcm.c b/lib/lcm.c index e97dbd51e756..03d7fcb420b5 100644 --- a/lib/lcm.c +++ b/lib/lcm.c @@ -12,3 +12,14 @@ unsigned long lcm(unsigned long a, unsigned long b) return 0; } EXPORT_SYMBOL_GPL(lcm); + +unsigned long lcm_not_zero(unsigned long a, unsigned long b) +{ + unsigned long l = lcm(a, b); + + if (l) + return l; + + return (b ? : a); +} +EXPORT_SYMBOL_GPL(lcm_not_zero); diff --git a/lib/lockref.c b/lib/lockref.c index ecb9a665ec19..494994bf17c8 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -18,7 +18,7 @@ #define CMPXCHG_LOOP(CODE, SUCCESS) do { \ struct lockref old; \ BUILD_BUG_ON(sizeof(old) != 8); \ - old.lock_count = ACCESS_ONCE(lockref->lock_count); \ + old.lock_count = READ_ONCE(lockref->lock_count); \ while (likely(arch_spin_value_unlocked(old.lock.rlock.raw_lock))) { \ struct lockref new = old, prev = old; \ CODE \ diff --git a/lib/lru_cache.c b/lib/lru_cache.c index 852c81e3ba9a..028f5d996eef 100644 --- a/lib/lru_cache.c +++ b/lib/lru_cache.c @@ -247,10 +247,11 @@ size_t lc_seq_printf_stats(struct seq_file *seq, struct lru_cache *lc) * progress) and "changed", when this in fact lead to an successful * update of the cache. */ - return seq_printf(seq, "\t%s: used:%u/%u " - "hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", - lc->name, lc->used, lc->nr_elements, - lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); + seq_printf(seq, "\t%s: used:%u/%u hits:%lu misses:%lu starving:%lu locked:%lu changed:%lu\n", + lc->name, lc->used, lc->nr_elements, + lc->hits, lc->misses, lc->starving, lc->locked, lc->changed); + + return 0; } static struct hlist_head *lc_hash_slot(struct lru_cache *lc, unsigned int enr) diff --git a/lib/lz4/lz4_decompress.c b/lib/lz4/lz4_decompress.c index 7a85967060a5..26cc6029b280 100644 --- a/lib/lz4/lz4_decompress.c +++ b/lib/lz4/lz4_decompress.c @@ -47,6 +47,11 @@ #include "lz4defs.h" +static const int dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; +#if LZ4_ARCH64 +static const int dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; +#endif + static int lz4_uncompress(const char *source, char *dest, int osize) { const BYTE *ip = (const BYTE *) source; @@ -56,10 +61,6 @@ static int lz4_uncompress(const char *source, char *dest, int osize) BYTE *cpy; unsigned token; size_t length; - size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 - size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; -#endif while (1) { @@ -116,7 +117,7 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* copy repeated sequence */ if (unlikely((op - ref) < STEPSIZE)) { #if LZ4_ARCH64 - size_t dec64 = dec64table[op - ref]; + int dec64 = dec64table[op - ref]; #else const int dec64 = 0; #endif @@ -139,6 +140,9 @@ static int lz4_uncompress(const char *source, char *dest, int osize) /* Error: request to write beyond destination buffer */ if (cpy > oend) goto _output_error; + if ((ref + COPYLENGTH) > oend || + (op + COPYLENGTH) > oend) + goto _output_error; LZ4_SECURECOPY(ref, op, (oend - COPYLENGTH)); while (op < cpy) *op++ = *ref++; @@ -174,11 +178,6 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, BYTE * const oend = op + maxoutputsize; BYTE *cpy; - size_t dec32table[] = {0, 3, 2, 3, 0, 0, 0, 0}; -#if LZ4_ARCH64 - size_t dec64table[] = {0, 0, 0, -1, 0, 1, 2, 3}; -#endif - /* Main Loop */ while (ip < iend) { @@ -246,7 +245,7 @@ static int lz4_uncompress_unknownoutputsize(const char *source, char *dest, /* copy repeated sequence */ if (unlikely((op - ref) < STEPSIZE)) { #if LZ4_ARCH64 - size_t dec64 = dec64table[op - ref]; + int dec64 = dec64table[op - ref]; #else const int dec64 = 0; #endif diff --git a/lib/nlattr.c b/lib/nlattr.c index 76a1b59523ab..f5907d23272d 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -279,6 +279,8 @@ int nla_memcpy(void *dest, const struct nlattr *src, int count) int minlen = min_t(int, count, nla_len(src)); memcpy(dest, nla_data(src), minlen); + if (count > minlen) + memset(dest + minlen, 0, count - minlen); return minlen; } diff --git a/lib/raid6/algos.c b/lib/raid6/algos.c index dbef2314901e..975c6e0434bd 100644 --- a/lib/raid6/algos.c +++ b/lib/raid6/algos.c @@ -131,11 +131,12 @@ static inline const struct raid6_recov_calls *raid6_choose_recov(void) static inline const struct raid6_calls *raid6_choose_gen( void *(*const dptrs)[(65536/PAGE_SIZE)+2], const int disks) { - unsigned long perf, bestperf, j0, j1; + unsigned long perf, bestgenperf, bestxorperf, j0, j1; + int start = (disks>>1)-1, stop = disks-3; /* work on the second half of the disks */ const struct raid6_calls *const *algo; const struct raid6_calls *best; - for (bestperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { + for (bestgenperf = 0, bestxorperf = 0, best = NULL, algo = raid6_algos; *algo; algo++) { if (!best || (*algo)->prefer >= best->prefer) { if ((*algo)->valid && !(*algo)->valid()) continue; @@ -153,19 +154,45 @@ static inline const struct raid6_calls *raid6_choose_gen( } preempt_enable(); - if (perf > bestperf) { - bestperf = perf; + if (perf > bestgenperf) { + bestgenperf = perf; best = *algo; } - pr_info("raid6: %-8s %5ld MB/s\n", (*algo)->name, + pr_info("raid6: %-8s gen() %5ld MB/s\n", (*algo)->name, (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + + if (!(*algo)->xor_syndrome) + continue; + + perf = 0; + + preempt_disable(); + j0 = jiffies; + while ((j1 = jiffies) == j0) + cpu_relax(); + while (time_before(jiffies, + j1 + (1<<RAID6_TIME_JIFFIES_LG2))) { + (*algo)->xor_syndrome(disks, start, stop, + PAGE_SIZE, *dptrs); + perf++; + } + preempt_enable(); + + if (best == *algo) + bestxorperf = perf; + + pr_info("raid6: %-8s xor() %5ld MB/s\n", (*algo)->name, + (perf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); } } if (best) { - pr_info("raid6: using algorithm %s (%ld MB/s)\n", + pr_info("raid6: using algorithm %s gen() %ld MB/s\n", best->name, - (bestperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + (bestgenperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2)); + if (best->xor_syndrome) + pr_info("raid6: .... xor() %ld MB/s, rmw enabled\n", + (bestxorperf*HZ) >> (20-16+RAID6_TIME_JIFFIES_LG2+1)); raid6_call = *best; } else pr_err("raid6: Yikes! No algorithm found!\n"); diff --git a/lib/raid6/altivec.uc b/lib/raid6/altivec.uc index 7cc12b532e95..bec27fce7501 100644 --- a/lib/raid6/altivec.uc +++ b/lib/raid6/altivec.uc @@ -119,6 +119,7 @@ int raid6_have_altivec(void) const struct raid6_calls raid6_altivec$# = { raid6_altivec$#_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_altivec, "altivecx$#", 0 diff --git a/lib/raid6/avx2.c b/lib/raid6/avx2.c index bc3b1dd436eb..76734004358d 100644 --- a/lib/raid6/avx2.c +++ b/lib/raid6/avx2.c @@ -89,6 +89,7 @@ static void raid6_avx21_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x1 = { raid6_avx21_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x1", 1 /* Has cache hints */ @@ -150,6 +151,7 @@ static void raid6_avx22_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x2 = { raid6_avx22_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x2", 1 /* Has cache hints */ @@ -242,6 +244,7 @@ static void raid6_avx24_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_avx2x4 = { raid6_avx24_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_avx2, "avx2x4", 1 /* Has cache hints */ diff --git a/lib/raid6/int.uc b/lib/raid6/int.uc index 5b50f8dfc5d2..558aeac9342a 100644 --- a/lib/raid6/int.uc +++ b/lib/raid6/int.uc @@ -107,9 +107,48 @@ static void raid6_int$#_gen_syndrome(int disks, size_t bytes, void **ptrs) } } +static void raid6_int$#_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) +{ + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + unative_t wd$$, wq$$, wp$$, w1$$, w2$$; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + for ( d = 0 ; d < bytes ; d += NSIZE*$# ) { + /* P/Q data pages */ + wq$$ = wp$$ = *(unative_t *)&dptr[z0][d+$$*NSIZE]; + for ( z = z0-1 ; z >= start ; z-- ) { + wd$$ = *(unative_t *)&dptr[z][d+$$*NSIZE]; + wp$$ ^= wd$$; + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + w1$$ ^= w2$$; + wq$$ = w1$$ ^ wd$$; + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + w2$$ = MASK(wq$$); + w1$$ = SHLBYTE(wq$$); + w2$$ &= NBYTES(0x1d); + wq$$ = w1$$ ^ w2$$; + } + *(unative_t *)&p[d+NSIZE*$$] ^= wp$$; + *(unative_t *)&q[d+NSIZE*$$] ^= wq$$; + } + +} + const struct raid6_calls raid6_intx$# = { raid6_int$#_gen_syndrome, - NULL, /* always valid */ + raid6_int$#_xor_syndrome, + NULL, /* always valid */ "int" NSTRING "x$#", 0 }; diff --git a/lib/raid6/mmx.c b/lib/raid6/mmx.c index 590c71c9e200..b3b0e1fcd3af 100644 --- a/lib/raid6/mmx.c +++ b/lib/raid6/mmx.c @@ -76,6 +76,7 @@ static void raid6_mmx1_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_mmxx1 = { raid6_mmx1_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_mmx, "mmxx1", 0 @@ -134,6 +135,7 @@ static void raid6_mmx2_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_mmxx2 = { raid6_mmx2_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_mmx, "mmxx2", 0 diff --git a/lib/raid6/neon.c b/lib/raid6/neon.c index 36ad4705df1a..d9ad6ee284f4 100644 --- a/lib/raid6/neon.c +++ b/lib/raid6/neon.c @@ -42,6 +42,7 @@ } \ struct raid6_calls const raid6_neonx ## _n = { \ raid6_neon ## _n ## _gen_syndrome, \ + NULL, /* XOR not yet implemented */ \ raid6_have_neon, \ "neonx" #_n, \ 0 \ diff --git a/lib/raid6/sse1.c b/lib/raid6/sse1.c index f76297139445..9025b8ca9aa3 100644 --- a/lib/raid6/sse1.c +++ b/lib/raid6/sse1.c @@ -92,6 +92,7 @@ static void raid6_sse11_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse1x1 = { raid6_sse11_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse1_or_mmxext, "sse1x1", 1 /* Has cache hints */ @@ -154,6 +155,7 @@ static void raid6_sse12_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_sse1x2 = { raid6_sse12_gen_syndrome, + NULL, /* XOR not yet implemented */ raid6_have_sse1_or_mmxext, "sse1x2", 1 /* Has cache hints */ diff --git a/lib/raid6/sse2.c b/lib/raid6/sse2.c index 85b82c85f28e..1d2276b007ee 100644 --- a/lib/raid6/sse2.c +++ b/lib/raid6/sse2.c @@ -88,8 +88,58 @@ static void raid6_sse21_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + +static void raid6_sse21_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 16 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("pxor %xmm4,%xmm2"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm5,%xmm4"); + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pxor %xmm5,%xmm4"); + } + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + /* Don't use movntdq for r/w memory area < cache line */ + asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); + asm volatile("movdqa %%xmm2,%0" : "=m" (p[d])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); +} + const struct raid6_calls raid6_sse2x1 = { raid6_sse21_gen_syndrome, + raid6_sse21_xor_syndrome, raid6_have_sse2, "sse2x1", 1 /* Has cache hints */ @@ -150,8 +200,76 @@ static void raid6_sse22_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + static void raid6_sse22_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" : : "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 32 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16])); + asm volatile("pxor %xmm4,%xmm2"); + asm volatile("pxor %xmm6,%xmm3"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + } + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + } + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16])); + /* Don't use movntdq for r/w memory area < cache line */ + asm volatile("movdqa %%xmm4,%0" : "=m" (q[d])); + asm volatile("movdqa %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("movdqa %%xmm2,%0" : "=m" (p[d])); + asm volatile("movdqa %%xmm3,%0" : "=m" (p[d+16])); + } + + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); + } + const struct raid6_calls raid6_sse2x2 = { raid6_sse22_gen_syndrome, + raid6_sse22_xor_syndrome, raid6_have_sse2, "sse2x2", 1 /* Has cache hints */ @@ -248,8 +366,117 @@ static void raid6_sse24_gen_syndrome(int disks, size_t bytes, void **ptrs) kernel_fpu_end(); } + static void raid6_sse24_xor_syndrome(int disks, int start, int stop, + size_t bytes, void **ptrs) + { + u8 **dptr = (u8 **)ptrs; + u8 *p, *q; + int d, z, z0; + + z0 = stop; /* P/Q right side optimization */ + p = dptr[disks-2]; /* XOR parity */ + q = dptr[disks-1]; /* RS syndrome */ + + kernel_fpu_begin(); + + asm volatile("movdqa %0,%%xmm0" :: "m" (raid6_sse_constants.x1d[0])); + + for ( d = 0 ; d < bytes ; d += 64 ) { + asm volatile("movdqa %0,%%xmm4" :: "m" (dptr[z0][d])); + asm volatile("movdqa %0,%%xmm6" :: "m" (dptr[z0][d+16])); + asm volatile("movdqa %0,%%xmm12" :: "m" (dptr[z0][d+32])); + asm volatile("movdqa %0,%%xmm14" :: "m" (dptr[z0][d+48])); + asm volatile("movdqa %0,%%xmm2" : : "m" (p[d])); + asm volatile("movdqa %0,%%xmm3" : : "m" (p[d+16])); + asm volatile("movdqa %0,%%xmm10" : : "m" (p[d+32])); + asm volatile("movdqa %0,%%xmm11" : : "m" (p[d+48])); + asm volatile("pxor %xmm4,%xmm2"); + asm volatile("pxor %xmm6,%xmm3"); + asm volatile("pxor %xmm12,%xmm10"); + asm volatile("pxor %xmm14,%xmm11"); + /* P/Q data pages */ + for ( z = z0-1 ; z >= start ; z-- ) { + asm volatile("prefetchnta %0" :: "m" (dptr[z][d])); + asm volatile("prefetchnta %0" :: "m" (dptr[z][d+32])); + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + asm volatile("movdqa %0,%%xmm5" :: "m" (dptr[z][d])); + asm volatile("movdqa %0,%%xmm7" :: "m" (dptr[z][d+16])); + asm volatile("movdqa %0,%%xmm13" :: "m" (dptr[z][d+32])); + asm volatile("movdqa %0,%%xmm15" :: "m" (dptr[z][d+48])); + asm volatile("pxor %xmm5,%xmm2"); + asm volatile("pxor %xmm7,%xmm3"); + asm volatile("pxor %xmm13,%xmm10"); + asm volatile("pxor %xmm15,%xmm11"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + } + asm volatile("prefetchnta %0" :: "m" (q[d])); + asm volatile("prefetchnta %0" :: "m" (q[d+32])); + /* P/Q left side optimization */ + for ( z = start-1 ; z >= 0 ; z-- ) { + asm volatile("pxor %xmm5,%xmm5"); + asm volatile("pxor %xmm7,%xmm7"); + asm volatile("pxor %xmm13,%xmm13"); + asm volatile("pxor %xmm15,%xmm15"); + asm volatile("pcmpgtb %xmm4,%xmm5"); + asm volatile("pcmpgtb %xmm6,%xmm7"); + asm volatile("pcmpgtb %xmm12,%xmm13"); + asm volatile("pcmpgtb %xmm14,%xmm15"); + asm volatile("paddb %xmm4,%xmm4"); + asm volatile("paddb %xmm6,%xmm6"); + asm volatile("paddb %xmm12,%xmm12"); + asm volatile("paddb %xmm14,%xmm14"); + asm volatile("pand %xmm0,%xmm5"); + asm volatile("pand %xmm0,%xmm7"); + asm volatile("pand %xmm0,%xmm13"); + asm volatile("pand %xmm0,%xmm15"); + asm volatile("pxor %xmm5,%xmm4"); + asm volatile("pxor %xmm7,%xmm6"); + asm volatile("pxor %xmm13,%xmm12"); + asm volatile("pxor %xmm15,%xmm14"); + } + asm volatile("movntdq %%xmm2,%0" : "=m" (p[d])); + asm volatile("movntdq %%xmm3,%0" : "=m" (p[d+16])); + asm volatile("movntdq %%xmm10,%0" : "=m" (p[d+32])); + asm volatile("movntdq %%xmm11,%0" : "=m" (p[d+48])); + asm volatile("pxor %0,%%xmm4" : : "m" (q[d])); + asm volatile("pxor %0,%%xmm6" : : "m" (q[d+16])); + asm volatile("pxor %0,%%xmm12" : : "m" (q[d+32])); + asm volatile("pxor %0,%%xmm14" : : "m" (q[d+48])); + asm volatile("movntdq %%xmm4,%0" : "=m" (q[d])); + asm volatile("movntdq %%xmm6,%0" : "=m" (q[d+16])); + asm volatile("movntdq %%xmm12,%0" : "=m" (q[d+32])); + asm volatile("movntdq %%xmm14,%0" : "=m" (q[d+48])); + } + asm volatile("sfence" : : : "memory"); + kernel_fpu_end(); + } + + const struct raid6_calls raid6_sse2x4 = { raid6_sse24_gen_syndrome, + raid6_sse24_xor_syndrome, raid6_have_sse2, "sse2x4", 1 /* Has cache hints */ diff --git a/lib/raid6/test/test.c b/lib/raid6/test/test.c index 5a485b7a7d3c..3bebbabdb510 100644 --- a/lib/raid6/test/test.c +++ b/lib/raid6/test/test.c @@ -28,11 +28,11 @@ char *dataptrs[NDISKS]; char data[NDISKS][PAGE_SIZE]; char recovi[PAGE_SIZE], recovj[PAGE_SIZE]; -static void makedata(void) +static void makedata(int start, int stop) { int i, j; - for (i = 0; i < NDISKS; i++) { + for (i = start; i <= stop; i++) { for (j = 0; j < PAGE_SIZE; j++) data[i][j] = rand(); @@ -91,34 +91,55 @@ int main(int argc, char *argv[]) { const struct raid6_calls *const *algo; const struct raid6_recov_calls *const *ra; - int i, j; + int i, j, p1, p2; int err = 0; - makedata(); + makedata(0, NDISKS-1); for (ra = raid6_recov_algos; *ra; ra++) { if ((*ra)->valid && !(*ra)->valid()) continue; + raid6_2data_recov = (*ra)->data2; raid6_datap_recov = (*ra)->datap; printf("using recovery %s\n", (*ra)->name); for (algo = raid6_algos; *algo; algo++) { - if (!(*algo)->valid || (*algo)->valid()) { - raid6_call = **algo; + if ((*algo)->valid && !(*algo)->valid()) + continue; + + raid6_call = **algo; + + /* Nuke syndromes */ + memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + + /* Generate assumed good syndrome */ + raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, + (void **)&dataptrs); + + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + + if (!raid6_call.xor_syndrome) + continue; + + for (p1 = 0; p1 < NDISKS-2; p1++) + for (p2 = p1; p2 < NDISKS-2; p2++) { - /* Nuke syndromes */ - memset(data[NDISKS-2], 0xee, 2*PAGE_SIZE); + /* Simulate rmw run */ + raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, + (void **)&dataptrs); + makedata(p1, p2); + raid6_call.xor_syndrome(NDISKS, p1, p2, PAGE_SIZE, + (void **)&dataptrs); - /* Generate assumed good syndrome */ - raid6_call.gen_syndrome(NDISKS, PAGE_SIZE, - (void **)&dataptrs); + for (i = 0; i < NDISKS-1; i++) + for (j = i+1; j < NDISKS; j++) + err += test_disks(i, j); + } - for (i = 0; i < NDISKS-1; i++) - for (j = i+1; j < NDISKS; j++) - err += test_disks(i, j); - } } printf("\n"); } diff --git a/lib/raid6/tilegx.uc b/lib/raid6/tilegx.uc index e7c29459cbcd..2dd291a11264 100644 --- a/lib/raid6/tilegx.uc +++ b/lib/raid6/tilegx.uc @@ -80,6 +80,7 @@ void raid6_tilegx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) const struct raid6_calls raid6_tilegx$# = { raid6_tilegx$#_gen_syndrome, + NULL, /* XOR not yet implemented */ NULL, "tilegx$#", 0 diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9cc4c4a90d00..4898442b837f 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -1,13 +1,13 @@ /* * Resizable, Scalable, Concurrent Hash Table * + * Copyright (c) 2015 Herbert Xu <herbert@gondor.apana.org.au> * Copyright (c) 2014-2015 Thomas Graf <tgraf@suug.ch> * Copyright (c) 2008-2014 Patrick McHardy <kaber@trash.net> * - * Based on the following paper: - * https://www.usenix.org/legacy/event/atc11/tech/final_files/Triplett.pdf - * * Code partially derived from nft_hash + * Rewritten with rehash code from br_multicast plus single list + * pointer as suggested by Josh Triplett * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -17,6 +17,7 @@ #include <linux/kernel.h> #include <linux/init.h> #include <linux/log2.h> +#include <linux/sched.h> #include <linux/slab.h> #include <linux/vmalloc.h> #include <linux/mm.h> @@ -26,121 +27,18 @@ #include <linux/err.h> #define HASH_DEFAULT_SIZE 64UL -#define HASH_MIN_SIZE 4UL +#define HASH_MIN_SIZE 4U #define BUCKET_LOCKS_PER_CPU 128UL -/* Base bits plus 1 bit for nulls marker */ -#define HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) - -enum { - RHT_LOCK_NORMAL, - RHT_LOCK_NESTED, -}; - -/* The bucket lock is selected based on the hash and protects mutations - * on a group of hash buckets. - * - * A maximum of tbl->size/2 bucket locks is allocated. This ensures that - * a single lock always covers both buckets which may both contains - * entries which link to the same bucket of the old table during resizing. - * This allows to simplify the locking as locking the bucket in both - * tables during resize always guarantee protection. - * - * IMPORTANT: When holding the bucket lock of both the old and new table - * during expansions and shrinking, the old bucket lock must always be - * acquired first. - */ -static spinlock_t *bucket_lock(const struct bucket_table *tbl, u32 hash) -{ - return &tbl->locks[hash & tbl->locks_mask]; -} - -static void *rht_obj(const struct rhashtable *ht, const struct rhash_head *he) -{ - return (void *) he - ht->p.head_offset; -} - -static u32 rht_bucket_index(const struct bucket_table *tbl, u32 hash) -{ - return hash & (tbl->size - 1); -} - -static u32 obj_raw_hashfn(const struct rhashtable *ht, const void *ptr) -{ - u32 hash; - - if (unlikely(!ht->p.key_len)) - hash = ht->p.obj_hashfn(ptr, ht->p.hash_rnd); - else - hash = ht->p.hashfn(ptr + ht->p.key_offset, ht->p.key_len, - ht->p.hash_rnd); - - return hash >> HASH_RESERVED_SPACE; -} - -static u32 key_hashfn(struct rhashtable *ht, const void *key, u32 len) -{ - return ht->p.hashfn(key, len, ht->p.hash_rnd) >> HASH_RESERVED_SPACE; -} - -static u32 head_hashfn(const struct rhashtable *ht, +static u32 head_hashfn(struct rhashtable *ht, const struct bucket_table *tbl, const struct rhash_head *he) { - return rht_bucket_index(tbl, obj_raw_hashfn(ht, rht_obj(ht, he))); + return rht_head_hashfn(ht, tbl, he, ht->p); } #ifdef CONFIG_PROVE_LOCKING -static void debug_dump_buckets(const struct rhashtable *ht, - const struct bucket_table *tbl) -{ - struct rhash_head *he; - unsigned int i, hash; - - for (i = 0; i < tbl->size; i++) { - pr_warn(" [Bucket %d] ", i); - rht_for_each_rcu(he, tbl, i) { - hash = head_hashfn(ht, tbl, he); - pr_cont("[hash = %#x, lock = %p] ", - hash, bucket_lock(tbl, hash)); - } - pr_cont("\n"); - } - -} - -static void debug_dump_table(struct rhashtable *ht, - const struct bucket_table *tbl, - unsigned int hash) -{ - struct bucket_table *old_tbl, *future_tbl; - - pr_emerg("BUG: lock for hash %#x in table %p not held\n", - hash, tbl); - - rcu_read_lock(); - future_tbl = rht_dereference_rcu(ht->future_tbl, ht); - old_tbl = rht_dereference_rcu(ht->tbl, ht); - if (future_tbl != old_tbl) { - pr_warn("Future table %p (size: %zd)\n", - future_tbl, future_tbl->size); - debug_dump_buckets(ht, future_tbl); - } - - pr_warn("Table %p (size: %zd)\n", old_tbl, old_tbl->size); - debug_dump_buckets(ht, old_tbl); - - rcu_read_unlock(); -} - #define ASSERT_RHT_MUTEX(HT) BUG_ON(!lockdep_rht_mutex_is_held(HT)) -#define ASSERT_BUCKET_LOCK(HT, TBL, HASH) \ - do { \ - if (unlikely(!lockdep_rht_bucket_is_held(TBL, HASH))) { \ - debug_dump_table(HT, TBL, HASH); \ - BUG(); \ - } \ - } while (0) int lockdep_rht_mutex_is_held(struct rhashtable *ht) { @@ -150,30 +48,18 @@ EXPORT_SYMBOL_GPL(lockdep_rht_mutex_is_held); int lockdep_rht_bucket_is_held(const struct bucket_table *tbl, u32 hash) { - spinlock_t *lock = bucket_lock(tbl, hash); + spinlock_t *lock = rht_bucket_lock(tbl, hash); return (debug_locks) ? lockdep_is_held(lock) : 1; } EXPORT_SYMBOL_GPL(lockdep_rht_bucket_is_held); #else #define ASSERT_RHT_MUTEX(HT) -#define ASSERT_BUCKET_LOCK(HT, TBL, HASH) #endif -static struct rhash_head __rcu **bucket_tail(struct bucket_table *tbl, u32 n) -{ - struct rhash_head __rcu **pprev; - - for (pprev = &tbl->buckets[n]; - !rht_is_a_nulls(rht_dereference_bucket(*pprev, tbl, n)); - pprev = &rht_dereference_bucket(*pprev, tbl, n)->next) - ; - - return pprev; -} - -static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl) +static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl, + gfp_t gfp) { unsigned int i, size; #if defined(CONFIG_PROVE_LOCKING) @@ -190,12 +76,13 @@ static int alloc_bucket_locks(struct rhashtable *ht, struct bucket_table *tbl) if (sizeof(spinlock_t) != 0) { #ifdef CONFIG_NUMA - if (size * sizeof(spinlock_t) > PAGE_SIZE) + if (size * sizeof(spinlock_t) > PAGE_SIZE && + gfp == GFP_KERNEL) tbl->locks = vmalloc(size * sizeof(spinlock_t)); else #endif tbl->locks = kmalloc_array(size, sizeof(spinlock_t), - GFP_KERNEL); + gfp); if (!tbl->locks) return -ENOMEM; for (i = 0; i < size; i++) @@ -214,155 +101,181 @@ static void bucket_table_free(const struct bucket_table *tbl) kvfree(tbl); } +static void bucket_table_free_rcu(struct rcu_head *head) +{ + bucket_table_free(container_of(head, struct bucket_table, rcu)); +} + static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, - size_t nbuckets) + size_t nbuckets, + gfp_t gfp) { - struct bucket_table *tbl; + struct bucket_table *tbl = NULL; size_t size; int i; size = sizeof(*tbl) + nbuckets * sizeof(tbl->buckets[0]); - tbl = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (tbl == NULL) + if (size <= (PAGE_SIZE << PAGE_ALLOC_COSTLY_ORDER) || + gfp != GFP_KERNEL) + tbl = kzalloc(size, gfp | __GFP_NOWARN | __GFP_NORETRY); + if (tbl == NULL && gfp == GFP_KERNEL) tbl = vzalloc(size); - if (tbl == NULL) return NULL; tbl->size = nbuckets; - if (alloc_bucket_locks(ht, tbl) < 0) { + if (alloc_bucket_locks(ht, tbl, gfp) < 0) { bucket_table_free(tbl); return NULL; } + INIT_LIST_HEAD(&tbl->walkers); + + get_random_bytes(&tbl->hash_rnd, sizeof(tbl->hash_rnd)); + for (i = 0; i < nbuckets; i++) INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); return tbl; } -/** - * rht_grow_above_75 - returns true if nelems > 0.75 * table-size - * @ht: hash table - * @new_size: new table size - */ -bool rht_grow_above_75(const struct rhashtable *ht, size_t new_size) +static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, + struct bucket_table *tbl) { - /* Expand table when exceeding 75% load */ - return atomic_read(&ht->nelems) > (new_size / 4 * 3) && - (ht->p.max_shift && atomic_read(&ht->shift) < ht->p.max_shift); -} -EXPORT_SYMBOL_GPL(rht_grow_above_75); + struct bucket_table *new_tbl; -/** - * rht_shrink_below_30 - returns true if nelems < 0.3 * table-size - * @ht: hash table - * @new_size: new table size - */ -bool rht_shrink_below_30(const struct rhashtable *ht, size_t new_size) -{ - /* Shrink table beneath 30% load */ - return atomic_read(&ht->nelems) < (new_size * 3 / 10) && - (atomic_read(&ht->shift) > ht->p.min_shift); -} -EXPORT_SYMBOL_GPL(rht_shrink_below_30); + do { + new_tbl = tbl; + tbl = rht_dereference_rcu(tbl->future_tbl, ht); + } while (tbl); -static void lock_buckets(struct bucket_table *new_tbl, - struct bucket_table *old_tbl, unsigned int hash) - __acquires(old_bucket_lock) -{ - spin_lock_bh(bucket_lock(old_tbl, hash)); - if (new_tbl != old_tbl) - spin_lock_bh_nested(bucket_lock(new_tbl, hash), - RHT_LOCK_NESTED); + return new_tbl; } -static void unlock_buckets(struct bucket_table *new_tbl, - struct bucket_table *old_tbl, unsigned int hash) - __releases(old_bucket_lock) +static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) { - if (new_tbl != old_tbl) - spin_unlock_bh(bucket_lock(new_tbl, hash)); - spin_unlock_bh(bucket_lock(old_tbl, hash)); + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl = rhashtable_last_table(ht, + rht_dereference_rcu(old_tbl->future_tbl, ht)); + struct rhash_head __rcu **pprev = &old_tbl->buckets[old_hash]; + int err = -ENOENT; + struct rhash_head *head, *next, *entry; + spinlock_t *new_bucket_lock; + unsigned int new_hash; + + rht_for_each(entry, old_tbl, old_hash) { + err = 0; + next = rht_dereference_bucket(entry->next, old_tbl, old_hash); + + if (rht_is_a_nulls(next)) + break; + + pprev = &entry->next; + } + + if (err) + goto out; + + new_hash = head_hashfn(ht, new_tbl, entry); + + new_bucket_lock = rht_bucket_lock(new_tbl, new_hash); + + spin_lock_nested(new_bucket_lock, SINGLE_DEPTH_NESTING); + head = rht_dereference_bucket(new_tbl->buckets[new_hash], + new_tbl, new_hash); + + if (rht_is_a_nulls(head)) + INIT_RHT_NULLS_HEAD(entry->next, ht, new_hash); + else + RCU_INIT_POINTER(entry->next, head); + + rcu_assign_pointer(new_tbl->buckets[new_hash], entry); + spin_unlock(new_bucket_lock); + + rcu_assign_pointer(*pprev, next); + +out: + return err; } -/** - * Unlink entries on bucket which hash to different bucket. - * - * Returns true if no more work needs to be performed on the bucket. - */ -static bool hashtable_chain_unzip(struct rhashtable *ht, - const struct bucket_table *new_tbl, - struct bucket_table *old_tbl, - size_t old_hash) +static void rhashtable_rehash_chain(struct rhashtable *ht, + unsigned int old_hash) { - struct rhash_head *he, *p, *next; - unsigned int new_hash, new_hash2; + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + spinlock_t *old_bucket_lock; - ASSERT_BUCKET_LOCK(ht, old_tbl, old_hash); + old_bucket_lock = rht_bucket_lock(old_tbl, old_hash); - /* Old bucket empty, no work needed. */ - p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, - old_hash); - if (rht_is_a_nulls(p)) - return false; - - new_hash = head_hashfn(ht, new_tbl, p); - ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash); + spin_lock_bh(old_bucket_lock); + while (!rhashtable_rehash_one(ht, old_hash)) + ; + old_tbl->rehash++; + spin_unlock_bh(old_bucket_lock); +} - /* Advance the old bucket pointer one or more times until it - * reaches a node that doesn't hash to the same bucket as the - * previous node p. Call the previous node p; - */ - rht_for_each_continue(he, p->next, old_tbl, old_hash) { - new_hash2 = head_hashfn(ht, new_tbl, he); - ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash2); +static int rhashtable_rehash_attach(struct rhashtable *ht, + struct bucket_table *old_tbl, + struct bucket_table *new_tbl) +{ + /* Protect future_tbl using the first bucket lock. */ + spin_lock_bh(old_tbl->locks); - if (new_hash != new_hash2) - break; - p = he; + /* Did somebody beat us to it? */ + if (rcu_access_pointer(old_tbl->future_tbl)) { + spin_unlock_bh(old_tbl->locks); + return -EEXIST; } - rcu_assign_pointer(old_tbl->buckets[old_hash], p->next); - /* Find the subsequent node which does hash to the same - * bucket as node P, or NULL if no such node exists. + /* Make insertions go into the new, empty table right away. Deletions + * and lookups will be attempted in both tables until we synchronize. */ - INIT_RHT_NULLS_HEAD(next, ht, old_hash); - if (!rht_is_a_nulls(he)) { - rht_for_each_continue(he, he->next, old_tbl, old_hash) { - if (head_hashfn(ht, new_tbl, he) == new_hash) { - next = he; - break; - } - } - } + rcu_assign_pointer(old_tbl->future_tbl, new_tbl); - /* Set p's next pointer to that subsequent node pointer, - * bypassing the nodes which do not hash to p's bucket - */ - rcu_assign_pointer(p->next, next); + /* Ensure the new table is visible to readers. */ + smp_wmb(); - p = rht_dereference_bucket(old_tbl->buckets[old_hash], old_tbl, - old_hash); + spin_unlock_bh(old_tbl->locks); - return !rht_is_a_nulls(p); + return 0; } -static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl, - unsigned int new_hash, struct rhash_head *entry) +static int rhashtable_rehash_table(struct rhashtable *ht) { - ASSERT_BUCKET_LOCK(ht, new_tbl, new_hash); + struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); + struct bucket_table *new_tbl; + struct rhashtable_walker *walker; + unsigned int old_hash; + + new_tbl = rht_dereference(old_tbl->future_tbl, ht); + if (!new_tbl) + return 0; + + for (old_hash = 0; old_hash < old_tbl->size; old_hash++) + rhashtable_rehash_chain(ht, old_hash); + + /* Publish the new table pointer. */ + rcu_assign_pointer(ht->tbl, new_tbl); - rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), entry); + spin_lock(&ht->lock); + list_for_each_entry(walker, &old_tbl->walkers, list) + walker->tbl = NULL; + spin_unlock(&ht->lock); + + /* Wait for readers. All new readers will see the new + * table, and thus no references to the old table will + * remain. + */ + call_rcu(&old_tbl->rcu, bucket_table_free_rcu); + + return rht_dereference(new_tbl->future_tbl, ht) ? -EAGAIN : 0; } /** * rhashtable_expand - Expand hash table while allowing concurrent lookups * @ht: the hash table to expand * - * A secondary bucket array is allocated and the hash entries are migrated - * while keeping them on both lists until the end of the RCU grace period. + * A secondary bucket array is allocated and the hash entries are migrated. * * This function may only be called in a context where it is safe to call * synchronize_rcu(), e.g. not within a rcu_read_lock() section. @@ -373,87 +286,32 @@ static void link_old_to_new(struct rhashtable *ht, struct bucket_table *new_tbl, * It is valid to have concurrent insertions and deletions protected by per * bucket locks or concurrent RCU protected lookups and traversals. */ -int rhashtable_expand(struct rhashtable *ht) +static int rhashtable_expand(struct rhashtable *ht) { struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); - struct rhash_head *he; - unsigned int new_hash, old_hash; - bool complete = false; + int err; ASSERT_RHT_MUTEX(ht); - new_tbl = bucket_table_alloc(ht, old_tbl->size * 2); + old_tbl = rhashtable_last_table(ht, old_tbl); + + new_tbl = bucket_table_alloc(ht, old_tbl->size * 2, GFP_KERNEL); if (new_tbl == NULL) return -ENOMEM; - atomic_inc(&ht->shift); + err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); + if (err) + bucket_table_free(new_tbl); - /* Make insertions go into the new, empty table right away. Deletions - * and lookups will be attempted in both tables until we synchronize. - * The synchronize_rcu() guarantees for the new table to be picked up - * so no new additions go into the old table while we relink. - */ - rcu_assign_pointer(ht->future_tbl, new_tbl); - synchronize_rcu(); - - /* For each new bucket, search the corresponding old bucket for the - * first entry that hashes to the new bucket, and link the end of - * newly formed bucket chain (containing entries added to future - * table) to that entry. Since all the entries which will end up in - * the new bucket appear in the same old bucket, this constructs an - * entirely valid new hash table, but with multiple buckets - * "zipped" together into a single imprecise chain. - */ - for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { - old_hash = rht_bucket_index(old_tbl, new_hash); - lock_buckets(new_tbl, old_tbl, new_hash); - rht_for_each(he, old_tbl, old_hash) { - if (head_hashfn(ht, new_tbl, he) == new_hash) { - link_old_to_new(ht, new_tbl, new_hash, he); - break; - } - } - unlock_buckets(new_tbl, old_tbl, new_hash); - } - - /* Unzip interleaved hash chains */ - while (!complete && !ht->being_destroyed) { - /* Wait for readers. All new readers will see the new - * table, and thus no references to the old table will - * remain. - */ - synchronize_rcu(); - - /* For each bucket in the old table (each of which - * contains items from multiple buckets of the new - * table): ... - */ - complete = true; - for (old_hash = 0; old_hash < old_tbl->size; old_hash++) { - lock_buckets(new_tbl, old_tbl, old_hash); - - if (hashtable_chain_unzip(ht, new_tbl, old_tbl, - old_hash)) - complete = false; - - unlock_buckets(new_tbl, old_tbl, old_hash); - } - } - - rcu_assign_pointer(ht->tbl, new_tbl); - synchronize_rcu(); - - bucket_table_free(old_tbl); - return 0; + return err; } -EXPORT_SYMBOL_GPL(rhashtable_expand); /** * rhashtable_shrink - Shrink hash table while allowing concurrent lookups * @ht: the hash table to shrink * - * This function may only be called in a context where it is safe to call - * synchronize_rcu(), e.g. not within a rcu_read_lock() section. + * This function shrinks the hash table to fit, i.e., the smallest + * size would not cause it to expand right away automatically. * * The caller must ensure that no concurrent resizing occurs by holding * ht->mutex. @@ -464,403 +322,146 @@ EXPORT_SYMBOL_GPL(rhashtable_expand); * It is valid to have concurrent insertions and deletions protected by per * bucket locks or concurrent RCU protected lookups and traversals. */ -int rhashtable_shrink(struct rhashtable *ht) +static int rhashtable_shrink(struct rhashtable *ht) { - struct bucket_table *new_tbl, *tbl = rht_dereference(ht->tbl, ht); - unsigned int new_hash; + struct bucket_table *new_tbl, *old_tbl = rht_dereference(ht->tbl, ht); + unsigned int size; + int err; ASSERT_RHT_MUTEX(ht); - new_tbl = bucket_table_alloc(ht, tbl->size / 2); - if (new_tbl == NULL) - return -ENOMEM; - - rcu_assign_pointer(ht->future_tbl, new_tbl); - synchronize_rcu(); - - /* Link the first entry in the old bucket to the end of the - * bucket in the new table. As entries are concurrently being - * added to the new table, lock down the new bucket. As we - * always divide the size in half when shrinking, each bucket - * in the new table maps to exactly two buckets in the old - * table. - */ - for (new_hash = 0; new_hash < new_tbl->size; new_hash++) { - lock_buckets(new_tbl, tbl, new_hash); - - rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), - tbl->buckets[new_hash]); - ASSERT_BUCKET_LOCK(ht, tbl, new_hash + new_tbl->size); - rcu_assign_pointer(*bucket_tail(new_tbl, new_hash), - tbl->buckets[new_hash + new_tbl->size]); + size = roundup_pow_of_two(atomic_read(&ht->nelems) * 3 / 2); + if (size < ht->p.min_size) + size = ht->p.min_size; - unlock_buckets(new_tbl, tbl, new_hash); - } + if (old_tbl->size <= size) + return 0; - /* Publish the new, valid hash table */ - rcu_assign_pointer(ht->tbl, new_tbl); - atomic_dec(&ht->shift); + if (rht_dereference(old_tbl->future_tbl, ht)) + return -EEXIST; - /* Wait for readers. No new readers will have references to the - * old hash table. - */ - synchronize_rcu(); + new_tbl = bucket_table_alloc(ht, size, GFP_KERNEL); + if (new_tbl == NULL) + return -ENOMEM; - bucket_table_free(tbl); + err = rhashtable_rehash_attach(ht, old_tbl, new_tbl); + if (err) + bucket_table_free(new_tbl); - return 0; + return err; } -EXPORT_SYMBOL_GPL(rhashtable_shrink); static void rht_deferred_worker(struct work_struct *work) { struct rhashtable *ht; struct bucket_table *tbl; - struct rhashtable_walker *walker; + int err = 0; ht = container_of(work, struct rhashtable, run_work); mutex_lock(&ht->mutex); - if (ht->being_destroyed) - goto unlock; tbl = rht_dereference(ht->tbl, ht); + tbl = rhashtable_last_table(ht, tbl); - list_for_each_entry(walker, &ht->walkers, list) - walker->resize = true; - - if (ht->p.grow_decision && ht->p.grow_decision(ht, tbl->size)) + if (rht_grow_above_75(ht, tbl)) rhashtable_expand(ht); - else if (ht->p.shrink_decision && ht->p.shrink_decision(ht, tbl->size)) + else if (ht->p.automatic_shrinking && rht_shrink_below_30(ht, tbl)) rhashtable_shrink(ht); -unlock: + err = rhashtable_rehash_table(ht); + mutex_unlock(&ht->mutex); -} -static void rhashtable_wakeup_worker(struct rhashtable *ht) -{ - struct bucket_table *tbl = rht_dereference_rcu(ht->tbl, ht); - struct bucket_table *new_tbl = rht_dereference_rcu(ht->future_tbl, ht); - size_t size = tbl->size; - - /* Only adjust the table if no resizing is currently in progress. */ - if (tbl == new_tbl && - ((ht->p.grow_decision && ht->p.grow_decision(ht, size)) || - (ht->p.shrink_decision && ht->p.shrink_decision(ht, size)))) + if (err) schedule_work(&ht->run_work); } -static void __rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj, - struct bucket_table *tbl, u32 hash) +static bool rhashtable_check_elasticity(struct rhashtable *ht, + struct bucket_table *tbl, + unsigned int hash) { + unsigned int elasticity = ht->elasticity; struct rhash_head *head; - hash = rht_bucket_index(tbl, hash); - head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); - - ASSERT_BUCKET_LOCK(ht, tbl, hash); - - if (rht_is_a_nulls(head)) - INIT_RHT_NULLS_HEAD(obj->next, ht, hash); - else - RCU_INIT_POINTER(obj->next, head); - - rcu_assign_pointer(tbl->buckets[hash], obj); + rht_for_each(head, tbl, hash) + if (!--elasticity) + return true; - atomic_inc(&ht->nelems); - - rhashtable_wakeup_worker(ht); + return false; } -/** - * rhashtable_insert - insert object into hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * - * Will take a per bucket spinlock to protect against mutual mutations - * on the same bucket. Multiple insertions may occur in parallel unless - * they map to the same bucket lock. - * - * It is safe to call this function from atomic context. - * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). - */ -void rhashtable_insert(struct rhashtable *ht, struct rhash_head *obj) +int rhashtable_insert_rehash(struct rhashtable *ht) { - struct bucket_table *tbl, *old_tbl; - unsigned hash; - - rcu_read_lock(); - - tbl = rht_dereference_rcu(ht->future_tbl, ht); - old_tbl = rht_dereference_rcu(ht->tbl, ht); - hash = obj_raw_hashfn(ht, rht_obj(ht, obj)); - - lock_buckets(tbl, old_tbl, hash); - __rhashtable_insert(ht, obj, tbl, hash); - unlock_buckets(tbl, old_tbl, hash); - - rcu_read_unlock(); -} -EXPORT_SYMBOL_GPL(rhashtable_insert); - -/** - * rhashtable_remove - remove object from hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * - * Since the hash chain is single linked, the removal operation needs to - * walk the bucket chain upon removal. The removal operation is thus - * considerable slow if the hash table is not correctly sized. - * - * Will automatically shrink the table via rhashtable_expand() if the - * shrink_decision function specified at rhashtable_init() returns true. - * - * The caller must ensure that no concurrent table mutations occur. It is - * however valid to have concurrent lookups if they are RCU protected. - */ -bool rhashtable_remove(struct rhashtable *ht, struct rhash_head *obj) -{ - struct bucket_table *tbl, *new_tbl, *old_tbl; - struct rhash_head __rcu **pprev; - struct rhash_head *he, *he2; - unsigned int hash, new_hash; - bool ret = false; + struct bucket_table *old_tbl; + struct bucket_table *new_tbl; + struct bucket_table *tbl; + unsigned int size; + int err; - rcu_read_lock(); old_tbl = rht_dereference_rcu(ht->tbl, ht); - tbl = new_tbl = rht_dereference_rcu(ht->future_tbl, ht); - new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj)); - - lock_buckets(new_tbl, old_tbl, new_hash); -restart: - hash = rht_bucket_index(tbl, new_hash); - pprev = &tbl->buckets[hash]; - rht_for_each(he, tbl, hash) { - if (he != obj) { - pprev = &he->next; - continue; - } - - ASSERT_BUCKET_LOCK(ht, tbl, hash); - - if (old_tbl->size > new_tbl->size && tbl == old_tbl && - !rht_is_a_nulls(obj->next) && - head_hashfn(ht, tbl, obj->next) != hash) { - rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash)); - } else if (unlikely(old_tbl->size < new_tbl->size && tbl == new_tbl)) { - rht_for_each_continue(he2, obj->next, tbl, hash) { - if (head_hashfn(ht, tbl, he2) == hash) { - rcu_assign_pointer(*pprev, he2); - goto found; - } - } - - rcu_assign_pointer(*pprev, (struct rhash_head *) rht_marker(ht, hash)); - } else { - rcu_assign_pointer(*pprev, obj->next); - } - -found: - ret = true; - break; - } + tbl = rhashtable_last_table(ht, old_tbl); - /* The entry may be linked in either 'tbl', 'future_tbl', or both. - * 'future_tbl' only exists for a short period of time during - * resizing. Thus traversing both is fine and the added cost is - * very rare. - */ - if (tbl != old_tbl) { - tbl = old_tbl; - goto restart; - } + size = tbl->size; - unlock_buckets(new_tbl, old_tbl, new_hash); - - if (ret) { - atomic_dec(&ht->nelems); - rhashtable_wakeup_worker(ht); - } - - rcu_read_unlock(); + if (rht_grow_above_75(ht, tbl)) + size *= 2; + /* More than two rehashes (not resizes) detected. */ + else if (WARN_ON(old_tbl != tbl && old_tbl->size == size)) + return -EBUSY; - return ret; -} -EXPORT_SYMBOL_GPL(rhashtable_remove); - -struct rhashtable_compare_arg { - struct rhashtable *ht; - const void *key; -}; - -static bool rhashtable_compare(void *ptr, void *arg) -{ - struct rhashtable_compare_arg *x = arg; - struct rhashtable *ht = x->ht; - - return !memcmp(ptr + ht->p.key_offset, x->key, ht->p.key_len); -} - -/** - * rhashtable_lookup - lookup key in hash table - * @ht: hash table - * @key: pointer to key - * - * Computes the hash value for the key and traverses the bucket chain looking - * for a entry with an identical key. The first matching entry is returned. - * - * This lookup function may only be used for fixed key hash table (key_len - * parameter set). It will BUG() if used inappropriately. - * - * Lookups may occur in parallel with hashtable mutations and resizing. - */ -void *rhashtable_lookup(struct rhashtable *ht, const void *key) -{ - struct rhashtable_compare_arg arg = { - .ht = ht, - .key = key, - }; - - BUG_ON(!ht->p.key_len); - - return rhashtable_lookup_compare(ht, key, &rhashtable_compare, &arg); -} -EXPORT_SYMBOL_GPL(rhashtable_lookup); - -/** - * rhashtable_lookup_compare - search hash table with compare function - * @ht: hash table - * @key: the pointer to the key - * @compare: compare function, must return true on match - * @arg: argument passed on to compare function - * - * Traverses the bucket chain behind the provided hash value and calls the - * specified compare function for each entry. - * - * Lookups may occur in parallel with hashtable mutations and resizing. - * - * Returns the first entry on which the compare function returned true. - */ -void *rhashtable_lookup_compare(struct rhashtable *ht, const void *key, - bool (*compare)(void *, void *), void *arg) -{ - const struct bucket_table *tbl, *old_tbl; - struct rhash_head *he; - u32 hash; - - rcu_read_lock(); - - old_tbl = rht_dereference_rcu(ht->tbl, ht); - tbl = rht_dereference_rcu(ht->future_tbl, ht); - hash = key_hashfn(ht, key, ht->p.key_len); -restart: - rht_for_each_rcu(he, tbl, rht_bucket_index(tbl, hash)) { - if (!compare(rht_obj(ht, he), arg)) - continue; - rcu_read_unlock(); - return rht_obj(ht, he); - } + new_tbl = bucket_table_alloc(ht, size, GFP_ATOMIC); + if (new_tbl == NULL) + return -ENOMEM; - if (unlikely(tbl != old_tbl)) { - tbl = old_tbl; - goto restart; - } - rcu_read_unlock(); + err = rhashtable_rehash_attach(ht, tbl, new_tbl); + if (err) { + bucket_table_free(new_tbl); + if (err == -EEXIST) + err = 0; + } else + schedule_work(&ht->run_work); - return NULL; + return err; } -EXPORT_SYMBOL_GPL(rhashtable_lookup_compare); +EXPORT_SYMBOL_GPL(rhashtable_insert_rehash); -/** - * rhashtable_lookup_insert - lookup and insert object into hash table - * @ht: hash table - * @obj: pointer to hash head inside object - * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * - * This lookup function may only be used for fixed key hash table (key_len - * parameter set). It will BUG() if used inappropriately. - * - * It is safe to call this function from atomic context. - * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). - */ -bool rhashtable_lookup_insert(struct rhashtable *ht, struct rhash_head *obj) +int rhashtable_insert_slow(struct rhashtable *ht, const void *key, + struct rhash_head *obj, + struct bucket_table *tbl) { - struct rhashtable_compare_arg arg = { - .ht = ht, - .key = rht_obj(ht, obj) + ht->p.key_offset, - }; + struct rhash_head *head; + unsigned int hash; + int err; - BUG_ON(!ht->p.key_len); + tbl = rhashtable_last_table(ht, tbl); + hash = head_hashfn(ht, tbl, obj); + spin_lock_nested(rht_bucket_lock(tbl, hash), SINGLE_DEPTH_NESTING); - return rhashtable_lookup_compare_insert(ht, obj, &rhashtable_compare, - &arg); -} -EXPORT_SYMBOL_GPL(rhashtable_lookup_insert); + err = -EEXIST; + if (key && rhashtable_lookup_fast(ht, key, ht->p)) + goto exit; -/** - * rhashtable_lookup_compare_insert - search and insert object to hash table - * with compare function - * @ht: hash table - * @obj: pointer to hash head inside object - * @compare: compare function, must return true on match - * @arg: argument passed on to compare function - * - * Locks down the bucket chain in both the old and new table if a resize - * is in progress to ensure that writers can't remove from the old table - * and can't insert to the new table during the atomic operation of search - * and insertion. Searches for duplicates in both the old and new table if - * a resize is in progress. - * - * Lookups may occur in parallel with hashtable mutations and resizing. - * - * Will trigger an automatic deferred table resizing if the size grows - * beyond the watermark indicated by grow_decision() which can be passed - * to rhashtable_init(). - */ -bool rhashtable_lookup_compare_insert(struct rhashtable *ht, - struct rhash_head *obj, - bool (*compare)(void *, void *), - void *arg) -{ - struct bucket_table *new_tbl, *old_tbl; - u32 new_hash; - bool success = true; + err = -EAGAIN; + if (rhashtable_check_elasticity(ht, tbl, hash) || + rht_grow_above_100(ht, tbl)) + goto exit; - BUG_ON(!ht->p.key_len); + err = 0; - rcu_read_lock(); - old_tbl = rht_dereference_rcu(ht->tbl, ht); - new_tbl = rht_dereference_rcu(ht->future_tbl, ht); - new_hash = obj_raw_hashfn(ht, rht_obj(ht, obj)); + head = rht_dereference_bucket(tbl->buckets[hash], tbl, hash); - lock_buckets(new_tbl, old_tbl, new_hash); + RCU_INIT_POINTER(obj->next, head); - if (rhashtable_lookup_compare(ht, rht_obj(ht, obj) + ht->p.key_offset, - compare, arg)) { - success = false; - goto exit; - } + rcu_assign_pointer(tbl->buckets[hash], obj); - __rhashtable_insert(ht, obj, new_tbl, new_hash); + atomic_inc(&ht->nelems); exit: - unlock_buckets(new_tbl, old_tbl, new_hash); - rcu_read_unlock(); + spin_unlock(rht_bucket_lock(tbl, hash)); - return success; + return err; } -EXPORT_SYMBOL_GPL(rhashtable_lookup_compare_insert); +EXPORT_SYMBOL_GPL(rhashtable_insert_slow); /** * rhashtable_walk_init - Initialise an iterator @@ -895,7 +496,8 @@ int rhashtable_walk_init(struct rhashtable *ht, struct rhashtable_iter *iter) return -ENOMEM; mutex_lock(&ht->mutex); - list_add(&iter->walker->list, &ht->walkers); + iter->walker->tbl = rht_dereference(ht->tbl, ht); + list_add(&iter->walker->list, &iter->walker->tbl->walkers); mutex_unlock(&ht->mutex); return 0; @@ -911,7 +513,8 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_init); void rhashtable_walk_exit(struct rhashtable_iter *iter) { mutex_lock(&iter->ht->mutex); - list_del(&iter->walker->list); + if (iter->walker->tbl) + list_del(&iter->walker->list); mutex_unlock(&iter->ht->mutex); kfree(iter->walker); } @@ -932,13 +535,21 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_exit); * by calling rhashtable_walk_next. */ int rhashtable_walk_start(struct rhashtable_iter *iter) + __acquires(RCU) { + struct rhashtable *ht = iter->ht; + + mutex_lock(&ht->mutex); + + if (iter->walker->tbl) + list_del(&iter->walker->list); + rcu_read_lock(); - if (iter->walker->resize) { - iter->slot = 0; - iter->skip = 0; - iter->walker->resize = false; + mutex_unlock(&ht->mutex); + + if (!iter->walker->tbl) { + iter->walker->tbl = rht_dereference_rcu(ht->tbl, ht); return -EAGAIN; } @@ -960,13 +571,11 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_start); */ void *rhashtable_walk_next(struct rhashtable_iter *iter) { - const struct bucket_table *tbl; + struct bucket_table *tbl = iter->walker->tbl; struct rhashtable *ht = iter->ht; struct rhash_head *p = iter->p; void *obj = NULL; - tbl = rht_dereference_rcu(ht->tbl, ht); - if (p) { p = rht_dereference_bucket_rcu(p->next, tbl, iter->slot); goto next; @@ -992,17 +601,20 @@ next: iter->skip = 0; } - iter->p = NULL; + /* Ensure we see any new tables. */ + smp_rmb(); -out: - if (iter->walker->resize) { - iter->p = NULL; + iter->walker->tbl = rht_dereference_rcu(tbl->future_tbl, ht); + if (iter->walker->tbl) { iter->slot = 0; iter->skip = 0; - iter->walker->resize = false; return ERR_PTR(-EAGAIN); } + iter->p = NULL; + +out: + return obj; } EXPORT_SYMBOL_GPL(rhashtable_walk_next); @@ -1014,16 +626,39 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_next); * Finish a hash table walk. */ void rhashtable_walk_stop(struct rhashtable_iter *iter) + __releases(RCU) { - rcu_read_unlock(); + struct rhashtable *ht; + struct bucket_table *tbl = iter->walker->tbl; + + if (!tbl) + goto out; + + ht = iter->ht; + + spin_lock(&ht->lock); + if (tbl->rehash < tbl->size) + list_add(&iter->walker->list, &tbl->walkers); + else + iter->walker->tbl = NULL; + spin_unlock(&ht->lock); + iter->p = NULL; + +out: + rcu_read_unlock(); } EXPORT_SYMBOL_GPL(rhashtable_walk_stop); -static size_t rounded_hashtable_size(struct rhashtable_params *params) +static size_t rounded_hashtable_size(const struct rhashtable_params *params) { return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), - 1UL << params->min_shift); + (unsigned long)params->min_size); +} + +static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) +{ + return jhash2(key, length, seed); } /** @@ -1056,7 +691,7 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * struct rhash_head node; * }; * - * u32 my_hash_fn(const void *data, u32 seed) + * u32 my_hash_fn(const void *data, u32 len, u32 seed) * { * struct test_obj *obj = data; * @@ -1069,72 +704,129 @@ static size_t rounded_hashtable_size(struct rhashtable_params *params) * .obj_hashfn = my_hash_fn, * }; */ -int rhashtable_init(struct rhashtable *ht, struct rhashtable_params *params) +int rhashtable_init(struct rhashtable *ht, + const struct rhashtable_params *params) { struct bucket_table *tbl; size_t size; size = HASH_DEFAULT_SIZE; - if ((params->key_len && !params->hashfn) || - (!params->key_len && !params->obj_hashfn)) + if ((!params->key_len && !params->obj_hashfn) || + (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) return -EINVAL; - params->min_shift = max_t(size_t, params->min_shift, - ilog2(HASH_MIN_SIZE)); - if (params->nelem_hint) size = rounded_hashtable_size(params); memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); + spin_lock_init(&ht->lock); memcpy(&ht->p, params, sizeof(*params)); - INIT_LIST_HEAD(&ht->walkers); + + if (params->min_size) + ht->p.min_size = roundup_pow_of_two(params->min_size); + + if (params->max_size) + ht->p.max_size = rounddown_pow_of_two(params->max_size); + + ht->p.min_size = max(ht->p.min_size, HASH_MIN_SIZE); + + /* The maximum (not average) chain length grows with the + * size of the hash table, at a rate of (log N)/(log log N). + * The value of 16 is selected so that even if the hash + * table grew to 2^32 you would not expect the maximum + * chain length to exceed it unless we are under attack + * (or extremely unlucky). + * + * As this limit is only to detect attacks, we don't need + * to set it to a lower value as you'd need the chain + * length to vastly exceed 16 to have any real effect + * on the system. + */ + if (!params->insecure_elasticity) + ht->elasticity = 16; if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); else ht->p.locks_mul = BUCKET_LOCKS_PER_CPU; - tbl = bucket_table_alloc(ht, size); + ht->key_len = ht->p.key_len; + if (!params->hashfn) { + ht->p.hashfn = jhash; + + if (!(ht->key_len & (sizeof(u32) - 1))) { + ht->key_len /= sizeof(u32); + ht->p.hashfn = rhashtable_jhash2; + } + } + + tbl = bucket_table_alloc(ht, size, GFP_KERNEL); if (tbl == NULL) return -ENOMEM; atomic_set(&ht->nelems, 0); - atomic_set(&ht->shift, ilog2(tbl->size)); - RCU_INIT_POINTER(ht->tbl, tbl); - RCU_INIT_POINTER(ht->future_tbl, tbl); - if (!ht->p.hash_rnd) - get_random_bytes(&ht->p.hash_rnd, sizeof(ht->p.hash_rnd)); + RCU_INIT_POINTER(ht->tbl, tbl); - if (ht->p.grow_decision || ht->p.shrink_decision) - INIT_WORK(&ht->run_work, rht_deferred_worker); + INIT_WORK(&ht->run_work, rht_deferred_worker); return 0; } EXPORT_SYMBOL_GPL(rhashtable_init); /** - * rhashtable_destroy - destroy hash table + * rhashtable_free_and_destroy - free elements and destroy hash table * @ht: the hash table to destroy + * @free_fn: callback to release resources of element + * @arg: pointer passed to free_fn + * + * Stops an eventual async resize. If defined, invokes free_fn for each + * element to releasal resources. Please note that RCU protected + * readers may still be accessing the elements. Releasing of resources + * must occur in a compatible manner. Then frees the bucket array. * - * Frees the bucket array. This function is not rcu safe, therefore the caller - * has to make sure that no resizing may happen by unpublishing the hashtable - * and waiting for the quiescent cycle before releasing the bucket array. + * This function will eventually sleep to wait for an async resize + * to complete. The caller is responsible that no further write operations + * occurs in parallel. */ -void rhashtable_destroy(struct rhashtable *ht) +void rhashtable_free_and_destroy(struct rhashtable *ht, + void (*free_fn)(void *ptr, void *arg), + void *arg) { - ht->being_destroyed = true; + const struct bucket_table *tbl; + unsigned int i; - if (ht->p.grow_decision || ht->p.shrink_decision) - cancel_work_sync(&ht->run_work); + cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); - bucket_table_free(rht_dereference(ht->tbl, ht)); + tbl = rht_dereference(ht->tbl, ht); + if (free_fn) { + for (i = 0; i < tbl->size; i++) { + struct rhash_head *pos, *next; + + for (pos = rht_dereference(tbl->buckets[i], ht), + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL; + !rht_is_a_nulls(pos); + pos = next, + next = !rht_is_a_nulls(pos) ? + rht_dereference(pos->next, ht) : NULL) + free_fn(rht_obj(ht, pos), arg); + } + } + + bucket_table_free(tbl); mutex_unlock(&ht->mutex); } +EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); + +void rhashtable_destroy(struct rhashtable *ht) +{ + return rhashtable_free_and_destroy(ht, NULL, NULL); +} EXPORT_SYMBOL_GPL(rhashtable_destroy); diff --git a/lib/seq_buf.c b/lib/seq_buf.c index 88c0854bd752..5c94e1012a91 100644 --- a/lib/seq_buf.c +++ b/lib/seq_buf.c @@ -61,7 +61,7 @@ int seq_buf_vprintf(struct seq_buf *s, const char *fmt, va_list args) if (s->len < s->size) { len = vsnprintf(s->buffer + s->len, s->size - s->len, fmt, args); - if (seq_buf_can_fit(s, len)) { + if (s->len + len < s->size) { s->len += len; return 0; } @@ -118,7 +118,7 @@ int seq_buf_bprintf(struct seq_buf *s, const char *fmt, const u32 *binary) if (s->len < s->size) { ret = bstr_printf(s->buffer + s->len, len, fmt, binary); - if (seq_buf_can_fit(s, ret)) { + if (s->len + ret < s->size) { s->len += ret; return 0; } diff --git a/lib/sha1.c b/lib/sha1.c index 1df191e04a24..5a56dfd7b99d 100644 --- a/lib/sha1.c +++ b/lib/sha1.c @@ -198,3 +198,4 @@ void sha_init(__u32 *buf) buf[3] = 0x10325476; buf[4] = 0xc3d2e1f0; } +EXPORT_SYMBOL(sha_init); diff --git a/lib/string.c b/lib/string.c index ce81aaec3839..a5792019193c 100644 --- a/lib/string.c +++ b/lib/string.c @@ -607,7 +607,7 @@ EXPORT_SYMBOL(memset); void memzero_explicit(void *s, size_t count) { memset(s, 0, count); - OPTIMIZER_HIDE_VAR(s); + barrier(); } EXPORT_SYMBOL(memzero_explicit); diff --git a/lib/string_helpers.c b/lib/string_helpers.c index 8f8c4417f228..c98ae818eb4e 100644 --- a/lib/string_helpers.c +++ b/lib/string_helpers.c @@ -4,6 +4,7 @@ * Copyright 31 August 2008 James Bottomley * Copyright (C) 2013, Intel Corporation */ +#include <linux/bug.h> #include <linux/kernel.h> #include <linux/math64.h> #include <linux/export.h> @@ -14,7 +15,8 @@ /** * string_get_size - get the size in the specified units - * @size: The size to be converted + * @size: The size to be converted in blocks + * @blk_size: Size of the block (use 1 for size in bytes) * @units: units to use (powers of 1000 or 1024) * @buf: buffer to format to * @len: length of buffer @@ -24,14 +26,14 @@ * at least 9 bytes and will always be zero terminated. * */ -void string_get_size(u64 size, const enum string_size_units units, +void string_get_size(u64 size, u64 blk_size, const enum string_size_units units, char *buf, int len) { static const char *const units_10[] = { - "B", "kB", "MB", "GB", "TB", "PB", "EB" + "B", "kB", "MB", "GB", "TB", "PB", "EB", "ZB", "YB" }; static const char *const units_2[] = { - "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB" + "B", "KiB", "MiB", "GiB", "TiB", "PiB", "EiB", "ZiB", "YiB" }; static const char *const *const units_str[] = { [STRING_UNITS_10] = units_10, @@ -42,31 +44,57 @@ void string_get_size(u64 size, const enum string_size_units units, [STRING_UNITS_2] = 1024, }; int i, j; - u32 remainder = 0, sf_cap; + u32 remainder = 0, sf_cap, exp; char tmp[8]; + const char *unit; tmp[0] = '\0'; i = 0; - if (size >= divisor[units]) { - while (size >= divisor[units]) { - remainder = do_div(size, divisor[units]); - i++; - } + if (!size) + goto out; - sf_cap = size; - for (j = 0; sf_cap*10 < 1000; j++) - sf_cap *= 10; + while (blk_size >= divisor[units]) { + remainder = do_div(blk_size, divisor[units]); + i++; + } - if (j) { - remainder *= 1000; - remainder /= divisor[units]; - snprintf(tmp, sizeof(tmp), ".%03u", remainder); - tmp[j+1] = '\0'; - } + exp = divisor[units] / (u32)blk_size; + if (size >= exp) { + remainder = do_div(size, divisor[units]); + remainder *= blk_size; + i++; + } else { + remainder *= size; + } + + size *= blk_size; + size += remainder / divisor[units]; + remainder %= divisor[units]; + + while (size >= divisor[units]) { + remainder = do_div(size, divisor[units]); + i++; } + sf_cap = size; + for (j = 0; sf_cap*10 < 1000; j++) + sf_cap *= 10; + + if (j) { + remainder *= 1000; + remainder /= divisor[units]; + snprintf(tmp, sizeof(tmp), ".%03u", remainder); + tmp[j+1] = '\0'; + } + + out: + if (i >= ARRAY_SIZE(units_2)) + unit = "UNK"; + else + unit = units_str[units][i]; + snprintf(buf, len, "%u%s %s", (u32)size, - tmp, units_str[units][i]); + tmp, unit); } EXPORT_SYMBOL(string_get_size); @@ -239,29 +267,21 @@ int string_unescape(char *src, char *dst, size_t size, unsigned int flags) } EXPORT_SYMBOL(string_unescape); -static int escape_passthrough(unsigned char c, char **dst, size_t *osz) +static bool escape_passthrough(unsigned char c, char **dst, char *end) { char *out = *dst; - if (*osz < 1) - return -ENOMEM; - - *out++ = c; - - *dst = out; - *osz -= 1; - - return 1; + if (out < end) + *out = c; + *dst = out + 1; + return true; } -static int escape_space(unsigned char c, char **dst, size_t *osz) +static bool escape_space(unsigned char c, char **dst, char *end) { char *out = *dst; unsigned char to; - if (*osz < 2) - return -ENOMEM; - switch (c) { case '\n': to = 'n'; @@ -279,26 +299,25 @@ static int escape_space(unsigned char c, char **dst, size_t *osz) to = 'f'; break; default: - return 0; + return false; } - *out++ = '\\'; - *out++ = to; + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = to; + ++out; *dst = out; - *osz -= 2; - - return 1; + return true; } -static int escape_special(unsigned char c, char **dst, size_t *osz) +static bool escape_special(unsigned char c, char **dst, char *end) { char *out = *dst; unsigned char to; - if (*osz < 2) - return -ENOMEM; - switch (c) { case '\\': to = '\\'; @@ -310,71 +329,78 @@ static int escape_special(unsigned char c, char **dst, size_t *osz) to = 'e'; break; default: - return 0; + return false; } - *out++ = '\\'; - *out++ = to; + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = to; + ++out; *dst = out; - *osz -= 2; - - return 1; + return true; } -static int escape_null(unsigned char c, char **dst, size_t *osz) +static bool escape_null(unsigned char c, char **dst, char *end) { char *out = *dst; - if (*osz < 2) - return -ENOMEM; - if (c) - return 0; + return false; - *out++ = '\\'; - *out++ = '0'; + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = '0'; + ++out; *dst = out; - *osz -= 2; - - return 1; + return true; } -static int escape_octal(unsigned char c, char **dst, size_t *osz) +static bool escape_octal(unsigned char c, char **dst, char *end) { char *out = *dst; - if (*osz < 4) - return -ENOMEM; - - *out++ = '\\'; - *out++ = ((c >> 6) & 0x07) + '0'; - *out++ = ((c >> 3) & 0x07) + '0'; - *out++ = ((c >> 0) & 0x07) + '0'; + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = ((c >> 6) & 0x07) + '0'; + ++out; + if (out < end) + *out = ((c >> 3) & 0x07) + '0'; + ++out; + if (out < end) + *out = ((c >> 0) & 0x07) + '0'; + ++out; *dst = out; - *osz -= 4; - - return 1; + return true; } -static int escape_hex(unsigned char c, char **dst, size_t *osz) +static bool escape_hex(unsigned char c, char **dst, char *end) { char *out = *dst; - if (*osz < 4) - return -ENOMEM; - - *out++ = '\\'; - *out++ = 'x'; - *out++ = hex_asc_hi(c); - *out++ = hex_asc_lo(c); + if (out < end) + *out = '\\'; + ++out; + if (out < end) + *out = 'x'; + ++out; + if (out < end) + *out = hex_asc_hi(c); + ++out; + if (out < end) + *out = hex_asc_lo(c); + ++out; *dst = out; - *osz -= 4; - - return 1; + return true; } /** @@ -426,19 +452,17 @@ static int escape_hex(unsigned char c, char **dst, size_t *osz) * it if needs. * * Return: - * The amount of the characters processed to the destination buffer, or - * %-ENOMEM if the size of buffer is not enough to put an escaped character is - * returned. - * - * Even in the case of error @dst pointer will be updated to point to the byte - * after the last processed character. + * The total size of the escaped output that would be generated for + * the given input and flags. To check whether the output was + * truncated, compare the return value to osz. There is room left in + * dst for a '\0' terminator if and only if ret < osz. */ -int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, +int string_escape_mem(const char *src, size_t isz, char *dst, size_t osz, unsigned int flags, const char *esc) { - char *out = *dst, *p = out; + char *p = dst; + char *end = p + osz; bool is_dict = esc && *esc; - int ret = 0; while (isz--) { unsigned char c = *src++; @@ -458,55 +482,26 @@ int string_escape_mem(const char *src, size_t isz, char **dst, size_t osz, (is_dict && !strchr(esc, c))) { /* do nothing */ } else { - if (flags & ESCAPE_SPACE) { - ret = escape_space(c, &p, &osz); - if (ret < 0) - break; - if (ret > 0) - continue; - } - - if (flags & ESCAPE_SPECIAL) { - ret = escape_special(c, &p, &osz); - if (ret < 0) - break; - if (ret > 0) - continue; - } - - if (flags & ESCAPE_NULL) { - ret = escape_null(c, &p, &osz); - if (ret < 0) - break; - if (ret > 0) - continue; - } + if (flags & ESCAPE_SPACE && escape_space(c, &p, end)) + continue; + + if (flags & ESCAPE_SPECIAL && escape_special(c, &p, end)) + continue; + + if (flags & ESCAPE_NULL && escape_null(c, &p, end)) + continue; /* ESCAPE_OCTAL and ESCAPE_HEX always go last */ - if (flags & ESCAPE_OCTAL) { - ret = escape_octal(c, &p, &osz); - if (ret < 0) - break; + if (flags & ESCAPE_OCTAL && escape_octal(c, &p, end)) continue; - } - if (flags & ESCAPE_HEX) { - ret = escape_hex(c, &p, &osz); - if (ret < 0) - break; + + if (flags & ESCAPE_HEX && escape_hex(c, &p, end)) continue; - } } - ret = escape_passthrough(c, &p, &osz); - if (ret < 0) - break; + escape_passthrough(c, &p, end); } - *dst = p; - - if (ret < 0) - return ret; - - return p - out; + return p - dst; } EXPORT_SYMBOL(string_escape_mem); diff --git a/lib/test-hexdump.c b/lib/test-hexdump.c index daf29a390a89..c227cc43ec0a 100644 --- a/lib/test-hexdump.c +++ b/lib/test-hexdump.c @@ -18,26 +18,26 @@ static const unsigned char data_b[] = { static const unsigned char data_a[] = ".2.{....p..$}.4...1.....L...C..."; -static const char *test_data_1_le[] __initconst = { +static const char * const test_data_1_le[] __initconst = { "be", "32", "db", "7b", "0a", "18", "93", "b2", "70", "ba", "c4", "24", "7d", "83", "34", "9b", "a6", "9c", "31", "ad", "9c", "0f", "ac", "e9", "4c", "d1", "19", "99", "43", "b1", "af", "0c", }; -static const char *test_data_2_le[] __initconst = { +static const char *test_data_2_le[] __initdata = { "32be", "7bdb", "180a", "b293", "ba70", "24c4", "837d", "9b34", "9ca6", "ad31", "0f9c", "e9ac", "d14c", "9919", "b143", "0caf", }; -static const char *test_data_4_le[] __initconst = { +static const char *test_data_4_le[] __initdata = { "7bdb32be", "b293180a", "24c4ba70", "9b34837d", "ad319ca6", "e9ac0f9c", "9919d14c", "0cafb143", }; -static const char *test_data_8_le[] __initconst = { +static const char *test_data_8_le[] __initdata = { "b293180a7bdb32be", "9b34837d24c4ba70", "e9ac0f9cad319ca6", "0cafb1439919d14c", }; @@ -48,7 +48,7 @@ static void __init test_hexdump(size_t len, int rowsize, int groupsize, char test[32 * 3 + 2 + 32 + 1]; char real[32 * 3 + 2 + 32 + 1]; char *p; - const char **result; + const char * const *result; size_t l = len; int gs = groupsize, rs = rowsize; unsigned int i; diff --git a/lib/test-string_helpers.c b/lib/test-string_helpers.c index ab0d30e1e18f..8e376efd88a4 100644 --- a/lib/test-string_helpers.c +++ b/lib/test-string_helpers.c @@ -260,16 +260,28 @@ static __init const char *test_string_find_match(const struct test_string_2 *s2, return NULL; } +static __init void +test_string_escape_overflow(const char *in, int p, unsigned int flags, const char *esc, + int q_test, const char *name) +{ + int q_real; + + q_real = string_escape_mem(in, p, NULL, 0, flags, esc); + if (q_real != q_test) + pr_warn("Test '%s' failed: flags = %u, osz = 0, expected %d, got %d\n", + name, flags, q_test, q_real); +} + static __init void test_string_escape(const char *name, const struct test_string_2 *s2, unsigned int flags, const char *esc) { - int q_real = 512; - char *out_test = kmalloc(q_real, GFP_KERNEL); - char *out_real = kmalloc(q_real, GFP_KERNEL); + size_t out_size = 512; + char *out_test = kmalloc(out_size, GFP_KERNEL); + char *out_real = kmalloc(out_size, GFP_KERNEL); char *in = kmalloc(256, GFP_KERNEL); - char *buf = out_real; int p = 0, q_test = 0; + int q_real; if (!out_test || !out_real || !in) goto out; @@ -301,29 +313,19 @@ static __init void test_string_escape(const char *name, q_test += len; } - q_real = string_escape_mem(in, p, &buf, q_real, flags, esc); + q_real = string_escape_mem(in, p, out_real, out_size, flags, esc); test_string_check_buf(name, flags, in, p, out_real, q_real, out_test, q_test); + + test_string_escape_overflow(in, p, flags, esc, q_test, name); + out: kfree(in); kfree(out_real); kfree(out_test); } -static __init void test_string_escape_nomem(void) -{ - char *in = "\eb \\C\007\"\x90\r]"; - char out[64], *buf = out; - int rc = -ENOMEM, ret; - - ret = string_escape_str_any_np(in, &buf, strlen(in), NULL); - if (ret == rc) - return; - - pr_err("Test 'escape nomem' failed: got %d instead of %d\n", ret, rc); -} - static int __init test_string_helpers_init(void) { unsigned int i; @@ -342,8 +344,6 @@ static int __init test_string_helpers_init(void) for (i = 0; i < (ESCAPE_ANY_NP | ESCAPE_HEX) + 1; i++) test_string_escape("escape 1", escape1, i, TEST_STRING_2_DICT_1); - test_string_escape_nomem(); - return -EINVAL; } module_init(test_string_helpers_init); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index 1dfeba73fc74..b2957540d3c7 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -38,6 +38,15 @@ struct test_obj { struct rhash_head node; }; +static const struct rhashtable_params test_rht_params = { + .nelem_hint = TEST_HT_SIZE, + .head_offset = offsetof(struct test_obj, node), + .key_offset = offsetof(struct test_obj, value), + .key_len = sizeof(int), + .hashfn = jhash, + .nulls_base = (3U << RHT_BASE_SHIFT), +}; + static int __init test_rht_lookup(struct rhashtable *ht) { unsigned int i; @@ -47,7 +56,7 @@ static int __init test_rht_lookup(struct rhashtable *ht) bool expected = !(i % 2); u32 key = i; - obj = rhashtable_lookup(ht, &key); + obj = rhashtable_lookup_fast(ht, &key, test_rht_params); if (expected && !obj) { pr_warn("Test failed: Could not find key %u\n", key); @@ -80,7 +89,7 @@ static void test_bucket_stats(struct rhashtable *ht, bool quiet) rcu_cnt = cnt = 0; if (!quiet) - pr_info(" [%#4x/%zu]", i, tbl->size); + pr_info(" [%#4x/%u]", i, tbl->size); rht_for_each_entry_rcu(obj, pos, tbl, i, node) { cnt++; @@ -133,7 +142,11 @@ static int __init test_rhashtable(struct rhashtable *ht) obj->ptr = TEST_PTR; obj->value = i * 2; - rhashtable_insert(ht, &obj->node); + err = rhashtable_insert_fast(ht, &obj->node, test_rht_params); + if (err) { + kfree(obj); + goto error; + } } rcu_read_lock(); @@ -141,30 +154,6 @@ static int __init test_rhashtable(struct rhashtable *ht) test_rht_lookup(ht); rcu_read_unlock(); - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table expansion iteration %u...\n", i); - mutex_lock(&ht->mutex); - rhashtable_expand(ht); - mutex_unlock(&ht->mutex); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - - for (i = 0; i < TEST_NEXPANDS; i++) { - pr_info(" Table shrinkage iteration %u...\n", i); - mutex_lock(&ht->mutex); - rhashtable_shrink(ht); - mutex_unlock(&ht->mutex); - - rcu_read_lock(); - pr_info(" Verifying lookups...\n"); - test_rht_lookup(ht); - rcu_read_unlock(); - } - rcu_read_lock(); test_bucket_stats(ht, true); rcu_read_unlock(); @@ -173,10 +162,10 @@ static int __init test_rhashtable(struct rhashtable *ht) for (i = 0; i < TEST_ENTRIES; i++) { u32 key = i * 2; - obj = rhashtable_lookup(ht, &key); + obj = rhashtable_lookup_fast(ht, &key, test_rht_params); BUG_ON(!obj); - rhashtable_remove(ht, &obj->node); + rhashtable_remove_fast(ht, &obj->node, test_rht_params); kfree(obj); } @@ -191,24 +180,15 @@ error: return err; } +static struct rhashtable ht; + static int __init test_rht_init(void) { - struct rhashtable ht; - struct rhashtable_params params = { - .nelem_hint = TEST_HT_SIZE, - .head_offset = offsetof(struct test_obj, node), - .key_offset = offsetof(struct test_obj, value), - .key_len = sizeof(int), - .hashfn = jhash, - .nulls_base = (3U << RHT_BASE_SHIFT), - .grow_decision = rht_grow_above_75, - .shrink_decision = rht_shrink_below_30, - }; int err; pr_info("Running resizable hashtable tests...\n"); - err = rhashtable_init(&ht, ¶ms); + err = rhashtable_init(&ht, &test_rht_params); if (err < 0) { pr_warn("Test failed: Unable to initialize hashtable: %d\n", err); @@ -222,6 +202,11 @@ static int __init test_rht_init(void) return err; } +static void __exit test_rht_exit(void) +{ +} + module_init(test_rht_init); +module_exit(test_rht_exit); MODULE_LICENSE("GPL v2"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index b235c96167d3..da39c608a28c 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -17,6 +17,7 @@ */ #include <stdarg.h> +#include <linux/clk-provider.h> #include <linux/module.h> /* for KSYM_SYMBOL_LEN */ #include <linux/types.h> #include <linux/string.h> @@ -32,6 +33,7 @@ #include <asm/page.h> /* for PAGE_SIZE */ #include <asm/sections.h> /* for dereference_function_descriptor() */ +#include <asm/byteorder.h> /* cpu_to_le16 */ #include <linux/string_helpers.h> #include "kstrtox.h" @@ -121,142 +123,145 @@ int skip_atoi(const char **s) return i; } -/* Decimal conversion is by far the most typical, and is used - * for /proc and /sys data. This directly impacts e.g. top performance - * with many processes running. We optimize it for speed - * using ideas described at <http://www.cs.uiowa.edu/~jones/bcd/divide.html> - * (with permission from the author, Douglas W. Jones). +/* + * Decimal conversion is by far the most typical, and is used for + * /proc and /sys data. This directly impacts e.g. top performance + * with many processes running. We optimize it for speed by emitting + * two characters at a time, using a 200 byte lookup table. This + * roughly halves the number of multiplications compared to computing + * the digits one at a time. Implementation strongly inspired by the + * previous version, which in turn used ideas described at + * <http://www.cs.uiowa.edu/~jones/bcd/divide.html> (with permission + * from the author, Douglas W. Jones). + * + * It turns out there is precisely one 26 bit fixed-point + * approximation a of 64/100 for which x/100 == (x * (u64)a) >> 32 + * holds for all x in [0, 10^8-1], namely a = 0x28f5c29. The actual + * range happens to be somewhat larger (x <= 1073741898), but that's + * irrelevant for our purpose. + * + * For dividing a number in the range [10^4, 10^6-1] by 100, we still + * need a 32x32->64 bit multiply, so we simply use the same constant. + * + * For dividing a number in the range [100, 10^4-1] by 100, there are + * several options. The simplest is (x * 0x147b) >> 19, which is valid + * for all x <= 43698. */ -#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 -/* Formats correctly any integer in [0, 999999999] */ +static const u16 decpair[100] = { +#define _(x) (__force u16) cpu_to_le16(((x % 10) | ((x / 10) << 8)) + 0x3030) + _( 0), _( 1), _( 2), _( 3), _( 4), _( 5), _( 6), _( 7), _( 8), _( 9), + _(10), _(11), _(12), _(13), _(14), _(15), _(16), _(17), _(18), _(19), + _(20), _(21), _(22), _(23), _(24), _(25), _(26), _(27), _(28), _(29), + _(30), _(31), _(32), _(33), _(34), _(35), _(36), _(37), _(38), _(39), + _(40), _(41), _(42), _(43), _(44), _(45), _(46), _(47), _(48), _(49), + _(50), _(51), _(52), _(53), _(54), _(55), _(56), _(57), _(58), _(59), + _(60), _(61), _(62), _(63), _(64), _(65), _(66), _(67), _(68), _(69), + _(70), _(71), _(72), _(73), _(74), _(75), _(76), _(77), _(78), _(79), + _(80), _(81), _(82), _(83), _(84), _(85), _(86), _(87), _(88), _(89), + _(90), _(91), _(92), _(93), _(94), _(95), _(96), _(97), _(98), _(99), +#undef _ +}; + +/* + * This will print a single '0' even if r == 0, since we would + * immediately jump to out_r where two 0s would be written but only + * one of them accounted for in buf. This is needed by ip4_string + * below. All other callers pass a non-zero value of r. +*/ static noinline_for_stack -char *put_dec_full9(char *buf, unsigned q) +char *put_dec_trunc8(char *buf, unsigned r) { - unsigned r; + unsigned q; - /* - * Possible ways to approx. divide by 10 - * (x * 0x1999999a) >> 32 x < 1073741829 (multiply must be 64-bit) - * (x * 0xcccd) >> 19 x < 81920 (x < 262149 when 64-bit mul) - * (x * 0x6667) >> 18 x < 43699 - * (x * 0x3334) >> 17 x < 16389 - * (x * 0x199a) >> 16 x < 16389 - * (x * 0x0ccd) >> 15 x < 16389 - * (x * 0x0667) >> 14 x < 2739 - * (x * 0x0334) >> 13 x < 1029 - * (x * 0x019a) >> 12 x < 1029 - * (x * 0x00cd) >> 11 x < 1029 shorter code than * 0x67 (on i386) - * (x * 0x0067) >> 10 x < 179 - * (x * 0x0034) >> 9 x < 69 same - * (x * 0x001a) >> 8 x < 69 same - * (x * 0x000d) >> 7 x < 69 same, shortest code (on i386) - * (x * 0x0007) >> 6 x < 19 - * See <http://www.cs.uiowa.edu/~jones/bcd/divide.html> - */ - r = (q * (uint64_t)0x1999999a) >> 32; - *buf++ = (q - 10 * r) + '0'; /* 1 */ - q = (r * (uint64_t)0x1999999a) >> 32; - *buf++ = (r - 10 * q) + '0'; /* 2 */ - r = (q * (uint64_t)0x1999999a) >> 32; - *buf++ = (q - 10 * r) + '0'; /* 3 */ - q = (r * (uint64_t)0x1999999a) >> 32; - *buf++ = (r - 10 * q) + '0'; /* 4 */ - r = (q * (uint64_t)0x1999999a) >> 32; - *buf++ = (q - 10 * r) + '0'; /* 5 */ - /* Now value is under 10000, can avoid 64-bit multiply */ - q = (r * 0x199a) >> 16; - *buf++ = (r - 10 * q) + '0'; /* 6 */ - r = (q * 0xcd) >> 11; - *buf++ = (q - 10 * r) + '0'; /* 7 */ - q = (r * 0xcd) >> 11; - *buf++ = (r - 10 * q) + '0'; /* 8 */ - *buf++ = q + '0'; /* 9 */ + /* 1 <= r < 10^8 */ + if (r < 100) + goto out_r; + + /* 100 <= r < 10^8 */ + q = (r * (u64)0x28f5c29) >> 32; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; + + /* 1 <= q < 10^6 */ + if (q < 100) + goto out_q; + + /* 100 <= q < 10^6 */ + r = (q * (u64)0x28f5c29) >> 32; + *((u16 *)buf) = decpair[q - 100*r]; + buf += 2; + + /* 1 <= r < 10^4 */ + if (r < 100) + goto out_r; + + /* 100 <= r < 10^4 */ + q = (r * 0x147b) >> 19; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; +out_q: + /* 1 <= q < 100 */ + r = q; +out_r: + /* 1 <= r < 100 */ + *((u16 *)buf) = decpair[r]; + buf += r < 10 ? 1 : 2; return buf; } -#endif -/* Similar to above but do not pad with zeros. - * Code can be easily arranged to print 9 digits too, but our callers - * always call put_dec_full9() instead when the number has 9 decimal digits. - */ +#if BITS_PER_LONG == 64 && BITS_PER_LONG_LONG == 64 static noinline_for_stack -char *put_dec_trunc8(char *buf, unsigned r) +char *put_dec_full8(char *buf, unsigned r) { unsigned q; - /* Copy of previous function's body with added early returns */ - while (r >= 10000) { - q = r + '0'; - r = (r * (uint64_t)0x1999999a) >> 32; - *buf++ = q - 10*r; - } - - q = (r * 0x199a) >> 16; /* r <= 9999 */ - *buf++ = (r - 10 * q) + '0'; - if (q == 0) - return buf; - r = (q * 0xcd) >> 11; /* q <= 999 */ - *buf++ = (q - 10 * r) + '0'; - if (r == 0) - return buf; - q = (r * 0xcd) >> 11; /* r <= 99 */ - *buf++ = (r - 10 * q) + '0'; - if (q == 0) - return buf; - *buf++ = q + '0'; /* q <= 9 */ - return buf; -} + /* 0 <= r < 10^8 */ + q = (r * (u64)0x28f5c29) >> 32; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; -/* There are two algorithms to print larger numbers. - * One is generic: divide by 1000000000 and repeatedly print - * groups of (up to) 9 digits. It's conceptually simple, - * but requires a (unsigned long long) / 1000000000 division. - * - * Second algorithm splits 64-bit unsigned long long into 16-bit chunks, - * manipulates them cleverly and generates groups of 4 decimal digits. - * It so happens that it does NOT require long long division. - * - * If long is > 32 bits, division of 64-bit values is relatively easy, - * and we will use the first algorithm. - * If long long is > 64 bits (strange architecture with VERY large long long), - * second algorithm can't be used, and we again use the first one. - * - * Else (if long is 32 bits and long long is 64 bits) we use second one. - */ + /* 0 <= q < 10^6 */ + r = (q * (u64)0x28f5c29) >> 32; + *((u16 *)buf) = decpair[q - 100*r]; + buf += 2; -#if BITS_PER_LONG != 32 || BITS_PER_LONG_LONG != 64 + /* 0 <= r < 10^4 */ + q = (r * 0x147b) >> 19; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; -/* First algorithm: generic */ + /* 0 <= q < 100 */ + *((u16 *)buf) = decpair[q]; + buf += 2; + return buf; +} -static +static noinline_for_stack char *put_dec(char *buf, unsigned long long n) { - if (n >= 100*1000*1000) { - while (n >= 1000*1000*1000) - buf = put_dec_full9(buf, do_div(n, 1000*1000*1000)); - if (n >= 100*1000*1000) - return put_dec_full9(buf, n); - } + if (n >= 100*1000*1000) + buf = put_dec_full8(buf, do_div(n, 100*1000*1000)); + /* 1 <= n <= 1.6e11 */ + if (n >= 100*1000*1000) + buf = put_dec_full8(buf, do_div(n, 100*1000*1000)); + /* 1 <= n < 1e8 */ return put_dec_trunc8(buf, n); } -#else - -/* Second algorithm: valid only for 64-bit long longs */ +#elif BITS_PER_LONG == 32 && BITS_PER_LONG_LONG == 64 -/* See comment in put_dec_full9 for choice of constants */ -static noinline_for_stack -void put_dec_full4(char *buf, unsigned q) +static void +put_dec_full4(char *buf, unsigned r) { - unsigned r; - r = (q * 0xccd) >> 15; - buf[0] = (q - 10 * r) + '0'; - q = (r * 0xcd) >> 11; - buf[1] = (r - 10 * q) + '0'; - r = (q * 0xcd) >> 11; - buf[2] = (q - 10 * r) + '0'; - buf[3] = r + '0'; + unsigned q; + + /* 0 <= r < 10^4 */ + q = (r * 0x147b) >> 19; + *((u16 *)buf) = decpair[r - 100*q]; + buf += 2; + /* 0 <= q < 100 */ + *((u16 *)buf) = decpair[q]; } /* @@ -264,9 +269,9 @@ void put_dec_full4(char *buf, unsigned q) * The approximation x/10000 == (x * 0x346DC5D7) >> 43 * holds for all x < 1,128,869,999. The largest value this * helper will ever be asked to convert is 1,125,520,955. - * (d1 in the put_dec code, assuming n is all-ones). + * (second call in the put_dec code, assuming n is all-ones). */ -static +static noinline_for_stack unsigned put_dec_helper4(char *buf, unsigned x) { uint32_t q = (x * (uint64_t)0x346DC5D7) >> 43; @@ -293,6 +298,8 @@ char *put_dec(char *buf, unsigned long long n) d2 = (h ) & 0xffff; d3 = (h >> 16); /* implicit "& 0xffff" */ + /* n = 2^48 d3 + 2^32 d2 + 2^16 d1 + d0 + = 281_4749_7671_0656 d3 + 42_9496_7296 d2 + 6_5536 d1 + d0 */ q = 656 * d3 + 7296 * d2 + 5536 * d1 + ((uint32_t)n & 0xffff); q = put_dec_helper4(buf, q); @@ -322,7 +329,8 @@ char *put_dec(char *buf, unsigned long long n) */ int num_to_str(char *buf, int size, unsigned long long num) { - char tmp[sizeof(num) * 3]; + /* put_dec requires 2-byte alignment of the buffer. */ + char tmp[sizeof(num) * 3] __aligned(2); int idx, len; /* put_dec() may work incorrectly for num = 0 (generate "", not "0") */ @@ -340,11 +348,11 @@ int num_to_str(char *buf, int size, unsigned long long num) return len; } -#define ZEROPAD 1 /* pad with zero */ -#define SIGN 2 /* unsigned/signed long */ +#define SIGN 1 /* unsigned/signed, must be 1 */ +#define LEFT 2 /* left justified */ #define PLUS 4 /* show plus */ #define SPACE 8 /* space if plus */ -#define LEFT 16 /* left justified */ +#define ZEROPAD 16 /* pad with zero, must be 16 == '0' - ' ' */ #define SMALL 32 /* use lowercase in hex (must be 32 == 0x20) */ #define SPECIAL 64 /* prefix hex with "0x", octal with "0" */ @@ -383,10 +391,8 @@ static noinline_for_stack char *number(char *buf, char *end, unsigned long long num, struct printf_spec spec) { - /* we are called with base 8, 10 or 16, only, thus don't need "G..." */ - static const char digits[16] = "0123456789ABCDEF"; /* "GHIJKLMNOPQRSTUVWXYZ"; */ - - char tmp[66]; + /* put_dec requires 2-byte alignment of the buffer. */ + char tmp[3 * sizeof(num)] __aligned(2); char sign; char locase; int need_pfx = ((spec.flags & SPECIAL) && spec.base != 10); @@ -422,12 +428,7 @@ char *number(char *buf, char *end, unsigned long long num, /* generate full string in tmp[], in reverse order */ i = 0; if (num < spec.base) - tmp[i++] = digits[num] | locase; - /* Generic code, for any base: - else do { - tmp[i++] = (digits[do_div(num,base)] | locase); - } while (num != 0); - */ + tmp[i++] = hex_asc_upper[num] | locase; else if (spec.base != 10) { /* 8 or 16 */ int mask = spec.base - 1; int shift = 3; @@ -435,7 +436,7 @@ char *number(char *buf, char *end, unsigned long long num, if (spec.base == 16) shift = 4; do { - tmp[i++] = (digits[((unsigned char)num) & mask] | locase); + tmp[i++] = (hex_asc_upper[((unsigned char)num) & mask] | locase); num >>= shift; } while (num); } else { /* base 10 */ @@ -447,7 +448,7 @@ char *number(char *buf, char *end, unsigned long long num, spec.precision = i; /* leading space padding */ spec.field_width -= spec.precision; - if (!(spec.flags & (ZEROPAD+LEFT))) { + if (!(spec.flags & (ZEROPAD | LEFT))) { while (--spec.field_width >= 0) { if (buf < end) *buf = ' '; @@ -475,7 +476,8 @@ char *number(char *buf, char *end, unsigned long long num, } /* zero or space padding */ if (!(spec.flags & LEFT)) { - char c = (spec.flags & ZEROPAD) ? '0' : ' '; + char c = ' ' + (spec.flags & ZEROPAD); + BUILD_BUG_ON(' ' + ZEROPAD != '0'); while (--spec.field_width >= 0) { if (buf < end) *buf = c; @@ -783,11 +785,19 @@ char *hex_string(char *buf, char *end, u8 *addr, struct printf_spec spec, if (spec.field_width > 0) len = min_t(int, spec.field_width, 64); - for (i = 0; i < len && buf < end - 1; i++) { - buf = hex_byte_pack(buf, addr[i]); + for (i = 0; i < len; ++i) { + if (buf < end) + *buf = hex_asc_hi(addr[i]); + ++buf; + if (buf < end) + *buf = hex_asc_lo(addr[i]); + ++buf; - if (buf < end && separator && i != len - 1) - *buf++ = separator; + if (separator && i != len - 1) { + if (buf < end) + *buf = separator; + ++buf; + } } return buf; @@ -942,7 +952,7 @@ char *ip4_string(char *p, const u8 *addr, const char *fmt) break; } for (i = 0; i < 4; i++) { - char temp[3]; /* hold each IP quad in reverse order */ + char temp[4] __aligned(2); /* hold each IP quad in reverse order */ int digits = put_dec_trunc8(temp, addr[index]) - temp; if (leading_zeros) { if (digits < 3) @@ -1233,8 +1243,12 @@ char *escaped_string(char *buf, char *end, u8 *addr, struct printf_spec spec, len = spec.field_width < 0 ? 1 : spec.field_width; - /* Ignore the error. We print as many characters as we can */ - string_escape_mem(addr, len, &buf, end - buf, flags, NULL); + /* + * string_escape_mem() writes as many characters as it can to + * the given buffer, and returns the total size of the output + * had the buffer been big enough. + */ + buf += string_escape_mem(addr, len, buf, buf < end ? end - buf : 0, flags, NULL); return buf; } @@ -1322,6 +1336,30 @@ char *address_val(char *buf, char *end, const void *addr, return number(buf, end, num, spec); } +static noinline_for_stack +char *clock(char *buf, char *end, struct clk *clk, struct printf_spec spec, + const char *fmt) +{ + if (!IS_ENABLED(CONFIG_HAVE_CLK) || !clk) + return string(buf, end, NULL, spec); + + switch (fmt[1]) { + case 'r': + return number(buf, end, clk_get_rate(clk), spec); + + case 'n': + default: +#ifdef CONFIG_COMMON_CLK + return string(buf, end, __clk_get_name(clk), spec); +#else + spec.base = 16; + spec.field_width = sizeof(unsigned long) * 2 + 2; + spec.flags |= SPECIAL | SMALL | ZEROPAD; + return number(buf, end, (unsigned long)clk, spec); +#endif + } +} + int kptr_restrict __read_mostly; /* @@ -1404,6 +1442,11 @@ int kptr_restrict __read_mostly; * (default assumed to be phys_addr_t, passed by reference) * - 'd[234]' For a dentry name (optionally 2-4 last components) * - 'D[234]' Same as 'd' but for a struct file + * - 'C' For a clock, it prints the name (Common Clock Framework) or address + * (legacy clock framework) of the clock + * - 'Cn' For a clock, it prints the name (Common Clock Framework) or address + * (legacy clock framework) of the clock + * - 'Cr' For a clock, it prints the current rate of the clock * * Note: The difference between 'S' and 'F' is that on ia64 and ppc64 * function pointers are really function descriptors, which contain a @@ -1548,6 +1591,8 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); + case 'C': + return clock(buf, end, ptr, spec, fmt); case 'D': return dentry_name(buf, end, ((const struct file *)ptr)->f_path.dentry, @@ -1738,29 +1783,21 @@ qualifier: if (spec->qualifier == 'L') spec->type = FORMAT_TYPE_LONG_LONG; else if (spec->qualifier == 'l') { - if (spec->flags & SIGN) - spec->type = FORMAT_TYPE_LONG; - else - spec->type = FORMAT_TYPE_ULONG; + BUILD_BUG_ON(FORMAT_TYPE_ULONG + SIGN != FORMAT_TYPE_LONG); + spec->type = FORMAT_TYPE_ULONG + (spec->flags & SIGN); } else if (_tolower(spec->qualifier) == 'z') { spec->type = FORMAT_TYPE_SIZE_T; } else if (spec->qualifier == 't') { spec->type = FORMAT_TYPE_PTRDIFF; } else if (spec->qualifier == 'H') { - if (spec->flags & SIGN) - spec->type = FORMAT_TYPE_BYTE; - else - spec->type = FORMAT_TYPE_UBYTE; + BUILD_BUG_ON(FORMAT_TYPE_UBYTE + SIGN != FORMAT_TYPE_BYTE); + spec->type = FORMAT_TYPE_UBYTE + (spec->flags & SIGN); } else if (spec->qualifier == 'h') { - if (spec->flags & SIGN) - spec->type = FORMAT_TYPE_SHORT; - else - spec->type = FORMAT_TYPE_USHORT; + BUILD_BUG_ON(FORMAT_TYPE_USHORT + SIGN != FORMAT_TYPE_SHORT); + spec->type = FORMAT_TYPE_USHORT + (spec->flags & SIGN); } else { - if (spec->flags & SIGN) - spec->type = FORMAT_TYPE_INT; - else - spec->type = FORMAT_TYPE_UINT; + BUILD_BUG_ON(FORMAT_TYPE_UINT + SIGN != FORMAT_TYPE_INT); + spec->type = FORMAT_TYPE_UINT + (spec->flags & SIGN); } return ++fmt - start; @@ -1800,6 +1837,11 @@ qualifier: * %*pE[achnops] print an escaped buffer * %*ph[CDN] a variable-length hex string with a separator (supports up to 64 * bytes of the input) + * %pC output the name (Common Clock Framework) or address (legacy clock + * framework) of a clock + * %pCn output the name (Common Clock Framework) or address (legacy clock + * framework) of a clock + * %pCr output the current rate of a clock * %n is ignored * * ** Please update Documentation/printk-formats.txt when making changes ** |