diff options
Diffstat (limited to 'lib')
41 files changed, 828 insertions, 3853 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index e34b04b56057..706836ec314d 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -420,60 +420,15 @@ config HAS_IOPORT_MAP depends on HAS_IOMEM && !NO_IOPORT_MAP default y -config HAS_DMA - bool - depends on !NO_DMA - default y +source "kernel/dma/Kconfig" config SGL_ALLOC bool default n -config NEED_SG_DMA_LENGTH - bool - -config NEED_DMA_MAP_STATE - bool - -config ARCH_DMA_ADDR_T_64BIT - def_bool 64BIT || PHYS_ADDR_T_64BIT - config IOMMU_HELPER bool -config ARCH_HAS_SYNC_DMA_FOR_DEVICE - bool - -config ARCH_HAS_SYNC_DMA_FOR_CPU - bool - select NEED_DMA_MAP_STATE - -config DMA_DIRECT_OPS - bool - depends on HAS_DMA - -config DMA_NONCOHERENT_OPS - bool - depends on HAS_DMA - select DMA_DIRECT_OPS - -config DMA_NONCOHERENT_MMAP - bool - depends on DMA_NONCOHERENT_OPS - -config DMA_NONCOHERENT_CACHE_SYNC - bool - depends on DMA_NONCOHERENT_OPS - -config DMA_VIRT_OPS - bool - depends on HAS_DMA - -config SWIOTLB - bool - select DMA_DIRECT_OPS - select NEED_DMA_MAP_STATE - config CHECK_SIGNATURE bool diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 8838d1158d19..ab1b599202bc 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1,3 +1,5 @@ +menu "Kernel hacking" + menu "printk and dmesg options" config PRINTK_TIME @@ -30,6 +32,17 @@ config CONSOLE_LOGLEVEL_DEFAULT usage in the kernel. That is controlled by the MESSAGE_LOGLEVEL_DEFAULT option. +config CONSOLE_LOGLEVEL_QUIET + int "quiet console loglevel (1-15)" + range 1 15 + default "4" + help + loglevel to use when "quiet" is passed on the kernel commandline. + + When "quiet" is passed on the kernel commandline this loglevel + will be used as the loglevel. IOW passing "quiet" will be the + equivalent of passing "loglevel=<CONSOLE_LOGLEVEL_QUIET>" + config MESSAGE_LOGLEVEL_DEFAULT int "Default message log level (1-7)" range 1 7 @@ -198,14 +211,6 @@ config GDB_SCRIPTS instance. See Documentation/dev-tools/gdb-kernel-debugging.rst for further details. -config ENABLE_WARN_DEPRECATED - bool "Enable __deprecated logic" - default y - help - Enable the __deprecated logic in the kernel build. - Disable this to suppress the "warning: 'foo' is deprecated - (declared at kernel/power/somefile.c:1234)" messages. - config ENABLE_MUST_CHECK bool "Enable __must_check logic" default y @@ -1193,6 +1198,7 @@ config DEBUG_ATOMIC_SLEEP bool "Sleep inside atomic section checking" select PREEMPT_COUNT depends on DEBUG_KERNEL + depends on !ARCH_NO_PREEMPT help If you say Y here, various routines which may sleep will become very noisy if they are called inside atomic sections: when a spinlock is @@ -1718,7 +1724,7 @@ config KPROBES_SANITY_TEST default n help This option provides for testing basic kprobes functionality on - boot. A sample kprobe, jprobe and kretprobe are inserted and + boot. Samples of kprobe and kretprobe are inserted and verified for functionality. Say N if you are unsure. @@ -1802,6 +1808,13 @@ config TEST_BITMAP If unsure, say N. +config TEST_BITFIELD + tristate "Test bitfield functions at runtime" + help + Enable this option to test the bitfield functions at boot. + + If unsure, say N. + config TEST_UUID tristate "Test functions located in the uuid module at runtime" @@ -2034,3 +2047,7 @@ config IO_STRICT_DEVMEM if the driver using a given range cannot be disabled. If in doubt, say Y. + +source "arch/$(SRCARCH)/Kconfig.debug" + +endmenu # Kernel hacking diff --git a/lib/Kconfig.kasan b/lib/Kconfig.kasan index 3d35d062970d..befb127507c0 100644 --- a/lib/Kconfig.kasan +++ b/lib/Kconfig.kasan @@ -5,7 +5,8 @@ if HAVE_ARCH_KASAN config KASAN bool "KASan: runtime memory debugger" - depends on SLUB || (SLAB && !DEBUG_SLAB) + depends on (SLUB && SYSFS) || (SLAB && !DEBUG_SLAB) + select SLUB_DEBUG if SLUB select CONSTRUCTORS select STACKDEPOT help diff --git a/lib/Kconfig.ubsan b/lib/Kconfig.ubsan index 19d42ea75ec2..98fa559ebd80 100644 --- a/lib/Kconfig.ubsan +++ b/lib/Kconfig.ubsan @@ -1,9 +1,6 @@ config ARCH_HAS_UBSAN_SANITIZE_ALL bool -config ARCH_WANTS_UBSAN_NO_NULL - def_bool n - config UBSAN bool "Undefined behaviour sanity checker" help @@ -39,14 +36,6 @@ config UBSAN_ALIGNMENT Enabling this option on architectures that support unaligned accesses may produce a lot of false positives. -config UBSAN_NULL - bool "Enable checking of null pointers" - depends on UBSAN - default y if !ARCH_WANTS_UBSAN_NO_NULL - help - This option enables detection of memory accesses via a - null pointer. - config TEST_UBSAN tristate "Module for testing for undefined behavior detection" depends on m && UBSAN diff --git a/lib/Makefile b/lib/Makefile index 956b320292fe..d95bb2525101 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -23,15 +23,12 @@ lib-y := ctype.o string.o vsprintf.o cmdline.o \ sha1.o chacha20.o irq_regs.o argv_split.o \ flex_proportions.o ratelimit.o show_mem.o \ is_single_threaded.o plist.o decompress.o kobject_uevent.o \ - earlycpio.o seq_buf.o siphash.o \ + earlycpio.o seq_buf.o siphash.o dec_and_lock.o \ nmi_backtrace.o nodemask.o win_minmax.o lib-$(CONFIG_PRINTK) += dump_stack.o lib-$(CONFIG_MMU) += ioremap.o lib-$(CONFIG_SMP) += cpumask.o -lib-$(CONFIG_DMA_DIRECT_OPS) += dma-direct.o -lib-$(CONFIG_DMA_NONCOHERENT_OPS) += dma-noncoherent.o -lib-$(CONFIG_DMA_VIRT_OPS) += dma-virt.o lib-y += kobject.o klist.o obj-y += lockref.o @@ -40,7 +37,7 @@ obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o rhashtable.o reciprocal_div.o \ + percpu-refcount.o rhashtable.o reciprocal_div.o \ once.o refcount.o usercopy.o errseq.o bucket_locks.o obj-$(CONFIG_STRING_SELFTEST) += test_string.o obj-y += string_helpers.o @@ -68,6 +65,7 @@ obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_keys.o obj-$(CONFIG_TEST_STATIC_KEYS) += test_static_key_base.o obj-$(CONFIG_TEST_PRINTF) += test_printf.o obj-$(CONFIG_TEST_BITMAP) += test_bitmap.o +obj-$(CONFIG_TEST_BITFIELD) += test_bitfield.o obj-$(CONFIG_TEST_UUID) += test_uuid.o obj-$(CONFIG_TEST_PARMAN) += test_parman.o obj-$(CONFIG_TEST_KMOD) += test_kmod.o @@ -98,10 +96,6 @@ obj-$(CONFIG_DEBUG_PREEMPT) += smp_processor_id.o obj-$(CONFIG_DEBUG_LIST) += list_debug.o obj-$(CONFIG_DEBUG_OBJECTS) += debugobjects.o -ifneq ($(CONFIG_HAVE_DEC_LOCK),y) - lib-y += dec_and_lock.o -endif - obj-$(CONFIG_BITREVERSE) += bitrev.o obj-$(CONFIG_RATIONAL) += rational.o obj-$(CONFIG_CRC_CCITT) += crc-ccitt.o @@ -123,6 +117,7 @@ obj-$(CONFIG_ZLIB_INFLATE) += zlib_inflate/ obj-$(CONFIG_ZLIB_DEFLATE) += zlib_deflate/ obj-$(CONFIG_REED_SOLOMON) += reed_solomon/ obj-$(CONFIG_BCH) += bch.o +CFLAGS_bch.o := $(call cc-option,-Wframe-larger-than=4500) obj-$(CONFIG_LZO_COMPRESS) += lzo/ obj-$(CONFIG_LZO_DECOMPRESS) += lzo/ obj-$(CONFIG_LZ4_COMPRESS) += lz4/ @@ -148,7 +143,6 @@ obj-$(CONFIG_SMP) += percpu_counter.o obj-$(CONFIG_AUDIT_GENERIC) += audit.o obj-$(CONFIG_AUDIT_COMPAT_GENERIC) += compat_audit.o -obj-$(CONFIG_SWIOTLB) += swiotlb.o obj-$(CONFIG_IOMMU_HELPER) += iommu-helper.o obj-$(CONFIG_FAULT_INJECTION) += fault-inject.o obj-$(CONFIG_NOTIFIER_ERROR_INJECTION) += notifier-error-inject.o @@ -169,8 +163,6 @@ obj-$(CONFIG_NLATTR) += nlattr.o obj-$(CONFIG_LRU_CACHE) += lru_cache.o -obj-$(CONFIG_DMA_API_DEBUG) += dma-debug.o - obj-$(CONFIG_GENERIC_CSUM) += checksum.o obj-$(CONFIG_GENERIC_ATOMIC64) += atomic64.o diff --git a/lib/atomic64.c b/lib/atomic64.c index 53c2d5edc826..1d91e31eceec 100644 --- a/lib/atomic64.c +++ b/lib/atomic64.c @@ -178,18 +178,18 @@ long long atomic64_xchg(atomic64_t *v, long long new) } EXPORT_SYMBOL(atomic64_xchg); -int atomic64_add_unless(atomic64_t *v, long long a, long long u) +long long atomic64_fetch_add_unless(atomic64_t *v, long long a, long long u) { unsigned long flags; raw_spinlock_t *lock = lock_addr(v); - int ret = 0; + long long val; raw_spin_lock_irqsave(lock, flags); - if (v->counter != u) { + val = v->counter; + if (val != u) v->counter += a; - ret = 1; - } raw_spin_unlock_irqrestore(lock, flags); - return ret; + + return val; } -EXPORT_SYMBOL(atomic64_add_unless); +EXPORT_SYMBOL(atomic64_fetch_add_unless); diff --git a/lib/bch.c b/lib/bch.c index bc89dfe4d1b3..7b0f2006698b 100644 --- a/lib/bch.c +++ b/lib/bch.c @@ -78,15 +78,22 @@ #define GF_M(_p) (CONFIG_BCH_CONST_M) #define GF_T(_p) (CONFIG_BCH_CONST_T) #define GF_N(_p) ((1 << (CONFIG_BCH_CONST_M))-1) +#define BCH_MAX_M (CONFIG_BCH_CONST_M) #else #define GF_M(_p) ((_p)->m) #define GF_T(_p) ((_p)->t) #define GF_N(_p) ((_p)->n) +#define BCH_MAX_M 15 #endif +#define BCH_MAX_T (((1 << BCH_MAX_M) - 1) / BCH_MAX_M) + #define BCH_ECC_WORDS(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 32) #define BCH_ECC_BYTES(_p) DIV_ROUND_UP(GF_M(_p)*GF_T(_p), 8) +#define BCH_ECC_MAX_WORDS DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 32) +#define BCH_ECC_MAX_BYTES DIV_ROUND_UP(BCH_MAX_M * BCH_MAX_T, 8) + #ifndef dbg #define dbg(_fmt, args...) do {} while (0) #endif @@ -187,7 +194,8 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, const unsigned int l = BCH_ECC_WORDS(bch)-1; unsigned int i, mlen; unsigned long m; - uint32_t w, r[l+1]; + uint32_t w, r[BCH_ECC_MAX_WORDS]; + const size_t r_bytes = BCH_ECC_WORDS(bch) * sizeof(*r); const uint32_t * const tab0 = bch->mod8_tab; const uint32_t * const tab1 = tab0 + 256*(l+1); const uint32_t * const tab2 = tab1 + 256*(l+1); @@ -198,7 +206,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, /* load ecc parity bytes into internal 32-bit buffer */ load_ecc8(bch, bch->ecc_buf, ecc); } else { - memset(bch->ecc_buf, 0, sizeof(r)); + memset(bch->ecc_buf, 0, r_bytes); } /* process first unaligned data bytes */ @@ -215,7 +223,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, mlen = len/4; data += 4*mlen; len -= 4*mlen; - memcpy(r, bch->ecc_buf, sizeof(r)); + memcpy(r, bch->ecc_buf, r_bytes); /* * split each 32-bit word into 4 polynomials of weight 8 as follows: @@ -241,7 +249,7 @@ void encode_bch(struct bch_control *bch, const uint8_t *data, r[l] = p0[l]^p1[l]^p2[l]^p3[l]; } - memcpy(bch->ecc_buf, r, sizeof(r)); + memcpy(bch->ecc_buf, r, r_bytes); /* process last unaligned bytes */ if (len) @@ -434,7 +442,7 @@ static int solve_linear_system(struct bch_control *bch, unsigned int *rows, { const int m = GF_M(bch); unsigned int tmp, mask; - int rem, c, r, p, k, param[m]; + int rem, c, r, p, k, param[BCH_MAX_M]; k = 0; mask = 1 << m; @@ -1114,7 +1122,7 @@ static int build_deg2_base(struct bch_control *bch) { const int m = GF_M(bch); int i, j, r; - unsigned int sum, x, y, remaining, ak = 0, xi[m]; + unsigned int sum, x, y, remaining, ak = 0, xi[BCH_MAX_M]; /* find k s.t. Tr(a^k) = 1 and 0 <= k < m */ for (i = 0; i < m; i++) { @@ -1254,7 +1262,6 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) struct bch_control *bch = NULL; const int min_m = 5; - const int max_m = 15; /* default primitive polynomials */ static const unsigned int prim_poly_tab[] = { @@ -1270,7 +1277,7 @@ struct bch_control *init_bch(int m, int t, unsigned int prim_poly) goto fail; } #endif - if ((m < min_m) || (m > max_m)) + if ((m < min_m) || (m > BCH_MAX_M)) /* * values of m greater than 15 are not currently supported; * supporting m > 15 would require changing table base type diff --git a/lib/crc32.c b/lib/crc32.c index 2ef20fe84b69..a6c9afafc8c8 100644 --- a/lib/crc32.c +++ b/lib/crc32.c @@ -27,6 +27,7 @@ /* see: Documentation/crc32.txt for a description of algorithms */ #include <linux/crc32.h> +#include <linux/crc32poly.h> #include <linux/module.h> #include <linux/types.h> #include <linux/sched.h> @@ -184,7 +185,7 @@ static inline u32 __pure crc32_le_generic(u32 crc, unsigned char const *p, #if CRC_LE_BITS == 1 u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) { - return crc32_le_generic(crc, p, len, NULL, CRCPOLY_LE); + return crc32_le_generic(crc, p, len, NULL, CRC32_POLY_LE); } u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { @@ -194,7 +195,7 @@ u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) u32 __pure crc32_le(u32 crc, unsigned char const *p, size_t len) { return crc32_le_generic(crc, p, len, - (const u32 (*)[256])crc32table_le, CRCPOLY_LE); + (const u32 (*)[256])crc32table_le, CRC32_POLY_LE); } u32 __pure __crc32c_le(u32 crc, unsigned char const *p, size_t len) { @@ -268,7 +269,7 @@ static u32 __attribute_const__ crc32_generic_shift(u32 crc, size_t len, u32 __attribute_const__ crc32_le_shift(u32 crc, size_t len) { - return crc32_generic_shift(crc, len, CRCPOLY_LE); + return crc32_generic_shift(crc, len, CRC32_POLY_LE); } u32 __attribute_const__ __crc32c_le_shift(u32 crc, size_t len) @@ -330,13 +331,13 @@ static inline u32 __pure crc32_be_generic(u32 crc, unsigned char const *p, #if CRC_LE_BITS == 1 u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { - return crc32_be_generic(crc, p, len, NULL, CRCPOLY_BE); + return crc32_be_generic(crc, p, len, NULL, CRC32_POLY_BE); } #else u32 __pure crc32_be(u32 crc, unsigned char const *p, size_t len) { return crc32_be_generic(crc, p, len, - (const u32 (*)[256])crc32table_be, CRCPOLY_BE); + (const u32 (*)[256])crc32table_be, CRC32_POLY_BE); } #endif EXPORT_SYMBOL(crc32_be); diff --git a/lib/crc32defs.h b/lib/crc32defs.h index cb275a28a750..0c8fb5923e7e 100644 --- a/lib/crc32defs.h +++ b/lib/crc32defs.h @@ -1,18 +1,4 @@ /* SPDX-License-Identifier: GPL-2.0 */ -/* - * There are multiple 16-bit CRC polynomials in common use, but this is - * *the* standard CRC-32 polynomial, first popularized by Ethernet. - * x^32+x^26+x^23+x^22+x^16+x^12+x^11+x^10+x^8+x^7+x^5+x^4+x^2+x^1+x^0 - */ -#define CRCPOLY_LE 0xedb88320 -#define CRCPOLY_BE 0x04c11db7 - -/* - * This is the CRC32c polynomial, as outlined by Castagnoli. - * x^32+x^28+x^27+x^26+x^25+x^23+x^22+x^20+x^19+x^18+x^14+x^13+x^11+x^10+x^9+ - * x^8+x^6+x^0 - */ -#define CRC32C_POLY_LE 0x82F63B78 /* Try to choose an implementation variant via Kconfig */ #ifdef CONFIG_CRC32_SLICEBY8 diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 994be4805cec..70935ed91125 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -360,9 +360,12 @@ static void debug_object_is_on_stack(void *addr, int onstack) limit++; if (is_on_stack) - pr_warn("object is on stack, but not annotated\n"); + pr_warn("object %p is on stack %p, but NOT annotated.\n", addr, + task_stack_page(current)); else - pr_warn("object is not on stack, but annotated\n"); + pr_warn("object %p is NOT on stack %p, but annotated.\n", addr, + task_stack_page(current)); + WARN_ON(1); } @@ -1185,8 +1188,7 @@ void __init debug_objects_mem_init(void) if (!obj_cache || debug_objects_replace_static_objects()) { debug_objects_enabled = 0; - if (obj_cache) - kmem_cache_destroy(obj_cache); + kmem_cache_destroy(obj_cache); pr_warn("out of memory.\n"); } else debug_objects_selftest(); diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index 347fa7ac2e8a..9555b68bb774 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -33,3 +33,19 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) } EXPORT_SYMBOL(_atomic_dec_and_lock); + +int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, + unsigned long *flags) +{ + /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ + if (atomic_add_unless(atomic, -1, 1)) + return 0; + + /* Otherwise do it the slow way */ + spin_lock_irqsave(lock, *flags); + if (atomic_dec_and_test(atomic)) + return 1; + spin_unlock_irqrestore(lock, *flags); + return 0; +} +EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); diff --git a/lib/decompress_bunzip2.c b/lib/decompress_bunzip2.c index 0234361b24b8..7c4932eed748 100644 --- a/lib/decompress_bunzip2.c +++ b/lib/decompress_bunzip2.c @@ -51,6 +51,7 @@ #endif /* STATIC */ #include <linux/decompress/mm.h> +#include <linux/crc32poly.h> #ifndef INT_MAX #define INT_MAX 0x7fffffff @@ -654,7 +655,7 @@ static int INIT start_bunzip(struct bunzip_data **bdp, void *inbuf, long len, for (i = 0; i < 256; i++) { c = i << 24; for (j = 8; j; j--) - c = c&0x80000000 ? (c << 1)^0x04c11db7 : (c << 1); + c = c&0x80000000 ? (c << 1)^(CRC32_POLY_BE) : (c << 1); bd->crc32Table[i] = c; } diff --git a/lib/devres.c b/lib/devres.c index 5bec1120b392..faccf1a037d0 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -4,6 +4,7 @@ #include <linux/io.h> #include <linux/gfp.h> #include <linux/export.h> +#include <linux/of_address.h> enum devm_ioremap_type { DEVM_IOREMAP = 0, @@ -162,6 +163,41 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) } EXPORT_SYMBOL(devm_ioremap_resource); +/* + * devm_of_iomap - Requests a resource and maps the memory mapped IO + * for a given device_node managed by a given device + * + * Checks that a resource is a valid memory region, requests the memory + * region and ioremaps it. All operations are managed and will be undone + * on driver detach of the device. + * + * This is to be used when a device requests/maps resources described + * by other device tree nodes (children or otherwise). + * + * @dev: The device "managing" the resource + * @node: The device-tree node where the resource resides + * @index: index of the MMIO range in the "reg" property + * @size: Returns the size of the resource (pass NULL if not needed) + * Returns a pointer to the requested and mapped memory or an ERR_PTR() encoded + * error code on failure. Usage example: + * + * base = devm_of_iomap(&pdev->dev, node, 0, NULL); + * if (IS_ERR(base)) + * return PTR_ERR(base); + */ +void __iomem *devm_of_iomap(struct device *dev, struct device_node *node, int index, + resource_size_t *size) +{ + struct resource res; + + if (of_address_to_resource(node, index, &res)) + return IOMEM_ERR_PTR(-EINVAL); + if (size) + *size = resource_size(&res); + return devm_ioremap_resource(dev, &res); +} +EXPORT_SYMBOL(devm_of_iomap); + #ifdef CONFIG_HAS_IOPORT_MAP /* * Generic iomap devres diff --git a/lib/dma-debug.c b/lib/dma-debug.c deleted file mode 100644 index c007d25bee09..000000000000 --- a/lib/dma-debug.c +++ /dev/null @@ -1,1773 +0,0 @@ -/* - * Copyright (C) 2008 Advanced Micro Devices, Inc. - * - * Author: Joerg Roedel <joerg.roedel@amd.com> - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published - * by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#include <linux/sched/task_stack.h> -#include <linux/scatterlist.h> -#include <linux/dma-mapping.h> -#include <linux/sched/task.h> -#include <linux/stacktrace.h> -#include <linux/dma-debug.h> -#include <linux/spinlock.h> -#include <linux/vmalloc.h> -#include <linux/debugfs.h> -#include <linux/uaccess.h> -#include <linux/export.h> -#include <linux/device.h> -#include <linux/types.h> -#include <linux/sched.h> -#include <linux/ctype.h> -#include <linux/list.h> -#include <linux/slab.h> - -#include <asm/sections.h> - -#define HASH_SIZE 1024ULL -#define HASH_FN_SHIFT 13 -#define HASH_FN_MASK (HASH_SIZE - 1) - -/* allow architectures to override this if absolutely required */ -#ifndef PREALLOC_DMA_DEBUG_ENTRIES -#define PREALLOC_DMA_DEBUG_ENTRIES (1 << 16) -#endif - -enum { - dma_debug_single, - dma_debug_page, - dma_debug_sg, - dma_debug_coherent, - dma_debug_resource, -}; - -enum map_err_types { - MAP_ERR_CHECK_NOT_APPLICABLE, - MAP_ERR_NOT_CHECKED, - MAP_ERR_CHECKED, -}; - -#define DMA_DEBUG_STACKTRACE_ENTRIES 5 - -/** - * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping - * @list: node on pre-allocated free_entries list - * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent - * @type: single, page, sg, coherent - * @pfn: page frame of the start address - * @offset: offset of mapping relative to pfn - * @size: length of the mapping - * @direction: enum dma_data_direction - * @sg_call_ents: 'nents' from dma_map_sg - * @sg_mapped_ents: 'mapped_ents' from dma_map_sg - * @map_err_type: track whether dma_mapping_error() was checked - * @stacktrace: support backtraces when a violation is detected - */ -struct dma_debug_entry { - struct list_head list; - struct device *dev; - int type; - unsigned long pfn; - size_t offset; - u64 dev_addr; - u64 size; - int direction; - int sg_call_ents; - int sg_mapped_ents; - enum map_err_types map_err_type; -#ifdef CONFIG_STACKTRACE - struct stack_trace stacktrace; - unsigned long st_entries[DMA_DEBUG_STACKTRACE_ENTRIES]; -#endif -}; - -typedef bool (*match_fn)(struct dma_debug_entry *, struct dma_debug_entry *); - -struct hash_bucket { - struct list_head list; - spinlock_t lock; -} ____cacheline_aligned_in_smp; - -/* Hash list to save the allocated dma addresses */ -static struct hash_bucket dma_entry_hash[HASH_SIZE]; -/* List of pre-allocated dma_debug_entry's */ -static LIST_HEAD(free_entries); -/* Lock for the list above */ -static DEFINE_SPINLOCK(free_entries_lock); - -/* Global disable flag - will be set in case of an error */ -static bool global_disable __read_mostly; - -/* Early initialization disable flag, set at the end of dma_debug_init */ -static bool dma_debug_initialized __read_mostly; - -static inline bool dma_debug_disabled(void) -{ - return global_disable || !dma_debug_initialized; -} - -/* Global error count */ -static u32 error_count; - -/* Global error show enable*/ -static u32 show_all_errors __read_mostly; -/* Number of errors to show */ -static u32 show_num_errors = 1; - -static u32 num_free_entries; -static u32 min_free_entries; -static u32 nr_total_entries; - -/* number of preallocated entries requested by kernel cmdline */ -static u32 nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - -/* debugfs dentry's for the stuff above */ -static struct dentry *dma_debug_dent __read_mostly; -static struct dentry *global_disable_dent __read_mostly; -static struct dentry *error_count_dent __read_mostly; -static struct dentry *show_all_errors_dent __read_mostly; -static struct dentry *show_num_errors_dent __read_mostly; -static struct dentry *num_free_entries_dent __read_mostly; -static struct dentry *min_free_entries_dent __read_mostly; -static struct dentry *filter_dent __read_mostly; - -/* per-driver filter related state */ - -#define NAME_MAX_LEN 64 - -static char current_driver_name[NAME_MAX_LEN] __read_mostly; -static struct device_driver *current_driver __read_mostly; - -static DEFINE_RWLOCK(driver_name_lock); - -static const char *const maperr2str[] = { - [MAP_ERR_CHECK_NOT_APPLICABLE] = "dma map error check not applicable", - [MAP_ERR_NOT_CHECKED] = "dma map error not checked", - [MAP_ERR_CHECKED] = "dma map error checked", -}; - -static const char *type2name[5] = { "single", "page", - "scather-gather", "coherent", - "resource" }; - -static const char *dir2name[4] = { "DMA_BIDIRECTIONAL", "DMA_TO_DEVICE", - "DMA_FROM_DEVICE", "DMA_NONE" }; - -/* - * The access to some variables in this macro is racy. We can't use atomic_t - * here because all these variables are exported to debugfs. Some of them even - * writeable. This is also the reason why a lock won't help much. But anyway, - * the races are no big deal. Here is why: - * - * error_count: the addition is racy, but the worst thing that can happen is - * that we don't count some errors - * show_num_errors: the subtraction is racy. Also no big deal because in - * worst case this will result in one warning more in the - * system log than the user configured. This variable is - * writeable via debugfs. - */ -static inline void dump_entry_trace(struct dma_debug_entry *entry) -{ -#ifdef CONFIG_STACKTRACE - if (entry) { - pr_warning("Mapped at:\n"); - print_stack_trace(&entry->stacktrace, 0); - } -#endif -} - -static bool driver_filter(struct device *dev) -{ - struct device_driver *drv; - unsigned long flags; - bool ret; - - /* driver filter off */ - if (likely(!current_driver_name[0])) - return true; - - /* driver filter on and initialized */ - if (current_driver && dev && dev->driver == current_driver) - return true; - - /* driver filter on, but we can't filter on a NULL device... */ - if (!dev) - return false; - - if (current_driver || !current_driver_name[0]) - return false; - - /* driver filter on but not yet initialized */ - drv = dev->driver; - if (!drv) - return false; - - /* lock to protect against change of current_driver_name */ - read_lock_irqsave(&driver_name_lock, flags); - - ret = false; - if (drv->name && - strncmp(current_driver_name, drv->name, NAME_MAX_LEN - 1) == 0) { - current_driver = drv; - ret = true; - } - - read_unlock_irqrestore(&driver_name_lock, flags); - - return ret; -} - -#define err_printk(dev, entry, format, arg...) do { \ - error_count += 1; \ - if (driver_filter(dev) && \ - (show_all_errors || show_num_errors > 0)) { \ - WARN(1, "%s %s: " format, \ - dev ? dev_driver_string(dev) : "NULL", \ - dev ? dev_name(dev) : "NULL", ## arg); \ - dump_entry_trace(entry); \ - } \ - if (!show_all_errors && show_num_errors > 0) \ - show_num_errors -= 1; \ - } while (0); - -/* - * Hash related functions - * - * Every DMA-API request is saved into a struct dma_debug_entry. To - * have quick access to these structs they are stored into a hash. - */ -static int hash_fn(struct dma_debug_entry *entry) -{ - /* - * Hash function is based on the dma address. - * We use bits 20-27 here as the index into the hash - */ - return (entry->dev_addr >> HASH_FN_SHIFT) & HASH_FN_MASK; -} - -/* - * Request exclusive access to a hash bucket for a given dma_debug_entry. - */ -static struct hash_bucket *get_hash_bucket(struct dma_debug_entry *entry, - unsigned long *flags) - __acquires(&dma_entry_hash[idx].lock) -{ - int idx = hash_fn(entry); - unsigned long __flags; - - spin_lock_irqsave(&dma_entry_hash[idx].lock, __flags); - *flags = __flags; - return &dma_entry_hash[idx]; -} - -/* - * Give up exclusive access to the hash bucket - */ -static void put_hash_bucket(struct hash_bucket *bucket, - unsigned long *flags) - __releases(&bucket->lock) -{ - unsigned long __flags = *flags; - - spin_unlock_irqrestore(&bucket->lock, __flags); -} - -static bool exact_match(struct dma_debug_entry *a, struct dma_debug_entry *b) -{ - return ((a->dev_addr == b->dev_addr) && - (a->dev == b->dev)) ? true : false; -} - -static bool containing_match(struct dma_debug_entry *a, - struct dma_debug_entry *b) -{ - if (a->dev != b->dev) - return false; - - if ((b->dev_addr <= a->dev_addr) && - ((b->dev_addr + b->size) >= (a->dev_addr + a->size))) - return true; - - return false; -} - -/* - * Search a given entry in the hash bucket list - */ -static struct dma_debug_entry *__hash_bucket_find(struct hash_bucket *bucket, - struct dma_debug_entry *ref, - match_fn match) -{ - struct dma_debug_entry *entry, *ret = NULL; - int matches = 0, match_lvl, last_lvl = -1; - - list_for_each_entry(entry, &bucket->list, list) { - if (!match(ref, entry)) - continue; - - /* - * Some drivers map the same physical address multiple - * times. Without a hardware IOMMU this results in the - * same device addresses being put into the dma-debug - * hash multiple times too. This can result in false - * positives being reported. Therefore we implement a - * best-fit algorithm here which returns the entry from - * the hash which fits best to the reference value - * instead of the first-fit. - */ - matches += 1; - match_lvl = 0; - entry->size == ref->size ? ++match_lvl : 0; - entry->type == ref->type ? ++match_lvl : 0; - entry->direction == ref->direction ? ++match_lvl : 0; - entry->sg_call_ents == ref->sg_call_ents ? ++match_lvl : 0; - - if (match_lvl == 4) { - /* perfect-fit - return the result */ - return entry; - } else if (match_lvl > last_lvl) { - /* - * We found an entry that fits better then the - * previous one or it is the 1st match. - */ - last_lvl = match_lvl; - ret = entry; - } - } - - /* - * If we have multiple matches but no perfect-fit, just return - * NULL. - */ - ret = (matches == 1) ? ret : NULL; - - return ret; -} - -static struct dma_debug_entry *bucket_find_exact(struct hash_bucket *bucket, - struct dma_debug_entry *ref) -{ - return __hash_bucket_find(bucket, ref, exact_match); -} - -static struct dma_debug_entry *bucket_find_contain(struct hash_bucket **bucket, - struct dma_debug_entry *ref, - unsigned long *flags) -{ - - unsigned int max_range = dma_get_max_seg_size(ref->dev); - struct dma_debug_entry *entry, index = *ref; - unsigned int range = 0; - - while (range <= max_range) { - entry = __hash_bucket_find(*bucket, ref, containing_match); - - if (entry) - return entry; - - /* - * Nothing found, go back a hash bucket - */ - put_hash_bucket(*bucket, flags); - range += (1 << HASH_FN_SHIFT); - index.dev_addr -= (1 << HASH_FN_SHIFT); - *bucket = get_hash_bucket(&index, flags); - } - - return NULL; -} - -/* - * Add an entry to a hash bucket - */ -static void hash_bucket_add(struct hash_bucket *bucket, - struct dma_debug_entry *entry) -{ - list_add_tail(&entry->list, &bucket->list); -} - -/* - * Remove entry from a hash bucket list - */ -static void hash_bucket_del(struct dma_debug_entry *entry) -{ - list_del(&entry->list); -} - -static unsigned long long phys_addr(struct dma_debug_entry *entry) -{ - if (entry->type == dma_debug_resource) - return __pfn_to_phys(entry->pfn) + entry->offset; - - return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; -} - -/* - * Dump mapping entries for debugging purposes - */ -void debug_dma_dump_mappings(struct device *dev) -{ - int idx; - - for (idx = 0; idx < HASH_SIZE; idx++) { - struct hash_bucket *bucket = &dma_entry_hash[idx]; - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&bucket->lock, flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!dev || dev == entry->dev) { - dev_info(entry->dev, - "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", - type2name[entry->type], idx, - phys_addr(entry), entry->pfn, - entry->dev_addr, entry->size, - dir2name[entry->direction], - maperr2str[entry->map_err_type]); - } - } - - spin_unlock_irqrestore(&bucket->lock, flags); - } -} - -/* - * For each mapping (initial cacheline in the case of - * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a - * scatterlist, or the cacheline specified in dma_map_single) insert - * into this tree using the cacheline as the key. At - * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If - * the entry already exists at insertion time add a tag as a reference - * count for the overlapping mappings. For now, the overlap tracking - * just ensures that 'unmaps' balance 'maps' before marking the - * cacheline idle, but we should also be flagging overlaps as an API - * violation. - * - * Memory usage is mostly constrained by the maximum number of available - * dma-debug entries in that we need a free dma_debug_entry before - * inserting into the tree. In the case of dma_map_page and - * dma_alloc_coherent there is only one dma_debug_entry and one - * dma_active_cacheline entry to track per event. dma_map_sg(), on the - * other hand, consumes a single dma_debug_entry, but inserts 'nents' - * entries into the tree. - * - * At any time debug_dma_assert_idle() can be called to trigger a - * warning if any cachelines in the given page are in the active set. - */ -static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); -static DEFINE_SPINLOCK(radix_lock); -#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) -#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) -#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) - -static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) -{ - return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + - (entry->offset >> L1_CACHE_SHIFT); -} - -static int active_cacheline_read_overlap(phys_addr_t cln) -{ - int overlap = 0, i; - - for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) - if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) - overlap |= 1 << i; - return overlap; -} - -static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) -{ - int i; - - if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) - return overlap; - - for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) - if (overlap & 1 << i) - radix_tree_tag_set(&dma_active_cacheline, cln, i); - else - radix_tree_tag_clear(&dma_active_cacheline, cln, i); - - return overlap; -} - -static void active_cacheline_inc_overlap(phys_addr_t cln) -{ - int overlap = active_cacheline_read_overlap(cln); - - overlap = active_cacheline_set_overlap(cln, ++overlap); - - /* If we overflowed the overlap counter then we're potentially - * leaking dma-mappings. Otherwise, if maps and unmaps are - * balanced then this overflow may cause false negatives in - * debug_dma_assert_idle() as the cacheline may be marked idle - * prematurely. - */ - WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, - "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", - ACTIVE_CACHELINE_MAX_OVERLAP, &cln); -} - -static int active_cacheline_dec_overlap(phys_addr_t cln) -{ - int overlap = active_cacheline_read_overlap(cln); - - return active_cacheline_set_overlap(cln, --overlap); -} - -static int active_cacheline_insert(struct dma_debug_entry *entry) -{ - phys_addr_t cln = to_cacheline_number(entry); - unsigned long flags; - int rc; - - /* If the device is not writing memory then we don't have any - * concerns about the cpu consuming stale data. This mitigates - * legitimate usages of overlapping mappings. - */ - if (entry->direction == DMA_TO_DEVICE) - return 0; - - spin_lock_irqsave(&radix_lock, flags); - rc = radix_tree_insert(&dma_active_cacheline, cln, entry); - if (rc == -EEXIST) - active_cacheline_inc_overlap(cln); - spin_unlock_irqrestore(&radix_lock, flags); - - return rc; -} - -static void active_cacheline_remove(struct dma_debug_entry *entry) -{ - phys_addr_t cln = to_cacheline_number(entry); - unsigned long flags; - - /* ...mirror the insert case */ - if (entry->direction == DMA_TO_DEVICE) - return; - - spin_lock_irqsave(&radix_lock, flags); - /* since we are counting overlaps the final put of the - * cacheline will occur when the overlap count is 0. - * active_cacheline_dec_overlap() returns -1 in that case - */ - if (active_cacheline_dec_overlap(cln) < 0) - radix_tree_delete(&dma_active_cacheline, cln); - spin_unlock_irqrestore(&radix_lock, flags); -} - -/** - * debug_dma_assert_idle() - assert that a page is not undergoing dma - * @page: page to lookup in the dma_active_cacheline tree - * - * Place a call to this routine in cases where the cpu touching the page - * before the dma completes (page is dma_unmapped) will lead to data - * corruption. - */ -void debug_dma_assert_idle(struct page *page) -{ - static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; - struct dma_debug_entry *entry = NULL; - void **results = (void **) &ents; - unsigned int nents, i; - unsigned long flags; - phys_addr_t cln; - - if (dma_debug_disabled()) - return; - - if (!page) - return; - - cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; - spin_lock_irqsave(&radix_lock, flags); - nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, - CACHELINES_PER_PAGE); - for (i = 0; i < nents; i++) { - phys_addr_t ent_cln = to_cacheline_number(ents[i]); - - if (ent_cln == cln) { - entry = ents[i]; - break; - } else if (ent_cln >= cln + CACHELINES_PER_PAGE) - break; - } - spin_unlock_irqrestore(&radix_lock, flags); - - if (!entry) - return; - - cln = to_cacheline_number(entry); - err_printk(entry->dev, entry, - "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", - &cln); -} - -/* - * Wrapper function for adding an entry to the hash. - * This function takes care of locking itself. - */ -static void add_dma_entry(struct dma_debug_entry *entry) -{ - struct hash_bucket *bucket; - unsigned long flags; - int rc; - - bucket = get_hash_bucket(entry, &flags); - hash_bucket_add(bucket, entry); - put_hash_bucket(bucket, &flags); - - rc = active_cacheline_insert(entry); - if (rc == -ENOMEM) { - pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); - global_disable = true; - } - - /* TODO: report -EEXIST errors here as overlapping mappings are - * not supported by the DMA API - */ -} - -static struct dma_debug_entry *__dma_entry_alloc(void) -{ - struct dma_debug_entry *entry; - - entry = list_entry(free_entries.next, struct dma_debug_entry, list); - list_del(&entry->list); - memset(entry, 0, sizeof(*entry)); - - num_free_entries -= 1; - if (num_free_entries < min_free_entries) - min_free_entries = num_free_entries; - - return entry; -} - -/* struct dma_entry allocator - * - * The next two functions implement the allocator for - * struct dma_debug_entries. - */ -static struct dma_debug_entry *dma_entry_alloc(void) -{ - struct dma_debug_entry *entry; - unsigned long flags; - - spin_lock_irqsave(&free_entries_lock, flags); - - if (list_empty(&free_entries)) { - global_disable = true; - spin_unlock_irqrestore(&free_entries_lock, flags); - pr_err("DMA-API: debugging out of memory - disabling\n"); - return NULL; - } - - entry = __dma_entry_alloc(); - - spin_unlock_irqrestore(&free_entries_lock, flags); - -#ifdef CONFIG_STACKTRACE - entry->stacktrace.max_entries = DMA_DEBUG_STACKTRACE_ENTRIES; - entry->stacktrace.entries = entry->st_entries; - entry->stacktrace.skip = 2; - save_stack_trace(&entry->stacktrace); -#endif - - return entry; -} - -static void dma_entry_free(struct dma_debug_entry *entry) -{ - unsigned long flags; - - active_cacheline_remove(entry); - - /* - * add to beginning of the list - this way the entries are - * more likely cache hot when they are reallocated. - */ - spin_lock_irqsave(&free_entries_lock, flags); - list_add(&entry->list, &free_entries); - num_free_entries += 1; - spin_unlock_irqrestore(&free_entries_lock, flags); -} - -int dma_debug_resize_entries(u32 num_entries) -{ - int i, delta, ret = 0; - unsigned long flags; - struct dma_debug_entry *entry; - LIST_HEAD(tmp); - - spin_lock_irqsave(&free_entries_lock, flags); - - if (nr_total_entries < num_entries) { - delta = num_entries - nr_total_entries; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - for (i = 0; i < delta; i++) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - break; - - list_add_tail(&entry->list, &tmp); - } - - spin_lock_irqsave(&free_entries_lock, flags); - - list_splice(&tmp, &free_entries); - nr_total_entries += i; - num_free_entries += i; - } else { - delta = nr_total_entries - num_entries; - - for (i = 0; i < delta && !list_empty(&free_entries); i++) { - entry = __dma_entry_alloc(); - kfree(entry); - } - - nr_total_entries -= i; - } - - if (nr_total_entries != num_entries) - ret = 1; - - spin_unlock_irqrestore(&free_entries_lock, flags); - - return ret; -} - -/* - * DMA-API debugging init code - * - * The init code does two things: - * 1. Initialize core data structures - * 2. Preallocate a given number of dma_debug_entry structs - */ - -static int prealloc_memory(u32 num_entries) -{ - struct dma_debug_entry *entry, *next_entry; - int i; - - for (i = 0; i < num_entries; ++i) { - entry = kzalloc(sizeof(*entry), GFP_KERNEL); - if (!entry) - goto out_err; - - list_add_tail(&entry->list, &free_entries); - } - - num_free_entries = num_entries; - min_free_entries = num_entries; - - pr_info("DMA-API: preallocated %d debug entries\n", num_entries); - - return 0; - -out_err: - - list_for_each_entry_safe(entry, next_entry, &free_entries, list) { - list_del(&entry->list); - kfree(entry); - } - - return -ENOMEM; -} - -static ssize_t filter_read(struct file *file, char __user *user_buf, - size_t count, loff_t *ppos) -{ - char buf[NAME_MAX_LEN + 1]; - unsigned long flags; - int len; - - if (!current_driver_name[0]) - return 0; - - /* - * We can't copy to userspace directly because current_driver_name can - * only be read under the driver_name_lock with irqs disabled. So - * create a temporary copy first. - */ - read_lock_irqsave(&driver_name_lock, flags); - len = scnprintf(buf, NAME_MAX_LEN + 1, "%s\n", current_driver_name); - read_unlock_irqrestore(&driver_name_lock, flags); - - return simple_read_from_buffer(user_buf, count, ppos, buf, len); -} - -static ssize_t filter_write(struct file *file, const char __user *userbuf, - size_t count, loff_t *ppos) -{ - char buf[NAME_MAX_LEN]; - unsigned long flags; - size_t len; - int i; - - /* - * We can't copy from userspace directly. Access to - * current_driver_name is protected with a write_lock with irqs - * disabled. Since copy_from_user can fault and may sleep we - * need to copy to temporary buffer first - */ - len = min(count, (size_t)(NAME_MAX_LEN - 1)); - if (copy_from_user(buf, userbuf, len)) - return -EFAULT; - - buf[len] = 0; - - write_lock_irqsave(&driver_name_lock, flags); - - /* - * Now handle the string we got from userspace very carefully. - * The rules are: - * - only use the first token we got - * - token delimiter is everything looking like a space - * character (' ', '\n', '\t' ...) - * - */ - if (!isalnum(buf[0])) { - /* - * If the first character userspace gave us is not - * alphanumerical then assume the filter should be - * switched off. - */ - if (current_driver_name[0]) - pr_info("DMA-API: switching off dma-debug driver filter\n"); - current_driver_name[0] = 0; - current_driver = NULL; - goto out_unlock; - } - - /* - * Now parse out the first token and use it as the name for the - * driver to filter for. - */ - for (i = 0; i < NAME_MAX_LEN - 1; ++i) { - current_driver_name[i] = buf[i]; - if (isspace(buf[i]) || buf[i] == ' ' || buf[i] == 0) - break; - } - current_driver_name[i] = 0; - current_driver = NULL; - - pr_info("DMA-API: enable driver filter for driver [%s]\n", - current_driver_name); - -out_unlock: - write_unlock_irqrestore(&driver_name_lock, flags); - - return count; -} - -static const struct file_operations filter_fops = { - .read = filter_read, - .write = filter_write, - .llseek = default_llseek, -}; - -static int dma_debug_fs_init(void) -{ - dma_debug_dent = debugfs_create_dir("dma-api", NULL); - if (!dma_debug_dent) { - pr_err("DMA-API: can not create debugfs directory\n"); - return -ENOMEM; - } - - global_disable_dent = debugfs_create_bool("disabled", 0444, - dma_debug_dent, - &global_disable); - if (!global_disable_dent) - goto out_err; - - error_count_dent = debugfs_create_u32("error_count", 0444, - dma_debug_dent, &error_count); - if (!error_count_dent) - goto out_err; - - show_all_errors_dent = debugfs_create_u32("all_errors", 0644, - dma_debug_dent, - &show_all_errors); - if (!show_all_errors_dent) - goto out_err; - - show_num_errors_dent = debugfs_create_u32("num_errors", 0644, - dma_debug_dent, - &show_num_errors); - if (!show_num_errors_dent) - goto out_err; - - num_free_entries_dent = debugfs_create_u32("num_free_entries", 0444, - dma_debug_dent, - &num_free_entries); - if (!num_free_entries_dent) - goto out_err; - - min_free_entries_dent = debugfs_create_u32("min_free_entries", 0444, - dma_debug_dent, - &min_free_entries); - if (!min_free_entries_dent) - goto out_err; - - filter_dent = debugfs_create_file("driver_filter", 0644, - dma_debug_dent, NULL, &filter_fops); - if (!filter_dent) - goto out_err; - - return 0; - -out_err: - debugfs_remove_recursive(dma_debug_dent); - - return -ENOMEM; -} - -static int device_dma_allocations(struct device *dev, struct dma_debug_entry **out_entry) -{ - struct dma_debug_entry *entry; - unsigned long flags; - int count = 0, i; - - for (i = 0; i < HASH_SIZE; ++i) { - spin_lock_irqsave(&dma_entry_hash[i].lock, flags); - list_for_each_entry(entry, &dma_entry_hash[i].list, list) { - if (entry->dev == dev) { - count += 1; - *out_entry = entry; - } - } - spin_unlock_irqrestore(&dma_entry_hash[i].lock, flags); - } - - return count; -} - -static int dma_debug_device_change(struct notifier_block *nb, unsigned long action, void *data) -{ - struct device *dev = data; - struct dma_debug_entry *uninitialized_var(entry); - int count; - - if (dma_debug_disabled()) - return 0; - - switch (action) { - case BUS_NOTIFY_UNBOUND_DRIVER: - count = device_dma_allocations(dev, &entry); - if (count == 0) - break; - err_printk(dev, entry, "DMA-API: device driver has pending " - "DMA allocations while released from device " - "[count=%d]\n" - "One of leaked entries details: " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [mapped as %s]\n", - count, entry->dev_addr, entry->size, - dir2name[entry->direction], type2name[entry->type]); - break; - default: - break; - } - - return 0; -} - -void dma_debug_add_bus(struct bus_type *bus) -{ - struct notifier_block *nb; - - if (dma_debug_disabled()) - return; - - nb = kzalloc(sizeof(struct notifier_block), GFP_KERNEL); - if (nb == NULL) { - pr_err("dma_debug_add_bus: out of memory\n"); - return; - } - - nb->notifier_call = dma_debug_device_change; - - bus_register_notifier(bus, nb); -} - -static int dma_debug_init(void) -{ - int i; - - /* Do not use dma_debug_initialized here, since we really want to be - * called to set dma_debug_initialized - */ - if (global_disable) - return 0; - - for (i = 0; i < HASH_SIZE; ++i) { - INIT_LIST_HEAD(&dma_entry_hash[i].list); - spin_lock_init(&dma_entry_hash[i].lock); - } - - if (dma_debug_fs_init() != 0) { - pr_err("DMA-API: error creating debugfs entries - disabling\n"); - global_disable = true; - - return 0; - } - - if (prealloc_memory(nr_prealloc_entries) != 0) { - pr_err("DMA-API: debugging out of memory error - disabled\n"); - global_disable = true; - - return 0; - } - - nr_total_entries = num_free_entries; - - dma_debug_initialized = true; - - pr_info("DMA-API: debugging enabled by kernel config\n"); - return 0; -} -core_initcall(dma_debug_init); - -static __init int dma_debug_cmdline(char *str) -{ - if (!str) - return -EINVAL; - - if (strncmp(str, "off", 3) == 0) { - pr_info("DMA-API: debugging disabled on kernel command line\n"); - global_disable = true; - } - - return 0; -} - -static __init int dma_debug_entries_cmdline(char *str) -{ - if (!str) - return -EINVAL; - if (!get_option(&str, &nr_prealloc_entries)) - nr_prealloc_entries = PREALLOC_DMA_DEBUG_ENTRIES; - return 0; -} - -__setup("dma_debug=", dma_debug_cmdline); -__setup("dma_debug_entries=", dma_debug_entries_cmdline); - -static void check_unmap(struct dma_debug_entry *ref) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - bucket = get_hash_bucket(ref, &flags); - entry = bucket_find_exact(bucket, ref); - - if (!entry) { - /* must drop lock before calling dma_mapping_error */ - put_hash_bucket(bucket, &flags); - - if (dma_mapping_error(ref->dev, ref->dev_addr)) { - err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free an " - "invalid DMA memory address\n"); - } else { - err_printk(ref->dev, NULL, - "DMA-API: device driver tries to free DMA " - "memory it has not allocated [device " - "address=0x%016llx] [size=%llu bytes]\n", - ref->dev_addr, ref->size); - } - return; - } - - if (ref->size != entry->size) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different size " - "[device address=0x%016llx] [map size=%llu bytes] " - "[unmap size=%llu bytes]\n", - ref->dev_addr, entry->size, ref->size); - } - - if (ref->type != entry->type) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with wrong function " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped as %s] [unmapped as %s]\n", - ref->dev_addr, ref->size, - type2name[entry->type], type2name[ref->type]); - } else if ((entry->type == dma_debug_coherent) && - (phys_addr(ref) != phys_addr(entry))) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different CPU address " - "[device address=0x%016llx] [size=%llu bytes] " - "[cpu alloc address=0x%016llx] " - "[cpu free address=0x%016llx]", - ref->dev_addr, ref->size, - phys_addr(entry), - phys_addr(ref)); - } - - if (ref->sg_call_ents && ref->type == dma_debug_sg && - ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA sg list with different entry count " - "[map count=%d] [unmap count=%d]\n", - entry->sg_call_ents, ref->sg_call_ents); - } - - /* - * This may be no bug in reality - but most implementations of the - * DMA API don't handle this properly, so check for it here - */ - if (ref->direction != entry->direction) { - err_printk(ref->dev, entry, "DMA-API: device driver frees " - "DMA memory with different direction " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [unmapped with %s]\n", - ref->dev_addr, ref->size, - dir2name[entry->direction], - dir2name[ref->direction]); - } - - /* - * Drivers should use dma_mapping_error() to check the returned - * addresses of dma_map_single() and dma_map_page(). - * If not, print this warning message. See Documentation/DMA-API.txt. - */ - if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { - err_printk(ref->dev, entry, - "DMA-API: device driver failed to check map error" - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped as %s]", - ref->dev_addr, ref->size, - type2name[entry->type]); - } - - hash_bucket_del(entry); - dma_entry_free(entry); - - put_hash_bucket(bucket, &flags); -} - -static void check_for_stack(struct device *dev, - struct page *page, size_t offset) -{ - void *addr; - struct vm_struct *stack_vm_area = task_stack_vm_area(current); - - if (!stack_vm_area) { - /* Stack is direct-mapped. */ - if (PageHighMem(page)) - return; - addr = page_address(page) + offset; - if (object_is_on_stack(addr)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [addr=%p]\n", addr); - } else { - /* Stack is vmalloced. */ - int i; - - for (i = 0; i < stack_vm_area->nr_pages; i++) { - if (page != stack_vm_area->pages[i]) - continue; - - addr = (u8 *)current->stack + i * PAGE_SIZE + offset; - err_printk(dev, NULL, "DMA-API: device driver maps memory from stack [probable addr=%p]\n", addr); - break; - } - } -} - -static inline bool overlap(void *addr, unsigned long len, void *start, void *end) -{ - unsigned long a1 = (unsigned long)addr; - unsigned long b1 = a1 + len; - unsigned long a2 = (unsigned long)start; - unsigned long b2 = (unsigned long)end; - - return !(b1 <= a2 || a1 >= b2); -} - -static void check_for_illegal_area(struct device *dev, void *addr, unsigned long len) -{ - if (overlap(addr, len, _stext, _etext) || - overlap(addr, len, __start_rodata, __end_rodata)) - err_printk(dev, NULL, "DMA-API: device driver maps memory from kernel text or rodata [addr=%p] [len=%lu]\n", addr, len); -} - -static void check_sync(struct device *dev, - struct dma_debug_entry *ref, - bool to_cpu) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - bucket = get_hash_bucket(ref, &flags); - - entry = bucket_find_contain(&bucket, ref, &flags); - - if (!entry) { - err_printk(dev, NULL, "DMA-API: device driver tries " - "to sync DMA memory it has not allocated " - "[device address=0x%016llx] [size=%llu bytes]\n", - (unsigned long long)ref->dev_addr, ref->size); - goto out; - } - - if (ref->size > entry->size) { - err_printk(dev, entry, "DMA-API: device driver syncs" - " DMA memory outside allocated range " - "[device address=0x%016llx] " - "[allocation size=%llu bytes] " - "[sync offset+size=%llu]\n", - entry->dev_addr, entry->size, - ref->size); - } - - if (entry->direction == DMA_BIDIRECTIONAL) - goto out; - - if (ref->direction != entry->direction) { - err_printk(dev, entry, "DMA-API: device driver syncs " - "DMA memory with different direction " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - } - - if (to_cpu && !(entry->direction == DMA_FROM_DEVICE) && - !(ref->direction == DMA_TO_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " - "device read-only DMA memory for cpu " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - - if (!to_cpu && !(entry->direction == DMA_TO_DEVICE) && - !(ref->direction == DMA_FROM_DEVICE)) - err_printk(dev, entry, "DMA-API: device driver syncs " - "device write-only DMA memory to device " - "[device address=0x%016llx] [size=%llu bytes] " - "[mapped with %s] [synced with %s]\n", - (unsigned long long)ref->dev_addr, entry->size, - dir2name[entry->direction], - dir2name[ref->direction]); - - if (ref->sg_call_ents && ref->type == dma_debug_sg && - ref->sg_call_ents != entry->sg_call_ents) { - err_printk(ref->dev, entry, "DMA-API: device driver syncs " - "DMA sg list with different entry count " - "[map count=%d] [sync count=%d]\n", - entry->sg_call_ents, ref->sg_call_ents); - } - -out: - put_hash_bucket(bucket, &flags); -} - -static void check_sg_segment(struct device *dev, struct scatterlist *sg) -{ -#ifdef CONFIG_DMA_API_DEBUG_SG - unsigned int max_seg = dma_get_max_seg_size(dev); - u64 start, end, boundary = dma_get_seg_boundary(dev); - - /* - * Either the driver forgot to set dma_parms appropriately, or - * whoever generated the list forgot to check them. - */ - if (sg->length > max_seg) - err_printk(dev, NULL, "DMA-API: mapping sg segment longer than device claims to support [len=%u] [max=%u]\n", - sg->length, max_seg); - /* - * In some cases this could potentially be the DMA API - * implementation's fault, but it would usually imply that - * the scatterlist was built inappropriately to begin with. - */ - start = sg_dma_address(sg); - end = start + sg_dma_len(sg) - 1; - if ((start ^ end) & ~boundary) - err_printk(dev, NULL, "DMA-API: mapping sg segment across boundary [start=0x%016llx] [end=0x%016llx] [boundary=0x%016llx]\n", - start, end, boundary); -#endif -} - -void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, - size_t size, int direction, dma_addr_t dma_addr, - bool map_single) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - if (dma_mapping_error(dev, dma_addr)) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->dev = dev; - entry->type = dma_debug_page; - entry->pfn = page_to_pfn(page); - entry->offset = offset, - entry->dev_addr = dma_addr; - entry->size = size; - entry->direction = direction; - entry->map_err_type = MAP_ERR_NOT_CHECKED; - - if (map_single) - entry->type = dma_debug_single; - - check_for_stack(dev, page, offset); - - if (!PageHighMem(page)) { - void *addr = page_address(page) + offset; - - check_for_illegal_area(dev, addr, size); - } - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_map_page); - -void debug_dma_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - struct dma_debug_entry ref; - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - - if (unlikely(dma_debug_disabled())) - return; - - ref.dev = dev; - ref.dev_addr = dma_addr; - bucket = get_hash_bucket(&ref, &flags); - - list_for_each_entry(entry, &bucket->list, list) { - if (!exact_match(&ref, entry)) - continue; - - /* - * The same physical address can be mapped multiple - * times. Without a hardware IOMMU this results in the - * same device addresses being put into the dma-debug - * hash multiple times too. This can result in false - * positives being reported. Therefore we implement a - * best-fit algorithm here which updates the first entry - * from the hash which fits the reference value and is - * not currently listed as being checked. - */ - if (entry->map_err_type == MAP_ERR_NOT_CHECKED) { - entry->map_err_type = MAP_ERR_CHECKED; - break; - } - } - - put_hash_bucket(bucket, &flags); -} -EXPORT_SYMBOL(debug_dma_mapping_error); - -void debug_dma_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, int direction, bool map_single) -{ - struct dma_debug_entry ref = { - .type = dma_debug_page, - .dev = dev, - .dev_addr = addr, - .size = size, - .direction = direction, - }; - - if (unlikely(dma_debug_disabled())) - return; - - if (map_single) - ref.type = dma_debug_single; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_unmap_page); - -void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, - int nents, int mapped_ents, int direction) -{ - struct dma_debug_entry *entry; - struct scatterlist *s; - int i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, mapped_ents, i) { - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_sg; - entry->dev = dev; - entry->pfn = page_to_pfn(sg_page(s)); - entry->offset = s->offset, - entry->size = sg_dma_len(s); - entry->dev_addr = sg_dma_address(s); - entry->direction = direction; - entry->sg_call_ents = nents; - entry->sg_mapped_ents = mapped_ents; - - check_for_stack(dev, sg_page(s), s->offset); - - if (!PageHighMem(sg_page(s))) { - check_for_illegal_area(dev, sg_virt(s), sg_dma_len(s)); - } - - check_sg_segment(dev, s); - - add_dma_entry(entry); - } -} -EXPORT_SYMBOL(debug_dma_map_sg); - -static int get_nr_mapped_entries(struct device *dev, - struct dma_debug_entry *ref) -{ - struct dma_debug_entry *entry; - struct hash_bucket *bucket; - unsigned long flags; - int mapped_ents; - - bucket = get_hash_bucket(ref, &flags); - entry = bucket_find_exact(bucket, ref); - mapped_ents = 0; - - if (entry) - mapped_ents = entry->sg_mapped_ents; - put_hash_bucket(bucket, &flags); - - return mapped_ents; -} - -void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, - int nelems, int dir) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sglist, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = dir, - .sg_call_ents = nelems, - }; - - if (mapped_ents && i >= mapped_ents) - break; - - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - check_unmap(&ref); - } -} -EXPORT_SYMBOL(debug_dma_unmap_sg); - -void debug_dma_alloc_coherent(struct device *dev, size_t size, - dma_addr_t dma_addr, void *virt) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - if (unlikely(virt == NULL)) - return; - - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_coherent; - entry->dev = dev; - entry->offset = offset_in_page(virt); - entry->size = size; - entry->dev_addr = dma_addr; - entry->direction = DMA_BIDIRECTIONAL; - - if (is_vmalloc_addr(virt)) - entry->pfn = vmalloc_to_pfn(virt); - else - entry->pfn = page_to_pfn(virt_to_page(virt)); - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_alloc_coherent); - -void debug_dma_free_coherent(struct device *dev, size_t size, - void *virt, dma_addr_t addr) -{ - struct dma_debug_entry ref = { - .type = dma_debug_coherent, - .dev = dev, - .offset = offset_in_page(virt), - .dev_addr = addr, - .size = size, - .direction = DMA_BIDIRECTIONAL, - }; - - /* handle vmalloc and linear addresses */ - if (!is_vmalloc_addr(virt) && !virt_addr_valid(virt)) - return; - - if (is_vmalloc_addr(virt)) - ref.pfn = vmalloc_to_pfn(virt); - else - ref.pfn = page_to_pfn(virt_to_page(virt)); - - if (unlikely(dma_debug_disabled())) - return; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_free_coherent); - -void debug_dma_map_resource(struct device *dev, phys_addr_t addr, size_t size, - int direction, dma_addr_t dma_addr) -{ - struct dma_debug_entry *entry; - - if (unlikely(dma_debug_disabled())) - return; - - entry = dma_entry_alloc(); - if (!entry) - return; - - entry->type = dma_debug_resource; - entry->dev = dev; - entry->pfn = PHYS_PFN(addr); - entry->offset = offset_in_page(addr); - entry->size = size; - entry->dev_addr = dma_addr; - entry->direction = direction; - entry->map_err_type = MAP_ERR_NOT_CHECKED; - - add_dma_entry(entry); -} -EXPORT_SYMBOL(debug_dma_map_resource); - -void debug_dma_unmap_resource(struct device *dev, dma_addr_t dma_addr, - size_t size, int direction) -{ - struct dma_debug_entry ref = { - .type = dma_debug_resource, - .dev = dev, - .dev_addr = dma_addr, - .size = size, - .direction = direction, - }; - - if (unlikely(dma_debug_disabled())) - return; - - check_unmap(&ref); -} -EXPORT_SYMBOL(debug_dma_unmap_resource); - -void debug_dma_sync_single_for_cpu(struct device *dev, dma_addr_t dma_handle, - size_t size, int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, true); -} -EXPORT_SYMBOL(debug_dma_sync_single_for_cpu); - -void debug_dma_sync_single_for_device(struct device *dev, - dma_addr_t dma_handle, size_t size, - int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, false); -} -EXPORT_SYMBOL(debug_dma_sync_single_for_device); - -void debug_dma_sync_single_range_for_cpu(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, size_t size, - int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, true); -} -EXPORT_SYMBOL(debug_dma_sync_single_range_for_cpu); - -void debug_dma_sync_single_range_for_device(struct device *dev, - dma_addr_t dma_handle, - unsigned long offset, - size_t size, int direction) -{ - struct dma_debug_entry ref; - - if (unlikely(dma_debug_disabled())) - return; - - ref.type = dma_debug_single; - ref.dev = dev; - ref.dev_addr = dma_handle; - ref.size = offset + size; - ref.direction = direction; - ref.sg_call_ents = 0; - - check_sync(dev, &ref, false); -} -EXPORT_SYMBOL(debug_dma_sync_single_range_for_device); - -void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, - int nelems, int direction) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = direction, - .sg_call_ents = nelems, - }; - - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - if (i >= mapped_ents) - break; - - check_sync(dev, &ref, true); - } -} -EXPORT_SYMBOL(debug_dma_sync_sg_for_cpu); - -void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, - int nelems, int direction) -{ - struct scatterlist *s; - int mapped_ents = 0, i; - - if (unlikely(dma_debug_disabled())) - return; - - for_each_sg(sg, s, nelems, i) { - - struct dma_debug_entry ref = { - .type = dma_debug_sg, - .dev = dev, - .pfn = page_to_pfn(sg_page(s)), - .offset = s->offset, - .dev_addr = sg_dma_address(s), - .size = sg_dma_len(s), - .direction = direction, - .sg_call_ents = nelems, - }; - if (!i) - mapped_ents = get_nr_mapped_entries(dev, &ref); - - if (i >= mapped_ents) - break; - - check_sync(dev, &ref, false); - } -} -EXPORT_SYMBOL(debug_dma_sync_sg_for_device); - -static int __init dma_debug_driver_setup(char *str) -{ - int i; - - for (i = 0; i < NAME_MAX_LEN - 1; ++i, ++str) { - current_driver_name[i] = *str; - if (*str == 0) - break; - } - - if (current_driver_name[0]) - pr_info("DMA-API: enable driver filter for driver [%s]\n", - current_driver_name); - - - return 1; -} -__setup("dma_debug_driver=", dma_debug_driver_setup); diff --git a/lib/dma-direct.c b/lib/dma-direct.c deleted file mode 100644 index 8be8106270c2..000000000000 --- a/lib/dma-direct.c +++ /dev/null @@ -1,204 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * DMA operations that map physical memory directly without using an IOMMU or - * flushing caches. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-direct.h> -#include <linux/scatterlist.h> -#include <linux/dma-contiguous.h> -#include <linux/pfn.h> -#include <linux/set_memory.h> - -#define DIRECT_MAPPING_ERROR 0 - -/* - * Most architectures use ZONE_DMA for the first 16 Megabytes, but - * some use it for entirely different regions: - */ -#ifndef ARCH_ZONE_DMA_BITS -#define ARCH_ZONE_DMA_BITS 24 -#endif - -/* - * For AMD SEV all DMA must be to unencrypted addresses. - */ -static inline bool force_dma_unencrypted(void) -{ - return sev_active(); -} - -static bool -check_addr(struct device *dev, dma_addr_t dma_addr, size_t size, - const char *caller) -{ - if (unlikely(dev && !dma_capable(dev, dma_addr, size))) { - if (!dev->dma_mask) { - dev_err(dev, - "%s: call on device without dma_mask\n", - caller); - return false; - } - - if (*dev->dma_mask >= DMA_BIT_MASK(32)) { - dev_err(dev, - "%s: overflow %pad+%zu of device mask %llx\n", - caller, &dma_addr, size, *dev->dma_mask); - } - return false; - } - return true; -} - -static bool dma_coherent_ok(struct device *dev, phys_addr_t phys, size_t size) -{ - dma_addr_t addr = force_dma_unencrypted() ? - __phys_to_dma(dev, phys) : phys_to_dma(dev, phys); - return addr + size - 1 <= dev->coherent_dma_mask; -} - -void *dma_direct_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - int page_order = get_order(size); - struct page *page = NULL; - void *ret; - - /* we always manually zero the memory once we are done: */ - gfp &= ~__GFP_ZERO; - - /* GFP_DMA32 and GFP_DMA are no ops without the corresponding zones: */ - if (dev->coherent_dma_mask <= DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - gfp |= GFP_DMA; - if (dev->coherent_dma_mask <= DMA_BIT_MASK(32) && !(gfp & GFP_DMA)) - gfp |= GFP_DMA32; - -again: - /* CMA can be used only in the context which permits sleeping */ - if (gfpflags_allow_blocking(gfp)) { - page = dma_alloc_from_contiguous(dev, count, page_order, gfp); - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - dma_release_from_contiguous(dev, page, count); - page = NULL; - } - } - if (!page) - page = alloc_pages_node(dev_to_node(dev), gfp, page_order); - - if (page && !dma_coherent_ok(dev, page_to_phys(page), size)) { - __free_pages(page, page_order); - page = NULL; - - if (IS_ENABLED(CONFIG_ZONE_DMA32) && - dev->coherent_dma_mask < DMA_BIT_MASK(64) && - !(gfp & (GFP_DMA32 | GFP_DMA))) { - gfp |= GFP_DMA32; - goto again; - } - - if (IS_ENABLED(CONFIG_ZONE_DMA) && - dev->coherent_dma_mask < DMA_BIT_MASK(32) && - !(gfp & GFP_DMA)) { - gfp = (gfp & ~GFP_DMA32) | GFP_DMA; - goto again; - } - } - - if (!page) - return NULL; - ret = page_address(page); - if (force_dma_unencrypted()) { - set_memory_decrypted((unsigned long)ret, 1 << page_order); - *dma_handle = __phys_to_dma(dev, page_to_phys(page)); - } else { - *dma_handle = phys_to_dma(dev, page_to_phys(page)); - } - memset(ret, 0, size); - return ret; -} - -/* - * NOTE: this function must never look at the dma_addr argument, because we want - * to be able to use it as a helper for iommu implementations as well. - */ -void dma_direct_free(struct device *dev, size_t size, void *cpu_addr, - dma_addr_t dma_addr, unsigned long attrs) -{ - unsigned int count = PAGE_ALIGN(size) >> PAGE_SHIFT; - unsigned int page_order = get_order(size); - - if (force_dma_unencrypted()) - set_memory_encrypted((unsigned long)cpu_addr, 1 << page_order); - if (!dma_release_from_contiguous(dev, virt_to_page(cpu_addr), count)) - free_pages((unsigned long)cpu_addr, page_order); -} - -dma_addr_t dma_direct_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t dma_addr = phys_to_dma(dev, page_to_phys(page)) + offset; - - if (!check_addr(dev, dma_addr, size, __func__)) - return DIRECT_MAPPING_ERROR; - return dma_addr; -} - -int dma_direct_map_sg(struct device *dev, struct scatterlist *sgl, int nents, - enum dma_data_direction dir, unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - - sg_dma_address(sg) = phys_to_dma(dev, sg_phys(sg)); - if (!check_addr(dev, sg_dma_address(sg), sg->length, __func__)) - return 0; - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -int dma_direct_supported(struct device *dev, u64 mask) -{ -#ifdef CONFIG_ZONE_DMA - if (mask < DMA_BIT_MASK(ARCH_ZONE_DMA_BITS)) - return 0; -#else - /* - * Because 32-bit DMA masks are so common we expect every architecture - * to be able to satisfy them - either by not supporting more physical - * memory, or by providing a ZONE_DMA32. If neither is the case, the - * architecture needs to use an IOMMU instead of the direct mapping. - */ - if (mask < DMA_BIT_MASK(32)) - return 0; -#endif - /* - * Various PCI/PCIe bridges have broken support for > 32bit DMA even - * if the device itself might support it. - */ - if (dev->dma_32bit_limit && mask > DMA_BIT_MASK(32)) - return 0; - return 1; -} - -int dma_direct_mapping_error(struct device *dev, dma_addr_t dma_addr) -{ - return dma_addr == DIRECT_MAPPING_ERROR; -} - -const struct dma_map_ops dma_direct_ops = { - .alloc = dma_direct_alloc, - .free = dma_direct_free, - .map_page = dma_direct_map_page, - .map_sg = dma_direct_map_sg, - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, -}; -EXPORT_SYMBOL(dma_direct_ops); diff --git a/lib/dma-noncoherent.c b/lib/dma-noncoherent.c deleted file mode 100644 index 79e9a757387f..000000000000 --- a/lib/dma-noncoherent.c +++ /dev/null @@ -1,102 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Copyright (C) 2018 Christoph Hellwig. - * - * DMA operations that map physical memory directly without providing cache - * coherence. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-direct.h> -#include <linux/dma-noncoherent.h> -#include <linux/scatterlist.h> - -static void dma_noncoherent_sync_single_for_device(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_device(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_device(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_device(dev, sg_phys(sg), sg->length, dir); -} - -static dma_addr_t dma_noncoherent_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - dma_addr_t addr; - - addr = dma_direct_map_page(dev, page, offset, size, dir, attrs); - if (!dma_mapping_error(dev, addr) && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - arch_sync_dma_for_device(dev, page_to_phys(page) + offset, - size, dir); - return addr; -} - -static int dma_noncoherent_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - nents = dma_direct_map_sg(dev, sgl, nents, dir, attrs); - if (nents > 0 && !(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_device(dev, sgl, nents, dir); - return nents; -} - -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU -static void dma_noncoherent_sync_single_for_cpu(struct device *dev, - dma_addr_t addr, size_t size, enum dma_data_direction dir) -{ - arch_sync_dma_for_cpu(dev, dma_to_phys(dev, addr), size, dir); -} - -static void dma_noncoherent_sync_sg_for_cpu(struct device *dev, - struct scatterlist *sgl, int nents, enum dma_data_direction dir) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nents, i) - arch_sync_dma_for_cpu(dev, sg_phys(sg), sg->length, dir); -} - -static void dma_noncoherent_unmap_page(struct device *dev, dma_addr_t addr, - size_t size, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_single_for_cpu(dev, addr, size, dir); -} - -static void dma_noncoherent_unmap_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, unsigned long attrs) -{ - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC)) - dma_noncoherent_sync_sg_for_cpu(dev, sgl, nents, dir); -} -#endif - -const struct dma_map_ops dma_noncoherent_ops = { - .alloc = arch_dma_alloc, - .free = arch_dma_free, - .mmap = arch_dma_mmap, - .sync_single_for_device = dma_noncoherent_sync_single_for_device, - .sync_sg_for_device = dma_noncoherent_sync_sg_for_device, - .map_page = dma_noncoherent_map_page, - .map_sg = dma_noncoherent_map_sg, -#ifdef CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU - .sync_single_for_cpu = dma_noncoherent_sync_single_for_cpu, - .sync_sg_for_cpu = dma_noncoherent_sync_sg_for_cpu, - .unmap_page = dma_noncoherent_unmap_page, - .unmap_sg = dma_noncoherent_unmap_sg, -#endif - .dma_supported = dma_direct_supported, - .mapping_error = dma_direct_mapping_error, - .cache_sync = arch_dma_cache_sync, -}; -EXPORT_SYMBOL(dma_noncoherent_ops); diff --git a/lib/dma-virt.c b/lib/dma-virt.c deleted file mode 100644 index 8e61a02ef9ca..000000000000 --- a/lib/dma-virt.c +++ /dev/null @@ -1,61 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * lib/dma-virt.c - * - * DMA operations that map to virtual addresses without flushing memory. - */ -#include <linux/export.h> -#include <linux/mm.h> -#include <linux/dma-mapping.h> -#include <linux/scatterlist.h> - -static void *dma_virt_alloc(struct device *dev, size_t size, - dma_addr_t *dma_handle, gfp_t gfp, - unsigned long attrs) -{ - void *ret; - - ret = (void *)__get_free_pages(gfp, get_order(size)); - if (ret) - *dma_handle = (uintptr_t)ret; - return ret; -} - -static void dma_virt_free(struct device *dev, size_t size, - void *cpu_addr, dma_addr_t dma_addr, - unsigned long attrs) -{ - free_pages((unsigned long)cpu_addr, get_order(size)); -} - -static dma_addr_t dma_virt_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - return (uintptr_t)(page_address(page) + offset); -} - -static int dma_virt_map_sg(struct device *dev, struct scatterlist *sgl, - int nents, enum dma_data_direction dir, - unsigned long attrs) -{ - int i; - struct scatterlist *sg; - - for_each_sg(sgl, sg, nents, i) { - BUG_ON(!sg_page(sg)); - sg_dma_address(sg) = (uintptr_t)sg_virt(sg); - sg_dma_len(sg) = sg->length; - } - - return nents; -} - -const struct dma_map_ops dma_virt_ops = { - .alloc = dma_virt_alloc, - .free = dma_virt_free, - .map_page = dma_virt_map_page, - .map_sg = dma_virt_map_sg, -}; -EXPORT_SYMBOL(dma_virt_ops); diff --git a/lib/gen_crc32table.c b/lib/gen_crc32table.c index 8f26660ea10a..f755b997b967 100644 --- a/lib/gen_crc32table.c +++ b/lib/gen_crc32table.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 #include <stdio.h> +#include "../include/linux/crc32poly.h" #include "../include/generated/autoconf.h" #include "crc32defs.h" #include <inttypes.h> @@ -57,7 +58,7 @@ static void crc32init_le_generic(const uint32_t polynomial, static void crc32init_le(void) { - crc32init_le_generic(CRCPOLY_LE, crc32table_le); + crc32init_le_generic(CRC32_POLY_LE, crc32table_le); } static void crc32cinit_le(void) @@ -76,7 +77,7 @@ static void crc32init_be(void) crc32table_be[0][0] = 0; for (i = 1; i < BE_TABLE_SIZE; i <<= 1) { - crc = (crc << 1) ^ ((crc & 0x80000000) ? CRCPOLY_BE : 0); + crc = (crc << 1) ^ ((crc & 0x80000000) ? CRC32_POLY_BE : 0); for (j = 0; j < i; j++) crc32table_be[0][i + j] = crc ^ crc32table_be[0][j]; } diff --git a/lib/ioremap.c b/lib/ioremap.c index 54e5bbaa3200..517f5853ffed 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -92,7 +92,7 @@ static inline int ioremap_pmd_range(pud_t *pud, unsigned long addr, if (ioremap_pmd_enabled() && ((next - addr) == PMD_SIZE) && IS_ALIGNED(phys_addr + addr, PMD_SIZE) && - pmd_free_pte_page(pmd)) { + pmd_free_pte_page(pmd, addr)) { if (pmd_set_huge(pmd, phys_addr + addr, prot)) continue; } @@ -119,7 +119,7 @@ static inline int ioremap_pud_range(p4d_t *p4d, unsigned long addr, if (ioremap_pud_enabled() && ((next - addr) == PUD_SIZE) && IS_ALIGNED(phys_addr + addr, PUD_SIZE) && - pud_free_pmd_page(pud)) { + pud_free_pmd_page(pud, addr)) { if (pud_set_huge(pud, phys_addr + addr, prot)) continue; } diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 7e43cd54c84c..8be175df3075 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -596,15 +596,70 @@ static unsigned long memcpy_mcsafe_to_page(struct page *page, size_t offset, return ret; } +static size_t copy_pipe_to_iter_mcsafe(const void *addr, size_t bytes, + struct iov_iter *i) +{ + struct pipe_inode_info *pipe = i->pipe; + size_t n, off, xfer = 0; + int idx; + + if (!sanity(i)) + return 0; + + bytes = n = push_pipe(i, bytes, &idx, &off); + if (unlikely(!n)) + return 0; + for ( ; n; idx = next_idx(idx, pipe), off = 0) { + size_t chunk = min_t(size_t, n, PAGE_SIZE - off); + unsigned long rem; + + rem = memcpy_mcsafe_to_page(pipe->bufs[idx].page, off, addr, + chunk); + i->idx = idx; + i->iov_offset = off + chunk - rem; + xfer += chunk - rem; + if (rem) + break; + n -= chunk; + addr += chunk; + } + i->count -= xfer; + return xfer; +} + +/** + * _copy_to_iter_mcsafe - copy to user with source-read error exception handling + * @addr: source kernel address + * @bytes: total transfer length + * @iter: destination iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_to_iter() for protecting read/write to persistent memory. + * Unless / until an architecture can guarantee identical performance + * between _copy_to_iter_mcsafe() and _copy_to_iter() it would be a + * performance regression to switch more users to the mcsafe version. + * + * Otherwise, the main differences between this and typical _copy_to_iter(). + * + * * Typical tail/residue handling after a fault retries the copy + * byte-by-byte until the fault happens again. Re-triggering machine + * checks is potentially fatal so the implementation uses source + * alignment and poison alignment assumptions to avoid re-triggering + * hardware exceptions. + * + * * ITER_KVEC, ITER_PIPE, and ITER_BVEC can return short copies. + * Compare to copy_to_iter() where only ITER_IOVEC attempts might return + * a short copy. + * + * See MCSAFE_TEST for self-test. + */ size_t _copy_to_iter_mcsafe(const void *addr, size_t bytes, struct iov_iter *i) { const char *from = addr; unsigned long rem, curr_addr, s_addr = (unsigned long) addr; - if (unlikely(i->type & ITER_PIPE)) { - WARN_ON(1); - return 0; - } + if (unlikely(i->type & ITER_PIPE)) + return copy_pipe_to_iter_mcsafe(addr, bytes, i); if (iter_is_iovec(i)) might_fault(); iterate_and_advance(i, bytes, v, @@ -701,6 +756,20 @@ size_t _copy_from_iter_nocache(void *addr, size_t bytes, struct iov_iter *i) EXPORT_SYMBOL(_copy_from_iter_nocache); #ifdef CONFIG_ARCH_HAS_UACCESS_FLUSHCACHE +/** + * _copy_from_iter_flushcache - write destination through cpu cache + * @addr: destination kernel address + * @bytes: total transfer length + * @iter: source iterator + * + * The pmem driver arranges for filesystem-dax to use this facility via + * dax_copy_from_iter() for ensuring that writes to persistent memory + * are flushed through the CPU cache. It is differentiated from + * _copy_from_iter_nocache() in that guarantees all data is flushed for + * all iterator types. The _copy_from_iter_nocache() only attempts to + * bypass the cache for the ITER_IOVEC case, and on some archs may use + * instructions that strand dirty-data in the cache. + */ size_t _copy_from_iter_flushcache(void *addr, size_t bytes, struct iov_iter *i) { char *to = addr; diff --git a/lib/klist.c b/lib/klist.c index 0507fa5d84c5..f6b547812fe3 100644 --- a/lib/klist.c +++ b/lib/klist.c @@ -336,8 +336,9 @@ struct klist_node *klist_prev(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *prev; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { prev = to_klist_node(last->n_node.prev); @@ -356,7 +357,7 @@ struct klist_node *klist_prev(struct klist_iter *i) prev = to_klist_node(prev->n_node.prev); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); @@ -377,8 +378,9 @@ struct klist_node *klist_next(struct klist_iter *i) void (*put)(struct klist_node *) = i->i_klist->put; struct klist_node *last = i->i_cur; struct klist_node *next; + unsigned long flags; - spin_lock(&i->i_klist->k_lock); + spin_lock_irqsave(&i->i_klist->k_lock, flags); if (last) { next = to_klist_node(last->n_node.next); @@ -397,7 +399,7 @@ struct klist_node *klist_next(struct klist_iter *i) next = to_klist_node(next->n_node.next); } - spin_unlock(&i->i_klist->k_lock); + spin_unlock_irqrestore(&i->i_klist->k_lock, flags); if (put && last) put(last); diff --git a/lib/kobject.c b/lib/kobject.c index 18989b5b3b56..97d86dc17c42 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -35,6 +35,25 @@ const void *kobject_namespace(struct kobject *kobj) return kobj->ktype->namespace(kobj); } +/** + * kobject_get_ownership - get sysfs ownership data for @kobj + * @kobj: kobject in question + * @uid: kernel user ID for sysfs objects + * @gid: kernel group ID for sysfs objects + * + * Returns initial uid/gid pair that should be used when creating sysfs + * representation of given kobject. Normally used to adjust ownership of + * objects in a container. + */ +void kobject_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +{ + *uid = GLOBAL_ROOT_UID; + *gid = GLOBAL_ROOT_GID; + + if (kobj->ktype->get_ownership) + kobj->ktype->get_ownership(kobj, uid, gid); +} + /* * populate_dir - populate directory with attributes. * @kobj: object we're working on. @@ -125,7 +144,7 @@ static void fill_kobj_path(struct kobject *kobj, char *path, int length) int cur = strlen(kobject_name(parent)); /* back up enough to print this name with '/' */ length -= cur; - strncpy(path + length, kobject_name(parent), cur); + memcpy(path + length, kobject_name(parent), cur); *(path + --length) = '/'; } @@ -868,9 +887,16 @@ static void kset_release(struct kobject *kobj) kfree(kset); } +void kset_get_ownership(struct kobject *kobj, kuid_t *uid, kgid_t *gid) +{ + if (kobj->parent) + kobject_get_ownership(kobj->parent, uid, gid); +} + static struct kobj_type kset_ktype = { .sysfs_ops = &kobj_sysfs_ops, - .release = kset_release, + .release = kset_release, + .get_ownership = kset_get_ownership, }; /** diff --git a/lib/locking-selftest.c b/lib/locking-selftest.c index b5c1293ce147..1e1bbf171eca 100644 --- a/lib/locking-selftest.c +++ b/lib/locking-selftest.c @@ -29,7 +29,7 @@ */ static unsigned int debug_locks_verbose; -static DEFINE_WW_CLASS(ww_lockdep); +static DEFINE_WD_CLASS(ww_lockdep); static int __init setup_debug_locks_verbose(char *str) { diff --git a/lib/mpi/mpi-pow.c b/lib/mpi/mpi-pow.c index 468fb7cd1221..a5c921e6d667 100644 --- a/lib/mpi/mpi-pow.c +++ b/lib/mpi/mpi-pow.c @@ -41,7 +41,7 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) mpi_ptr_t tspace = NULL; mpi_ptr_t rp, ep, mp, bp; mpi_size_t esize, msize, bsize, rsize; - int esign, msign, bsign, rsign; + int msign, bsign, rsign; mpi_size_t size; int mod_shift_cnt; int negative_result; @@ -53,7 +53,6 @@ int mpi_powm(MPI res, MPI base, MPI exp, MPI mod) esize = exp->nlimbs; msize = mod->nlimbs; size = 2 * msize; - esign = exp->sign; msign = mod->sign; rp = res->d; diff --git a/lib/nlattr.c b/lib/nlattr.c index dfa55c873c13..e335bcafa9e4 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -253,8 +253,8 @@ int nla_parse(struct nlattr **tb, int maxtype, const struct nlattr *head, if (policy) { err = validate_nla(nla, maxtype, policy); if (err < 0) { - if (extack) - extack->bad_attr = nla; + NL_SET_ERR_MSG_ATTR(extack, nla, + "Attribute failed policy validation"); goto errout; } } diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index 61a6b5aab07e..15ca78e1c7d4 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -87,11 +87,9 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, bool nmi_cpu_backtrace(struct pt_regs *regs) { - static arch_spinlock_t lock = __ARCH_SPIN_LOCK_UNLOCKED; int cpu = smp_processor_id(); if (cpumask_test_cpu(cpu, to_cpumask(backtrace_mask))) { - arch_spin_lock(&lock); if (regs && cpu_in_idle(instruction_pointer(regs))) { pr_warn("NMI backtrace for cpu %d skipped: idling at %pS\n", cpu, (void *)instruction_pointer(regs)); @@ -102,7 +100,6 @@ bool nmi_cpu_backtrace(struct pt_regs *regs) else dump_stack(); } - arch_spin_unlock(&lock); cpumask_clear_cpu(cpu, to_cpumask(backtrace_mask)); return true; } diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c deleted file mode 100644 index 9bbd9c5d375a..000000000000 --- a/lib/percpu_ida.c +++ /dev/null @@ -1,370 +0,0 @@ -/* - * Percpu IDA library - * - * Copyright (C) 2013 Datera, Inc. Kent Overstreet - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation; either version 2, or (at - * your option) any later version. - * - * This program is distributed in the hope that it will be useful, but - * WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU - * General Public License for more details. - */ - -#include <linux/mm.h> -#include <linux/bitmap.h> -#include <linux/bitops.h> -#include <linux/bug.h> -#include <linux/err.h> -#include <linux/export.h> -#include <linux/init.h> -#include <linux/kernel.h> -#include <linux/percpu.h> -#include <linux/sched/signal.h> -#include <linux/string.h> -#include <linux/spinlock.h> -#include <linux/percpu_ida.h> - -struct percpu_ida_cpu { - /* - * Even though this is percpu, we need a lock for tag stealing by remote - * CPUs: - */ - spinlock_t lock; - - /* nr_free/freelist form a stack of free IDs */ - unsigned nr_free; - unsigned freelist[]; -}; - -static inline void move_tags(unsigned *dst, unsigned *dst_nr, - unsigned *src, unsigned *src_nr, - unsigned nr) -{ - *src_nr -= nr; - memcpy(dst + *dst_nr, src + *src_nr, sizeof(unsigned) * nr); - *dst_nr += nr; -} - -/* - * Try to steal tags from a remote cpu's percpu freelist. - * - * We first check how many percpu freelists have tags - * - * Then we iterate through the cpus until we find some tags - we don't attempt - * to find the "best" cpu to steal from, to keep cacheline bouncing to a - * minimum. - */ -static inline void steal_tags(struct percpu_ida *pool, - struct percpu_ida_cpu *tags) -{ - unsigned cpus_have_tags, cpu = pool->cpu_last_stolen; - struct percpu_ida_cpu *remote; - - for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); - cpus_have_tags; cpus_have_tags--) { - cpu = cpumask_next(cpu, &pool->cpus_have_tags); - - if (cpu >= nr_cpu_ids) { - cpu = cpumask_first(&pool->cpus_have_tags); - if (cpu >= nr_cpu_ids) - BUG(); - } - - pool->cpu_last_stolen = cpu; - remote = per_cpu_ptr(pool->tag_cpu, cpu); - - cpumask_clear_cpu(cpu, &pool->cpus_have_tags); - - if (remote == tags) - continue; - - spin_lock(&remote->lock); - - if (remote->nr_free) { - memcpy(tags->freelist, - remote->freelist, - sizeof(unsigned) * remote->nr_free); - - tags->nr_free = remote->nr_free; - remote->nr_free = 0; - } - - spin_unlock(&remote->lock); - - if (tags->nr_free) - break; - } -} - -/* - * Pop up to IDA_PCPU_BATCH_MOVE IDs off the global freelist, and push them onto - * our percpu freelist: - */ -static inline void alloc_global_tags(struct percpu_ida *pool, - struct percpu_ida_cpu *tags) -{ - move_tags(tags->freelist, &tags->nr_free, - pool->freelist, &pool->nr_free, - min(pool->nr_free, pool->percpu_batch_size)); -} - -/** - * percpu_ida_alloc - allocate a tag - * @pool: pool to allocate from - * @state: task state for prepare_to_wait - * - * Returns a tag - an integer in the range [0..nr_tags) (passed to - * tag_pool_init()), or otherwise -ENOSPC on allocation failure. - * - * Safe to be called from interrupt context (assuming it isn't passed - * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). - * - * @gfp indicates whether or not to wait until a free id is available (it's not - * used for internal memory allocations); thus if passed __GFP_RECLAIM we may sleep - * however long it takes until another thread frees an id (same semantics as a - * mempool). - * - * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE. - */ -int percpu_ida_alloc(struct percpu_ida *pool, int state) -{ - DEFINE_WAIT(wait); - struct percpu_ida_cpu *tags; - unsigned long flags; - int tag = -ENOSPC; - - tags = raw_cpu_ptr(pool->tag_cpu); - spin_lock_irqsave(&tags->lock, flags); - - /* Fastpath */ - if (likely(tags->nr_free >= 0)) { - tag = tags->freelist[--tags->nr_free]; - spin_unlock_irqrestore(&tags->lock, flags); - return tag; - } - spin_unlock_irqrestore(&tags->lock, flags); - - while (1) { - spin_lock_irqsave(&pool->lock, flags); - tags = this_cpu_ptr(pool->tag_cpu); - - /* - * prepare_to_wait() must come before steal_tags(), in case - * percpu_ida_free() on another cpu flips a bit in - * cpus_have_tags - * - * global lock held and irqs disabled, don't need percpu lock - */ - if (state != TASK_RUNNING) - prepare_to_wait(&pool->wait, &wait, state); - - if (!tags->nr_free) - alloc_global_tags(pool, tags); - if (!tags->nr_free) - steal_tags(pool, tags); - - if (tags->nr_free) { - tag = tags->freelist[--tags->nr_free]; - if (tags->nr_free) - cpumask_set_cpu(smp_processor_id(), - &pool->cpus_have_tags); - } - - spin_unlock_irqrestore(&pool->lock, flags); - - if (tag >= 0 || state == TASK_RUNNING) - break; - - if (signal_pending_state(state, current)) { - tag = -ERESTARTSYS; - break; - } - - schedule(); - } - if (state != TASK_RUNNING) - finish_wait(&pool->wait, &wait); - - return tag; -} -EXPORT_SYMBOL_GPL(percpu_ida_alloc); - -/** - * percpu_ida_free - free a tag - * @pool: pool @tag was allocated from - * @tag: a tag previously allocated with percpu_ida_alloc() - * - * Safe to be called from interrupt context. - */ -void percpu_ida_free(struct percpu_ida *pool, unsigned tag) -{ - struct percpu_ida_cpu *tags; - unsigned long flags; - unsigned nr_free; - - BUG_ON(tag >= pool->nr_tags); - - tags = raw_cpu_ptr(pool->tag_cpu); - - spin_lock_irqsave(&tags->lock, flags); - tags->freelist[tags->nr_free++] = tag; - - nr_free = tags->nr_free; - - if (nr_free == 1) { - cpumask_set_cpu(smp_processor_id(), - &pool->cpus_have_tags); - wake_up(&pool->wait); - } - spin_unlock_irqrestore(&tags->lock, flags); - - if (nr_free == pool->percpu_max_size) { - spin_lock_irqsave(&pool->lock, flags); - spin_lock(&tags->lock); - - if (tags->nr_free == pool->percpu_max_size) { - move_tags(pool->freelist, &pool->nr_free, - tags->freelist, &tags->nr_free, - pool->percpu_batch_size); - - wake_up(&pool->wait); - } - spin_unlock(&tags->lock); - spin_unlock_irqrestore(&pool->lock, flags); - } -} -EXPORT_SYMBOL_GPL(percpu_ida_free); - -/** - * percpu_ida_destroy - release a tag pool's resources - * @pool: pool to free - * - * Frees the resources allocated by percpu_ida_init(). - */ -void percpu_ida_destroy(struct percpu_ida *pool) -{ - free_percpu(pool->tag_cpu); - free_pages((unsigned long) pool->freelist, - get_order(pool->nr_tags * sizeof(unsigned))); -} -EXPORT_SYMBOL_GPL(percpu_ida_destroy); - -/** - * percpu_ida_init - initialize a percpu tag pool - * @pool: pool to initialize - * @nr_tags: number of tags that will be available for allocation - * - * Initializes @pool so that it can be used to allocate tags - integers in the - * range [0, nr_tags). Typically, they'll be used by driver code to refer to a - * preallocated array of tag structures. - * - * Allocation is percpu, but sharding is limited by nr_tags - for best - * performance, the workload should not span more cpus than nr_tags / 128. - */ -int __percpu_ida_init(struct percpu_ida *pool, unsigned long nr_tags, - unsigned long max_size, unsigned long batch_size) -{ - unsigned i, cpu, order; - - memset(pool, 0, sizeof(*pool)); - - init_waitqueue_head(&pool->wait); - spin_lock_init(&pool->lock); - pool->nr_tags = nr_tags; - pool->percpu_max_size = max_size; - pool->percpu_batch_size = batch_size; - - /* Guard against overflow */ - if (nr_tags > (unsigned) INT_MAX + 1) { - pr_err("percpu_ida_init(): nr_tags too large\n"); - return -EINVAL; - } - - order = get_order(nr_tags * sizeof(unsigned)); - pool->freelist = (void *) __get_free_pages(GFP_KERNEL, order); - if (!pool->freelist) - return -ENOMEM; - - for (i = 0; i < nr_tags; i++) - pool->freelist[i] = i; - - pool->nr_free = nr_tags; - - pool->tag_cpu = __alloc_percpu(sizeof(struct percpu_ida_cpu) + - pool->percpu_max_size * sizeof(unsigned), - sizeof(unsigned)); - if (!pool->tag_cpu) - goto err; - - for_each_possible_cpu(cpu) - spin_lock_init(&per_cpu_ptr(pool->tag_cpu, cpu)->lock); - - return 0; -err: - percpu_ida_destroy(pool); - return -ENOMEM; -} -EXPORT_SYMBOL_GPL(__percpu_ida_init); - -/** - * percpu_ida_for_each_free - iterate free ids of a pool - * @pool: pool to iterate - * @fn: interate callback function - * @data: parameter for @fn - * - * Note, this doesn't guarantee to iterate all free ids restrictly. Some free - * ids might be missed, some might be iterated duplicated, and some might - * be iterated and not free soon. - */ -int percpu_ida_for_each_free(struct percpu_ida *pool, percpu_ida_cb fn, - void *data) -{ - unsigned long flags; - struct percpu_ida_cpu *remote; - unsigned cpu, i, err = 0; - - for_each_possible_cpu(cpu) { - remote = per_cpu_ptr(pool->tag_cpu, cpu); - spin_lock_irqsave(&remote->lock, flags); - for (i = 0; i < remote->nr_free; i++) { - err = fn(remote->freelist[i], data); - if (err) - break; - } - spin_unlock_irqrestore(&remote->lock, flags); - if (err) - goto out; - } - - spin_lock_irqsave(&pool->lock, flags); - for (i = 0; i < pool->nr_free; i++) { - err = fn(pool->freelist[i], data); - if (err) - break; - } - spin_unlock_irqrestore(&pool->lock, flags); -out: - return err; -} -EXPORT_SYMBOL_GPL(percpu_ida_for_each_free); - -/** - * percpu_ida_free_tags - return free tags number of a specific cpu or global pool - * @pool: pool related - * @cpu: specific cpu or global pool if @cpu == nr_cpu_ids - * - * Note: this just returns a snapshot of free tags number. - */ -unsigned percpu_ida_free_tags(struct percpu_ida *pool, int cpu) -{ - struct percpu_ida_cpu *remote; - if (cpu == nr_cpu_ids) - return pool->nr_free; - remote = per_cpu_ptr(pool->tag_cpu, cpu); - return remote->nr_free; -} -EXPORT_SYMBOL_GPL(percpu_ida_free_tags); diff --git a/lib/raid6/s390vx.uc b/lib/raid6/s390vx.uc index 140fa8bb5c23..914ebe98fc21 100644 --- a/lib/raid6/s390vx.uc +++ b/lib/raid6/s390vx.uc @@ -55,22 +55,24 @@ static inline void XOR(int x, int y, int z) asm volatile ("VX %0,%1,%2" : : "i" (x), "i" (y), "i" (z)); } -static inline void LOAD_DATA(int x, int n, u8 *ptr) +static inline void LOAD_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VLM %2,%3,0,%r1" - : : "m" (*__ptr), "a" (__ptr), "i" (x), "i" (x + n - 1)); + : : "m" (*__ptr), "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } -static inline void STORE_DATA(int x, int n, u8 *ptr) +static inline void STORE_DATA(int x, u8 *ptr) { - typedef struct { u8 _[16*n]; } addrtype; + typedef struct { u8 _[16 * $#]; } addrtype; register addrtype *__ptr asm("1") = (addrtype *) ptr; asm volatile ("VSTM %2,%3,0,1" - : "=m" (*__ptr) : "a" (__ptr), "i" (x), "i" (x + n - 1)); + : "=m" (*__ptr) : "a" (__ptr), "i" (x), + "i" (x + $# - 1)); } static inline void COPY_VEC(int x, int y) @@ -93,19 +95,19 @@ static void raid6_s390vx$#_gen_syndrome(int disks, size_t bytes, void **ptrs) q = dptr[z0 + 2]; /* RS syndrome */ for (d = 0; d < bytes; d += $#*NSIZE) { - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= 0; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } - STORE_DATA(0,$#,&p[d]); - STORE_DATA(8,$#,&q[d]); + STORE_DATA(0,&p[d]); + STORE_DATA(8,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } @@ -127,14 +129,14 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, for (d = 0; d < bytes; d += $#*NSIZE) { /* P/Q data pages */ - LOAD_DATA(0,$#,&dptr[z0][d]); + LOAD_DATA(0,&dptr[z0][d]); COPY_VEC(8+$$,0+$$); for (z = z0 - 1; z >= start; z--) { MASK(16+$$,8+$$); AND(16+$$,16+$$,25); SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); - LOAD_DATA(16,$#,&dptr[z][d]); + LOAD_DATA(16,&dptr[z][d]); XOR(0+$$,0+$$,16+$$); XOR(8+$$,8+$$,16+$$); } @@ -145,12 +147,12 @@ static void raid6_s390vx$#_xor_syndrome(int disks, int start, int stop, SHLBYTE(8+$$,8+$$); XOR(8+$$,8+$$,16+$$); } - LOAD_DATA(16,$#,&p[d]); + LOAD_DATA(16,&p[d]); XOR(16+$$,16+$$,0+$$); - STORE_DATA(16,$#,&p[d]); - LOAD_DATA(16,$#,&q[d]); + STORE_DATA(16,&p[d]); + LOAD_DATA(16,&q[d]); XOR(16+$$,16+$$,8+$$); - STORE_DATA(16,$#,&q[d]); + STORE_DATA(16,&q[d]); } kernel_fpu_end(&vxstate, KERNEL_VXR); } diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index fcb4ce682c6f..bf043258fa00 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,4 +1,5 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/bug.h> #include <linux/kernel.h> #include <asm/div64.h> #include <linux/reciprocal_div.h> @@ -26,3 +27,43 @@ struct reciprocal_value reciprocal_value(u32 d) return R; } EXPORT_SYMBOL(reciprocal_value); + +struct reciprocal_value_adv reciprocal_value_adv(u32 d, u8 prec) +{ + struct reciprocal_value_adv R; + u32 l, post_shift; + u64 mhigh, mlow; + + /* ceil(log2(d)) */ + l = fls(d - 1); + /* NOTE: mlow/mhigh could overflow u64 when l == 32. This case needs to + * be handled before calling "reciprocal_value_adv", please see the + * comment at include/linux/reciprocal_div.h. + */ + WARN(l == 32, + "ceil(log2(0x%08x)) == 32, %s doesn't support such divisor", + d, __func__); + post_shift = l; + mlow = 1ULL << (32 + l); + do_div(mlow, d); + mhigh = (1ULL << (32 + l)) + (1ULL << (32 + l - prec)); + do_div(mhigh, d); + + for (; post_shift > 0; post_shift--) { + u64 lo = mlow >> 1, hi = mhigh >> 1; + + if (lo >= hi) + break; + + mlow = lo; + mhigh = hi; + } + + R.m = (u32)mhigh; + R.sh = post_shift; + R.exp = l; + R.is_wide_m = mhigh > U32_MAX; + + return R; +} +EXPORT_SYMBOL(reciprocal_value_adv); diff --git a/lib/reed_solomon/reed_solomon.c b/lib/reed_solomon/reed_solomon.c index d8bb1a1eba72..e5fdc8b9e856 100644 --- a/lib/reed_solomon/reed_solomon.c +++ b/lib/reed_solomon/reed_solomon.c @@ -283,7 +283,7 @@ out: * in index form * @prim: primitive element to generate polynomial roots * @nroots: RS code generator polynomial degree (number of roots) - * @gfp: GFP_ flags for allocations + * @gfp: Memory allocation flags. */ struct rs_control *init_rs_gfp(int symsize, int gfpoly, int fcr, int prim, int nroots, gfp_t gfp) diff --git a/lib/refcount.c b/lib/refcount.c index 0eb48353abe3..ebcf8cd49e05 100644 --- a/lib/refcount.c +++ b/lib/refcount.c @@ -35,13 +35,13 @@ * */ +#include <linux/mutex.h> #include <linux/refcount.h> +#include <linux/spinlock.h> #include <linux/bug.h> -#ifdef CONFIG_REFCOUNT_FULL - /** - * refcount_add_not_zero - add a value to a refcount unless it is 0 + * refcount_add_not_zero_checked - add a value to a refcount unless it is 0 * @i: the value to add to the refcount * @r: the refcount * @@ -58,7 +58,7 @@ * * Return: false if the passed refcount is 0, true otherwise */ -bool refcount_add_not_zero(unsigned int i, refcount_t *r) +bool refcount_add_not_zero_checked(unsigned int i, refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); @@ -79,10 +79,10 @@ bool refcount_add_not_zero(unsigned int i, refcount_t *r) return true; } -EXPORT_SYMBOL(refcount_add_not_zero); +EXPORT_SYMBOL(refcount_add_not_zero_checked); /** - * refcount_add - add a value to a refcount + * refcount_add_checked - add a value to a refcount * @i: the value to add to the refcount * @r: the refcount * @@ -97,14 +97,14 @@ EXPORT_SYMBOL(refcount_add_not_zero); * cases, refcount_inc(), or one of its variants, should instead be used to * increment a reference count. */ -void refcount_add(unsigned int i, refcount_t *r) +void refcount_add_checked(unsigned int i, refcount_t *r) { - WARN_ONCE(!refcount_add_not_zero(i, r), "refcount_t: addition on 0; use-after-free.\n"); + WARN_ONCE(!refcount_add_not_zero_checked(i, r), "refcount_t: addition on 0; use-after-free.\n"); } -EXPORT_SYMBOL(refcount_add); +EXPORT_SYMBOL(refcount_add_checked); /** - * refcount_inc_not_zero - increment a refcount unless it is 0 + * refcount_inc_not_zero_checked - increment a refcount unless it is 0 * @r: the refcount to increment * * Similar to atomic_inc_not_zero(), but will saturate at UINT_MAX and WARN. @@ -115,7 +115,7 @@ EXPORT_SYMBOL(refcount_add); * * Return: true if the increment was successful, false otherwise */ -bool refcount_inc_not_zero(refcount_t *r) +bool refcount_inc_not_zero_checked(refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); @@ -134,10 +134,10 @@ bool refcount_inc_not_zero(refcount_t *r) return true; } -EXPORT_SYMBOL(refcount_inc_not_zero); +EXPORT_SYMBOL(refcount_inc_not_zero_checked); /** - * refcount_inc - increment a refcount + * refcount_inc_checked - increment a refcount * @r: the refcount to increment * * Similar to atomic_inc(), but will saturate at UINT_MAX and WARN. @@ -148,14 +148,14 @@ EXPORT_SYMBOL(refcount_inc_not_zero); * Will WARN if the refcount is 0, as this represents a possible use-after-free * condition. */ -void refcount_inc(refcount_t *r) +void refcount_inc_checked(refcount_t *r) { - WARN_ONCE(!refcount_inc_not_zero(r), "refcount_t: increment on 0; use-after-free.\n"); + WARN_ONCE(!refcount_inc_not_zero_checked(r), "refcount_t: increment on 0; use-after-free.\n"); } -EXPORT_SYMBOL(refcount_inc); +EXPORT_SYMBOL(refcount_inc_checked); /** - * refcount_sub_and_test - subtract from a refcount and test if it is 0 + * refcount_sub_and_test_checked - subtract from a refcount and test if it is 0 * @i: amount to subtract from the refcount * @r: the refcount * @@ -174,7 +174,7 @@ EXPORT_SYMBOL(refcount_inc); * * Return: true if the resulting refcount is 0, false otherwise */ -bool refcount_sub_and_test(unsigned int i, refcount_t *r) +bool refcount_sub_and_test_checked(unsigned int i, refcount_t *r) { unsigned int new, val = atomic_read(&r->refs); @@ -192,10 +192,10 @@ bool refcount_sub_and_test(unsigned int i, refcount_t *r) return !new; } -EXPORT_SYMBOL(refcount_sub_and_test); +EXPORT_SYMBOL(refcount_sub_and_test_checked); /** - * refcount_dec_and_test - decrement a refcount and test if it is 0 + * refcount_dec_and_test_checked - decrement a refcount and test if it is 0 * @r: the refcount * * Similar to atomic_dec_and_test(), it will WARN on underflow and fail to @@ -207,14 +207,14 @@ EXPORT_SYMBOL(refcount_sub_and_test); * * Return: true if the resulting refcount is 0, false otherwise */ -bool refcount_dec_and_test(refcount_t *r) +bool refcount_dec_and_test_checked(refcount_t *r) { - return refcount_sub_and_test(1, r); + return refcount_sub_and_test_checked(1, r); } -EXPORT_SYMBOL(refcount_dec_and_test); +EXPORT_SYMBOL(refcount_dec_and_test_checked); /** - * refcount_dec - decrement a refcount + * refcount_dec_checked - decrement a refcount * @r: the refcount * * Similar to atomic_dec(), it will WARN on underflow and fail to decrement @@ -223,12 +223,11 @@ EXPORT_SYMBOL(refcount_dec_and_test); * Provides release memory ordering, such that prior loads and stores are done * before. */ -void refcount_dec(refcount_t *r) +void refcount_dec_checked(refcount_t *r) { - WARN_ONCE(refcount_dec_and_test(r), "refcount_t: decrement hit 0; leaking memory.\n"); + WARN_ONCE(refcount_dec_and_test_checked(r), "refcount_t: decrement hit 0; leaking memory.\n"); } -EXPORT_SYMBOL(refcount_dec); -#endif /* CONFIG_REFCOUNT_FULL */ +EXPORT_SYMBOL(refcount_dec_checked); /** * refcount_dec_if_one - decrement a refcount if it is 1 @@ -350,3 +349,31 @@ bool refcount_dec_and_lock(refcount_t *r, spinlock_t *lock) } EXPORT_SYMBOL(refcount_dec_and_lock); +/** + * refcount_dec_and_lock_irqsave - return holding spinlock with disabled + * interrupts if able to decrement refcount to 0 + * @r: the refcount + * @lock: the spinlock to be locked + * @flags: saved IRQ-flags if the is acquired + * + * Same as refcount_dec_and_lock() above except that the spinlock is acquired + * with disabled interupts. + * + * Return: true and hold spinlock if able to decrement refcount to 0, false + * otherwise + */ +bool refcount_dec_and_lock_irqsave(refcount_t *r, spinlock_t *lock, + unsigned long *flags) +{ + if (refcount_dec_not_one(r)) + return false; + + spin_lock_irqsave(lock, *flags); + if (!refcount_dec_and_test(r)) { + spin_unlock_irqrestore(lock, *flags); + return false; + } + + return true; +} +EXPORT_SYMBOL(refcount_dec_and_lock_irqsave); diff --git a/lib/rhashtable.c b/lib/rhashtable.c index 9427b5766134..ae4223e0f5bc 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -28,6 +28,7 @@ #include <linux/rhashtable.h> #include <linux/err.h> #include <linux/export.h> +#include <linux/rhashtable.h> #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4U @@ -115,8 +116,7 @@ static void bucket_table_free_rcu(struct rcu_head *head) static union nested_table *nested_table_alloc(struct rhashtable *ht, union nested_table __rcu **prev, - unsigned int shifted, - unsigned int nhash) + bool leaf) { union nested_table *ntbl; int i; @@ -127,10 +127,9 @@ static union nested_table *nested_table_alloc(struct rhashtable *ht, ntbl = kzalloc(PAGE_SIZE, GFP_ATOMIC); - if (ntbl && shifted) { - for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0].bucket); i++) - INIT_RHT_NULLS_HEAD(ntbl[i].bucket, ht, - (i << shifted) | nhash); + if (ntbl && leaf) { + for (i = 0; i < PAGE_SIZE / sizeof(ntbl[0]); i++) + INIT_RHT_NULLS_HEAD(ntbl[i].bucket); } rcu_assign_pointer(*prev, ntbl); @@ -156,7 +155,7 @@ static struct bucket_table *nested_bucket_table_alloc(struct rhashtable *ht, return NULL; if (!nested_table_alloc(ht, (union nested_table __rcu **)tbl->buckets, - 0, 0)) { + false)) { kfree(tbl); return NULL; } @@ -206,7 +205,7 @@ static struct bucket_table *bucket_table_alloc(struct rhashtable *ht, tbl->hash_rnd = get_random_u32(); for (i = 0; i < nbuckets; i++) - INIT_RHT_NULLS_HEAD(tbl->buckets[i], ht, i); + INIT_RHT_NULLS_HEAD(tbl->buckets[i]); return tbl; } @@ -227,8 +226,7 @@ static struct bucket_table *rhashtable_last_table(struct rhashtable *ht, static int rhashtable_rehash_one(struct rhashtable *ht, unsigned int old_hash) { struct bucket_table *old_tbl = rht_dereference(ht->tbl, ht); - struct bucket_table *new_tbl = rhashtable_last_table(ht, - rht_dereference_rcu(old_tbl->future_tbl, ht)); + struct bucket_table *new_tbl = rhashtable_last_table(ht, old_tbl); struct rhash_head __rcu **pprev = rht_bucket_var(old_tbl, old_hash); int err = -EAGAIN; struct rhash_head *head, *next, *entry; @@ -298,21 +296,14 @@ static int rhashtable_rehash_attach(struct rhashtable *ht, struct bucket_table *old_tbl, struct bucket_table *new_tbl) { - /* Protect future_tbl using the first bucket lock. */ - spin_lock_bh(old_tbl->locks); - - /* Did somebody beat us to it? */ - if (rcu_access_pointer(old_tbl->future_tbl)) { - spin_unlock_bh(old_tbl->locks); - return -EEXIST; - } - /* Make insertions go into the new, empty table right away. Deletions * and lookups will be attempted in both tables until we synchronize. + * As cmpxchg() provides strong barriers, we do not need + * rcu_assign_pointer(). */ - rcu_assign_pointer(old_tbl->future_tbl, new_tbl); - spin_unlock_bh(old_tbl->locks); + if (cmpxchg(&old_tbl->future_tbl, NULL, new_tbl) != NULL) + return -EEXIST; return 0; } @@ -475,7 +466,7 @@ static int rhashtable_insert_rehash(struct rhashtable *ht, fail: /* Do not fail the insert if someone else did a rehash. */ - if (likely(rcu_dereference_raw(tbl->future_tbl))) + if (likely(rcu_access_pointer(tbl->future_tbl))) return 0; /* Schedule async rehash to retry allocation in process context. */ @@ -548,7 +539,7 @@ static struct bucket_table *rhashtable_insert_one(struct rhashtable *ht, if (PTR_ERR(data) != -EAGAIN && PTR_ERR(data) != -ENOENT) return ERR_CAST(data); - new_tbl = rcu_dereference(tbl->future_tbl); + new_tbl = rht_dereference_rcu(tbl->future_tbl, ht); if (new_tbl) return new_tbl; @@ -607,7 +598,7 @@ static void *rhashtable_try_insert(struct rhashtable *ht, const void *key, break; spin_unlock_bh(lock); - tbl = rcu_dereference(tbl->future_tbl); + tbl = rht_dereference_rcu(tbl->future_tbl, ht); } data = rhashtable_lookup_one(ht, tbl, hash, key, obj); @@ -774,7 +765,7 @@ int rhashtable_walk_start_check(struct rhashtable_iter *iter) skip++; if (list == iter->list) { iter->p = p; - skip = skip; + iter->skip = skip; goto found; } } @@ -964,8 +955,16 @@ EXPORT_SYMBOL_GPL(rhashtable_walk_stop); static size_t rounded_hashtable_size(const struct rhashtable_params *params) { - return max(roundup_pow_of_two(params->nelem_hint * 4 / 3), - (unsigned long)params->min_size); + size_t retsize; + + if (params->nelem_hint) + retsize = max(roundup_pow_of_two(params->nelem_hint * 4 / 3), + (unsigned long)params->min_size); + else + retsize = max(HASH_DEFAULT_SIZE, + (unsigned long)params->min_size); + + return retsize; } static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) @@ -994,7 +993,6 @@ static u32 rhashtable_jhash2(const void *key, u32 length, u32 seed) * .key_offset = offsetof(struct test_obj, key), * .key_len = sizeof(int), * .hashfn = jhash, - * .nulls_base = (1U << RHT_BASE_SHIFT), * }; * * Configuration Example 2: Variable length keys @@ -1022,15 +1020,10 @@ int rhashtable_init(struct rhashtable *ht, struct bucket_table *tbl; size_t size; - size = HASH_DEFAULT_SIZE; - if ((!params->key_len && !params->obj_hashfn) || (params->obj_hashfn && !params->obj_cmpfn)) return -EINVAL; - if (params->nulls_base && params->nulls_base < (1U << RHT_BASE_SHIFT)) - return -EINVAL; - memset(ht, 0, sizeof(*ht)); mutex_init(&ht->mutex); spin_lock_init(&ht->lock); @@ -1050,8 +1043,7 @@ int rhashtable_init(struct rhashtable *ht, ht->p.min_size = max_t(u16, ht->p.min_size, HASH_MIN_SIZE); - if (params->nelem_hint) - size = rounded_hashtable_size(&ht->p); + size = rounded_hashtable_size(&ht->p); if (params->locks_mul) ht->p.locks_mul = roundup_pow_of_two(params->locks_mul); @@ -1095,10 +1087,6 @@ int rhltable_init(struct rhltable *hlt, const struct rhashtable_params *params) { int err; - /* No rhlist NULLs marking for now. */ - if (params->nulls_base) - return -EINVAL; - err = rhashtable_init(&hlt->ht, params); hlt->ht.rhlist = true; return err; @@ -1143,13 +1131,14 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, void (*free_fn)(void *ptr, void *arg), void *arg) { - struct bucket_table *tbl; + struct bucket_table *tbl, *next_tbl; unsigned int i; cancel_work_sync(&ht->run_work); mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); +restart: if (free_fn) { for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; @@ -1166,7 +1155,12 @@ void rhashtable_free_and_destroy(struct rhashtable *ht, } } + next_tbl = rht_dereference(tbl->future_tbl, ht); bucket_table_free(tbl); + if (next_tbl) { + tbl = next_tbl; + goto restart; + } mutex_unlock(&ht->mutex); } EXPORT_SYMBOL_GPL(rhashtable_free_and_destroy); @@ -1216,25 +1210,18 @@ struct rhash_head __rcu **rht_bucket_nested_insert(struct rhashtable *ht, unsigned int index = hash & ((1 << tbl->nest) - 1); unsigned int size = tbl->size >> tbl->nest; union nested_table *ntbl; - unsigned int shifted; - unsigned int nhash; ntbl = (union nested_table *)rcu_dereference_raw(tbl->buckets[0]); hash >>= tbl->nest; - nhash = index; - shifted = tbl->nest; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0, nhash); + size <= (1 << shift)); while (ntbl && size > (1 << shift)) { index = hash & ((1 << shift) - 1); size >>= shift; hash >>= shift; - nhash |= index << shifted; - shifted += shift; ntbl = nested_table_alloc(ht, &ntbl[index].table, - size <= (1 << shift) ? shifted : 0, - nhash); + size <= (1 << shift)); } if (!ntbl) diff --git a/lib/scatterlist.c b/lib/scatterlist.c index 1642fd507a96..7c6096a71704 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -24,9 +24,6 @@ **/ struct scatterlist *sg_next(struct scatterlist *sg) { -#ifdef CONFIG_DEBUG_SG - BUG_ON(sg->sg_magic != SG_MAGIC); -#endif if (sg_is_last(sg)) return NULL; @@ -111,10 +108,7 @@ struct scatterlist *sg_last(struct scatterlist *sgl, unsigned int nents) for_each_sg(sgl, sg, nents, i) ret = sg; -#ifdef CONFIG_DEBUG_SG - BUG_ON(sgl[0].sg_magic != SG_MAGIC); BUG_ON(!sg_is_last(ret)); -#endif return ret; } EXPORT_SYMBOL(sg_last); diff --git a/lib/swiotlb.c b/lib/swiotlb.c deleted file mode 100644 index 04b68d9dffac..000000000000 --- a/lib/swiotlb.c +++ /dev/null @@ -1,1087 +0,0 @@ -/* - * Dynamic DMA mapping support. - * - * This implementation is a fallback for platforms that do not support - * I/O TLBs (aka DMA address translation hardware). - * Copyright (C) 2000 Asit Mallick <Asit.K.Mallick@intel.com> - * Copyright (C) 2000 Goutham Rao <goutham.rao@intel.com> - * Copyright (C) 2000, 2003 Hewlett-Packard Co - * David Mosberger-Tang <davidm@hpl.hp.com> - * - * 03/05/07 davidm Switch from PCI-DMA to generic device DMA API. - * 00/12/13 davidm Rename to swiotlb.c and add mark_clean() to avoid - * unnecessary i-cache flushing. - * 04/07/.. ak Better overflow handling. Assorted fixes. - * 05/09/10 linville Add support for syncing ranges, support syncing for - * DMA_BIDIRECTIONAL mappings, miscellaneous cleanup. - * 08/12/11 beckyb Add highmem support - */ - -#include <linux/cache.h> -#include <linux/dma-direct.h> -#include <linux/mm.h> -#include <linux/export.h> -#include <linux/spinlock.h> -#include <linux/string.h> -#include <linux/swiotlb.h> -#include <linux/pfn.h> -#include <linux/types.h> -#include <linux/ctype.h> -#include <linux/highmem.h> -#include <linux/gfp.h> -#include <linux/scatterlist.h> -#include <linux/mem_encrypt.h> -#include <linux/set_memory.h> - -#include <asm/io.h> -#include <asm/dma.h> - -#include <linux/init.h> -#include <linux/bootmem.h> -#include <linux/iommu-helper.h> - -#define CREATE_TRACE_POINTS -#include <trace/events/swiotlb.h> - -#define OFFSET(val,align) ((unsigned long) \ - ( (val) & ( (align) - 1))) - -#define SLABS_PER_PAGE (1 << (PAGE_SHIFT - IO_TLB_SHIFT)) - -/* - * Minimum IO TLB size to bother booting with. Systems with mainly - * 64bit capable cards will only lightly use the swiotlb. If we can't - * allocate a contiguous 1MB, we're probably in trouble anyway. - */ -#define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) - -enum swiotlb_force swiotlb_force; - -/* - * Used to do a quick range check in swiotlb_tbl_unmap_single and - * swiotlb_tbl_sync_single_*, to see if the memory was in fact allocated by this - * API. - */ -static phys_addr_t io_tlb_start, io_tlb_end; - -/* - * The number of IO TLB blocks (in groups of 64) between io_tlb_start and - * io_tlb_end. This is command line adjustable via setup_io_tlb_npages. - */ -static unsigned long io_tlb_nslabs; - -/* - * When the IOMMU overflows we return a fallback buffer. This sets the size. - */ -static unsigned long io_tlb_overflow = 32*1024; - -static phys_addr_t io_tlb_overflow_buffer; - -/* - * This is a free list describing the number of free entries available from - * each index - */ -static unsigned int *io_tlb_list; -static unsigned int io_tlb_index; - -/* - * Max segment that we can provide which (if pages are contingous) will - * not be bounced (unless SWIOTLB_FORCE is set). - */ -unsigned int max_segment; - -/* - * We need to save away the original address corresponding to a mapped entry - * for the sync operations. - */ -#define INVALID_PHYS_ADDR (~(phys_addr_t)0) -static phys_addr_t *io_tlb_orig_addr; - -/* - * Protect the above data structures in the map and unmap calls - */ -static DEFINE_SPINLOCK(io_tlb_lock); - -static int late_alloc; - -static int __init -setup_io_tlb_npages(char *str) -{ - if (isdigit(*str)) { - io_tlb_nslabs = simple_strtoul(str, &str, 0); - /* avoid tail segment of size < IO_TLB_SEGSIZE */ - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - if (*str == ',') - ++str; - if (!strcmp(str, "force")) { - swiotlb_force = SWIOTLB_FORCE; - } else if (!strcmp(str, "noforce")) { - swiotlb_force = SWIOTLB_NO_FORCE; - io_tlb_nslabs = 1; - } - - return 0; -} -early_param("swiotlb", setup_io_tlb_npages); -/* make io_tlb_overflow tunable too? */ - -unsigned long swiotlb_nr_tbl(void) -{ - return io_tlb_nslabs; -} -EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); - -unsigned int swiotlb_max_segment(void) -{ - return max_segment; -} -EXPORT_SYMBOL_GPL(swiotlb_max_segment); - -void swiotlb_set_max_segment(unsigned int val) -{ - if (swiotlb_force == SWIOTLB_FORCE) - max_segment = 1; - else - max_segment = rounddown(val, PAGE_SIZE); -} - -/* default to 64MB */ -#define IO_TLB_DEFAULT_SIZE (64UL<<20) -unsigned long swiotlb_size_or_default(void) -{ - unsigned long size; - - size = io_tlb_nslabs << IO_TLB_SHIFT; - - return size ? size : (IO_TLB_DEFAULT_SIZE); -} - -static bool no_iotlb_memory; - -void swiotlb_print_info(void) -{ - unsigned long bytes = io_tlb_nslabs << IO_TLB_SHIFT; - unsigned char *vstart, *vend; - - if (no_iotlb_memory) { - pr_warn("software IO TLB: No low mem\n"); - return; - } - - vstart = phys_to_virt(io_tlb_start); - vend = phys_to_virt(io_tlb_end); - - printk(KERN_INFO "software IO TLB [mem %#010llx-%#010llx] (%luMB) mapped at [%p-%p]\n", - (unsigned long long)io_tlb_start, - (unsigned long long)io_tlb_end, - bytes >> 20, vstart, vend - 1); -} - -/* - * Early SWIOTLB allocation may be too early to allow an architecture to - * perform the desired operations. This function allows the architecture to - * call SWIOTLB when the operations are possible. It needs to be called - * before the SWIOTLB memory is used. - */ -void __init swiotlb_update_mem_attributes(void) -{ - void *vaddr; - unsigned long bytes; - - if (no_iotlb_memory || late_alloc) - return; - - vaddr = phys_to_virt(io_tlb_start); - bytes = PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); - - vaddr = phys_to_virt(io_tlb_overflow_buffer); - bytes = PAGE_ALIGN(io_tlb_overflow); - set_memory_decrypted((unsigned long)vaddr, bytes >> PAGE_SHIFT); - memset(vaddr, 0, bytes); -} - -int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) -{ - void *v_overflow_buffer; - unsigned long i, bytes; - - bytes = nslabs << IO_TLB_SHIFT; - - io_tlb_nslabs = nslabs; - io_tlb_start = __pa(tlb); - io_tlb_end = io_tlb_start + bytes; - - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = memblock_virt_alloc_low_nopanic( - PAGE_ALIGN(io_tlb_overflow), - PAGE_SIZE); - if (!v_overflow_buffer) - return -ENOMEM; - - io_tlb_overflow_buffer = __pa(v_overflow_buffer); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = memblock_virt_alloc( - PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), - PAGE_SIZE); - io_tlb_orig_addr = memblock_virt_alloc( - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), - PAGE_SIZE); - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - io_tlb_index = 0; - - if (verbose) - swiotlb_print_info(); - - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - return 0; -} - -/* - * Statically reserve bounce buffer space and initialize bounce buffer data - * structures for the software IO TLB used to implement the DMA API. - */ -void __init -swiotlb_init(int verbose) -{ - size_t default_size = IO_TLB_DEFAULT_SIZE; - unsigned char *vstart; - unsigned long bytes; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - /* Get IO TLB memory from the low pages */ - vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); - if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) - return; - - if (io_tlb_start) - memblock_free_early(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - pr_warn("Cannot allocate SWIOTLB buffer"); - no_iotlb_memory = true; -} - -/* - * Systems with larger DMA zones (those that don't support ISA) can - * initialize the swiotlb later using the slab allocator if needed. - * This should be just like above, but with some error catching. - */ -int -swiotlb_late_init_with_default_size(size_t default_size) -{ - unsigned long bytes, req_nslabs = io_tlb_nslabs; - unsigned char *vstart = NULL; - unsigned int order; - int rc = 0; - - if (!io_tlb_nslabs) { - io_tlb_nslabs = (default_size >> IO_TLB_SHIFT); - io_tlb_nslabs = ALIGN(io_tlb_nslabs, IO_TLB_SEGSIZE); - } - - /* - * Get IO TLB memory from the low pages - */ - order = get_order(io_tlb_nslabs << IO_TLB_SHIFT); - io_tlb_nslabs = SLABS_PER_PAGE << order; - bytes = io_tlb_nslabs << IO_TLB_SHIFT; - - while ((SLABS_PER_PAGE << order) > IO_TLB_MIN_SLABS) { - vstart = (void *)__get_free_pages(GFP_DMA | __GFP_NOWARN, - order); - if (vstart) - break; - order--; - } - - if (!vstart) { - io_tlb_nslabs = req_nslabs; - return -ENOMEM; - } - if (order != get_order(bytes)) { - printk(KERN_WARNING "Warning: only able to allocate %ld MB " - "for software IO TLB\n", (PAGE_SIZE << order) >> 20); - io_tlb_nslabs = SLABS_PER_PAGE << order; - } - rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); - if (rc) - free_pages((unsigned long)vstart, order); - - return rc; -} - -int -swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) -{ - unsigned long i, bytes; - unsigned char *v_overflow_buffer; - - bytes = nslabs << IO_TLB_SHIFT; - - io_tlb_nslabs = nslabs; - io_tlb_start = virt_to_phys(tlb); - io_tlb_end = io_tlb_start + bytes; - - set_memory_decrypted((unsigned long)tlb, bytes >> PAGE_SHIFT); - memset(tlb, 0, bytes); - - /* - * Get the overflow emergency buffer - */ - v_overflow_buffer = (void *)__get_free_pages(GFP_DMA, - get_order(io_tlb_overflow)); - if (!v_overflow_buffer) - goto cleanup2; - - set_memory_decrypted((unsigned long)v_overflow_buffer, - io_tlb_overflow >> PAGE_SHIFT); - memset(v_overflow_buffer, 0, io_tlb_overflow); - io_tlb_overflow_buffer = virt_to_phys(v_overflow_buffer); - - /* - * Allocate and initialize the free list array. This array is used - * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE - * between io_tlb_start and io_tlb_end. - */ - io_tlb_list = (unsigned int *)__get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * sizeof(int))); - if (!io_tlb_list) - goto cleanup3; - - io_tlb_orig_addr = (phys_addr_t *) - __get_free_pages(GFP_KERNEL, - get_order(io_tlb_nslabs * - sizeof(phys_addr_t))); - if (!io_tlb_orig_addr) - goto cleanup4; - - for (i = 0; i < io_tlb_nslabs; i++) { - io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - io_tlb_index = 0; - - swiotlb_print_info(); - - late_alloc = 1; - - swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); - - return 0; - -cleanup4: - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - io_tlb_list = NULL; -cleanup3: - free_pages((unsigned long)v_overflow_buffer, - get_order(io_tlb_overflow)); - io_tlb_overflow_buffer = 0; -cleanup2: - io_tlb_end = 0; - io_tlb_start = 0; - io_tlb_nslabs = 0; - max_segment = 0; - return -ENOMEM; -} - -void __init swiotlb_exit(void) -{ - if (!io_tlb_orig_addr) - return; - - if (late_alloc) { - free_pages((unsigned long)phys_to_virt(io_tlb_overflow_buffer), - get_order(io_tlb_overflow)); - free_pages((unsigned long)io_tlb_orig_addr, - get_order(io_tlb_nslabs * sizeof(phys_addr_t))); - free_pages((unsigned long)io_tlb_list, get_order(io_tlb_nslabs * - sizeof(int))); - free_pages((unsigned long)phys_to_virt(io_tlb_start), - get_order(io_tlb_nslabs << IO_TLB_SHIFT)); - } else { - memblock_free_late(io_tlb_overflow_buffer, - PAGE_ALIGN(io_tlb_overflow)); - memblock_free_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - memblock_free_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - memblock_free_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); - } - io_tlb_nslabs = 0; - max_segment = 0; -} - -int is_swiotlb_buffer(phys_addr_t paddr) -{ - return paddr >= io_tlb_start && paddr < io_tlb_end; -} - -/* - * Bounce: copy the swiotlb buffer back to the original dma location - */ -static void swiotlb_bounce(phys_addr_t orig_addr, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir) -{ - unsigned long pfn = PFN_DOWN(orig_addr); - unsigned char *vaddr = phys_to_virt(tlb_addr); - - if (PageHighMem(pfn_to_page(pfn))) { - /* The buffer does not have a mapping. Map it in and copy */ - unsigned int offset = orig_addr & ~PAGE_MASK; - char *buffer; - unsigned int sz = 0; - unsigned long flags; - - while (size) { - sz = min_t(size_t, PAGE_SIZE - offset, size); - - local_irq_save(flags); - buffer = kmap_atomic(pfn_to_page(pfn)); - if (dir == DMA_TO_DEVICE) - memcpy(vaddr, buffer + offset, sz); - else - memcpy(buffer + offset, vaddr, sz); - kunmap_atomic(buffer); - local_irq_restore(flags); - - size -= sz; - pfn++; - vaddr += sz; - offset = 0; - } - } else if (dir == DMA_TO_DEVICE) { - memcpy(vaddr, phys_to_virt(orig_addr), size); - } else { - memcpy(phys_to_virt(orig_addr), vaddr, size); - } -} - -phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, - dma_addr_t tbl_dma_addr, - phys_addr_t orig_addr, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - unsigned long flags; - phys_addr_t tlb_addr; - unsigned int nslots, stride, index, wrap; - int i; - unsigned long mask; - unsigned long offset_slots; - unsigned long max_slots; - - if (no_iotlb_memory) - panic("Can not allocate SWIOTLB buffer earlier and can't now provide you with the DMA bounce buffer"); - - if (mem_encrypt_active()) - pr_warn_once("%s is active and system is using DMA bounce buffers\n", - sme_active() ? "SME" : "SEV"); - - mask = dma_get_seg_boundary(hwdev); - - tbl_dma_addr &= mask; - - offset_slots = ALIGN(tbl_dma_addr, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - - /* - * Carefully handle integer overflow which can occur when mask == ~0UL. - */ - max_slots = mask + 1 - ? ALIGN(mask + 1, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT - : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); - - /* - * For mappings greater than or equal to a page, we limit the stride - * (and hence alignment) to a page size. - */ - nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size >= PAGE_SIZE) - stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); - else - stride = 1; - - BUG_ON(!nslots); - - /* - * Find suitable number of IO TLB entries size that will fit this - * request and allocate a buffer from that IO TLB pool. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - index = ALIGN(io_tlb_index, stride); - if (index >= io_tlb_nslabs) - index = 0; - wrap = index; - - do { - while (iommu_is_span_boundary(index, nslots, offset_slots, - max_slots)) { - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - if (index == wrap) - goto not_found; - } - - /* - * If we find a slot that indicates we have 'nslots' number of - * contiguous buffers, we allocate the buffers from that slot - * and mark the entries as '0' indicating unavailable. - */ - if (io_tlb_list[index] >= nslots) { - int count = 0; - - for (i = index; i < (int) (index + nslots); i++) - io_tlb_list[i] = 0; - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE - 1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - tlb_addr = io_tlb_start + (index << IO_TLB_SHIFT); - - /* - * Update the indices to avoid searching in the next - * round. - */ - io_tlb_index = ((index + nslots) < io_tlb_nslabs - ? (index + nslots) : 0); - - goto found; - } - index += stride; - if (index >= io_tlb_nslabs) - index = 0; - } while (index != wrap); - -not_found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) - dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); - return SWIOTLB_MAP_ERROR; -found: - spin_unlock_irqrestore(&io_tlb_lock, flags); - - /* - * Save away the mapping from the original address to the DMA address. - * This is needed when we sync the memory. Then we sync the buffer if - * needed. - */ - for (i = 0; i < nslots; i++) - io_tlb_orig_addr[index+i] = orig_addr + (i << IO_TLB_SHIFT); - if (!(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - (dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_TO_DEVICE); - - return tlb_addr; -} - -/* - * Allocates bounce buffer and returns its physical address. - */ -static phys_addr_t -map_single(struct device *hwdev, phys_addr_t phys, size_t size, - enum dma_data_direction dir, unsigned long attrs) -{ - dma_addr_t start_dma_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) { - dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", - &phys); - return SWIOTLB_MAP_ERROR; - } - - start_dma_addr = __phys_to_dma(hwdev, io_tlb_start); - return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, - dir, attrs); -} - -/* - * tlb_addr is the physical address of the bounce buffer to unmap. - */ -void swiotlb_tbl_unmap_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - unsigned long flags; - int i, count, nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; - - /* - * First, sync the memory before unmapping the entry - */ - if (orig_addr != INVALID_PHYS_ADDR && - !(attrs & DMA_ATTR_SKIP_CPU_SYNC) && - ((dir == DMA_FROM_DEVICE) || (dir == DMA_BIDIRECTIONAL))) - swiotlb_bounce(orig_addr, tlb_addr, size, DMA_FROM_DEVICE); - - /* - * Return the buffer to the free list by setting the corresponding - * entries to indicate the number of contiguous entries available. - * While returning the entries to the free list, we merge the entries - * with slots below and above the pool being returned. - */ - spin_lock_irqsave(&io_tlb_lock, flags); - { - count = ((index + nslots) < ALIGN(index + 1, IO_TLB_SEGSIZE) ? - io_tlb_list[index + nslots] : 0); - /* - * Step 1: return the slots to the free list, merging the - * slots with superceeding slots - */ - for (i = index + nslots - 1; i >= index; i--) { - io_tlb_list[i] = ++count; - io_tlb_orig_addr[i] = INVALID_PHYS_ADDR; - } - /* - * Step 2: merge the returned slots with the preceding slots, - * if available (non zero) - */ - for (i = index - 1; (OFFSET(i, IO_TLB_SEGSIZE) != IO_TLB_SEGSIZE -1) && io_tlb_list[i]; i--) - io_tlb_list[i] = ++count; - } - spin_unlock_irqrestore(&io_tlb_lock, flags); -} - -void swiotlb_tbl_sync_single(struct device *hwdev, phys_addr_t tlb_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) -{ - int index = (tlb_addr - io_tlb_start) >> IO_TLB_SHIFT; - phys_addr_t orig_addr = io_tlb_orig_addr[index]; - - if (orig_addr == INVALID_PHYS_ADDR) - return; - orig_addr += (unsigned long)tlb_addr & ((1 << IO_TLB_SHIFT) - 1); - - switch (target) { - case SYNC_FOR_CPU: - if (likely(dir == DMA_FROM_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_FROM_DEVICE); - else - BUG_ON(dir != DMA_TO_DEVICE); - break; - case SYNC_FOR_DEVICE: - if (likely(dir == DMA_TO_DEVICE || dir == DMA_BIDIRECTIONAL)) - swiotlb_bounce(orig_addr, tlb_addr, - size, DMA_TO_DEVICE); - else - BUG_ON(dir != DMA_FROM_DEVICE); - break; - default: - BUG(); - } -} - -static inline bool dma_coherent_ok(struct device *dev, dma_addr_t addr, - size_t size) -{ - u64 mask = DMA_BIT_MASK(32); - - if (dev && dev->coherent_dma_mask) - mask = dev->coherent_dma_mask; - return addr + size - 1 <= mask; -} - -static void * -swiotlb_alloc_buffer(struct device *dev, size_t size, dma_addr_t *dma_handle, - unsigned long attrs) -{ - phys_addr_t phys_addr; - - if (swiotlb_force == SWIOTLB_NO_FORCE) - goto out_warn; - - phys_addr = swiotlb_tbl_map_single(dev, - __phys_to_dma(dev, io_tlb_start), - 0, size, DMA_FROM_DEVICE, attrs); - if (phys_addr == SWIOTLB_MAP_ERROR) - goto out_warn; - - *dma_handle = __phys_to_dma(dev, phys_addr); - if (!dma_coherent_ok(dev, *dma_handle, size)) - goto out_unmap; - - memset(phys_to_virt(phys_addr), 0, size); - return phys_to_virt(phys_addr); - -out_unmap: - dev_warn(dev, "hwdev DMA mask = 0x%016Lx, dev_addr = 0x%016Lx\n", - (unsigned long long)dev->coherent_dma_mask, - (unsigned long long)*dma_handle); - - /* - * DMA_TO_DEVICE to avoid memcpy in unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); -out_warn: - if (!(attrs & DMA_ATTR_NO_WARN) && printk_ratelimit()) { - dev_warn(dev, - "swiotlb: coherent allocation failed, size=%zu\n", - size); - dump_stack(); - } - return NULL; -} - -static bool swiotlb_free_buffer(struct device *dev, size_t size, - dma_addr_t dma_addr) -{ - phys_addr_t phys_addr = dma_to_phys(dev, dma_addr); - - WARN_ON_ONCE(irqs_disabled()); - - if (!is_swiotlb_buffer(phys_addr)) - return false; - - /* - * DMA_TO_DEVICE to avoid memcpy in swiotlb_tbl_unmap_single. - * DMA_ATTR_SKIP_CPU_SYNC is optional. - */ - swiotlb_tbl_unmap_single(dev, phys_addr, size, DMA_TO_DEVICE, - DMA_ATTR_SKIP_CPU_SYNC); - return true; -} - -static void -swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, - int do_panic) -{ - if (swiotlb_force == SWIOTLB_NO_FORCE) - return; - - /* - * Ran out of IOMMU space for this operation. This is very bad. - * Unfortunately the drivers cannot handle this operation properly. - * unless they check for dma_mapping_error (most don't) - * When the mapping is small enough return a static buffer to limit - * the damage, or panic when the transfer is too big. - */ - dev_err_ratelimited(dev, "DMA: Out of SW-IOMMU space for %zu bytes\n", - size); - - if (size <= io_tlb_overflow || !do_panic) - return; - - if (dir == DMA_BIDIRECTIONAL) - panic("DMA: Random memory could be DMA accessed\n"); - if (dir == DMA_FROM_DEVICE) - panic("DMA: Random memory could be DMA written\n"); - if (dir == DMA_TO_DEVICE) - panic("DMA: Random memory could be DMA read\n"); -} - -/* - * Map a single buffer of the indicated size for DMA in streaming mode. The - * physical address to use is returned. - * - * Once the device is given the dma address, the device owns this memory until - * either swiotlb_unmap_page or swiotlb_dma_sync_single is performed. - */ -dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, - unsigned long offset, size_t size, - enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t map, phys = page_to_phys(page) + offset; - dma_addr_t dev_addr = phys_to_dma(dev, phys); - - BUG_ON(dir == DMA_NONE); - /* - * If the address happens to be in the device's DMA window, - * we can safely return the device addr and not worry about bounce - * buffering it. - */ - if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) - return dev_addr; - - trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); - - /* Oh well, have to allocate and map a bounce buffer. */ - map = map_single(dev, phys, size, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - swiotlb_full(dev, size, dir, 1); - return __phys_to_dma(dev, io_tlb_overflow_buffer); - } - - dev_addr = __phys_to_dma(dev, map); - - /* Ensure that the address returned is DMA'ble */ - if (dma_capable(dev, dev_addr, size)) - return dev_addr; - - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_tbl_unmap_single(dev, map, size, dir, attrs); - - return __phys_to_dma(dev, io_tlb_overflow_buffer); -} - -/* - * Unmap a single streaming mode DMA translation. The dma_addr and size must - * match what was provided for in a previous swiotlb_map_page call. All - * other usages are undefined. - * - * After this call, reads by the cpu to the buffer are guaranteed to see - * whatever the device wrote there. - */ -static void unmap_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_unmap_single(hwdev, paddr, size, dir, attrs); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - /* - * phys_to_virt doesn't work with hihgmem page but we could - * call dma_mark_clean() with hihgmem page here. However, we - * are fine since dma_mark_clean() is null on POWERPC. We can - * make dma_mark_clean() take a physical address if necessary. - */ - dma_mark_clean(phys_to_virt(paddr), size); -} - -void swiotlb_unmap_page(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - unsigned long attrs) -{ - unmap_single(hwdev, dev_addr, size, dir, attrs); -} - -/* - * Make physical memory consistent for a single streaming mode DMA translation - * after a transfer. - * - * If you perform a swiotlb_map_page() but wish to interrogate the buffer - * using the cpu, yet do not wish to teardown the dma mapping, you must - * call this function before doing so. At the next point you give the dma - * address back to the card, you must first perform a - * swiotlb_dma_sync_for_device, and then the device again owns the buffer - */ -static void -swiotlb_sync_single(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir, - enum dma_sync_target target) -{ - phys_addr_t paddr = dma_to_phys(hwdev, dev_addr); - - BUG_ON(dir == DMA_NONE); - - if (is_swiotlb_buffer(paddr)) { - swiotlb_tbl_sync_single(hwdev, paddr, size, dir, target); - return; - } - - if (dir != DMA_FROM_DEVICE) - return; - - dma_mark_clean(phys_to_virt(paddr), size); -} - -void -swiotlb_sync_single_for_cpu(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_single_for_device(struct device *hwdev, dma_addr_t dev_addr, - size_t size, enum dma_data_direction dir) -{ - swiotlb_sync_single(hwdev, dev_addr, size, dir, SYNC_FOR_DEVICE); -} - -/* - * Map a set of buffers described by scatterlist in streaming mode for DMA. - * This is the scatter-gather version of the above swiotlb_map_page - * interface. Here the scatter gather list elements are each tagged with the - * appropriate dma address and length. They are obtained via - * sg_dma_{address,length}(SG). - * - * NOTE: An implementation may be able to use a smaller number of - * DMA address/length pairs than there are SG table elements. - * (for example via virtual mapping capabilities) - * The routine returns the number of addr/length pairs actually - * used, at most nents. - * - * Device ownership issues as mentioned above for swiotlb_map_page are the - * same here. - */ -int -swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, - enum dma_data_direction dir, unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) { - phys_addr_t paddr = sg_phys(sg); - dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - - if (swiotlb_force == SWIOTLB_FORCE || - !dma_capable(hwdev, dev_addr, sg->length)) { - phys_addr_t map = map_single(hwdev, sg_phys(sg), - sg->length, dir, attrs); - if (map == SWIOTLB_MAP_ERROR) { - /* Don't panic here, we expect map_sg users - to do proper error handling. */ - swiotlb_full(hwdev, sg->length, dir, 0); - attrs |= DMA_ATTR_SKIP_CPU_SYNC; - swiotlb_unmap_sg_attrs(hwdev, sgl, i, dir, - attrs); - sg_dma_len(sgl) = 0; - return 0; - } - sg->dma_address = __phys_to_dma(hwdev, map); - } else - sg->dma_address = dev_addr; - sg_dma_len(sg) = sg->length; - } - return nelems; -} - -/* - * Unmap a set of streaming mode DMA translations. Again, cpu read rules - * concerning calls here are the same as for swiotlb_unmap_page() above. - */ -void -swiotlb_unmap_sg_attrs(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - unsigned long attrs) -{ - struct scatterlist *sg; - int i; - - BUG_ON(dir == DMA_NONE); - - for_each_sg(sgl, sg, nelems, i) - unmap_single(hwdev, sg->dma_address, sg_dma_len(sg), dir, - attrs); -} - -/* - * Make physical memory consistent for a set of streaming mode DMA translations - * after a transfer. - * - * The same as swiotlb_sync_single_* but for a scatter-gather list, same rules - * and usage. - */ -static void -swiotlb_sync_sg(struct device *hwdev, struct scatterlist *sgl, - int nelems, enum dma_data_direction dir, - enum dma_sync_target target) -{ - struct scatterlist *sg; - int i; - - for_each_sg(sgl, sg, nelems, i) - swiotlb_sync_single(hwdev, sg->dma_address, - sg_dma_len(sg), dir, target); -} - -void -swiotlb_sync_sg_for_cpu(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_CPU); -} - -void -swiotlb_sync_sg_for_device(struct device *hwdev, struct scatterlist *sg, - int nelems, enum dma_data_direction dir) -{ - swiotlb_sync_sg(hwdev, sg, nelems, dir, SYNC_FOR_DEVICE); -} - -int -swiotlb_dma_mapping_error(struct device *hwdev, dma_addr_t dma_addr) -{ - return (dma_addr == __phys_to_dma(hwdev, io_tlb_overflow_buffer)); -} - -/* - * Return whether the given device DMA address mask can be supported - * properly. For example, if your device can only drive the low 24-bits - * during bus mastering, then you would pass 0x00ffffff as the mask to - * this function. - */ -int -swiotlb_dma_supported(struct device *hwdev, u64 mask) -{ - return __phys_to_dma(hwdev, io_tlb_end - 1) <= mask; -} - -void *swiotlb_alloc(struct device *dev, size_t size, dma_addr_t *dma_handle, - gfp_t gfp, unsigned long attrs) -{ - void *vaddr; - - /* temporary workaround: */ - if (gfp & __GFP_NOWARN) - attrs |= DMA_ATTR_NO_WARN; - - /* - * Don't print a warning when the first allocation attempt fails. - * swiotlb_alloc_coherent() will print a warning when the DMA memory - * allocation ultimately failed. - */ - gfp |= __GFP_NOWARN; - - vaddr = dma_direct_alloc(dev, size, dma_handle, gfp, attrs); - if (!vaddr) - vaddr = swiotlb_alloc_buffer(dev, size, dma_handle, attrs); - return vaddr; -} - -void swiotlb_free(struct device *dev, size_t size, void *vaddr, - dma_addr_t dma_addr, unsigned long attrs) -{ - if (!swiotlb_free_buffer(dev, size, dma_addr)) - dma_direct_free(dev, size, vaddr, dma_addr, attrs); -} - -const struct dma_map_ops swiotlb_dma_ops = { - .mapping_error = swiotlb_dma_mapping_error, - .alloc = swiotlb_alloc, - .free = swiotlb_free, - .sync_single_for_cpu = swiotlb_sync_single_for_cpu, - .sync_single_for_device = swiotlb_sync_single_for_device, - .sync_sg_for_cpu = swiotlb_sync_sg_for_cpu, - .sync_sg_for_device = swiotlb_sync_sg_for_device, - .map_sg = swiotlb_map_sg_attrs, - .unmap_sg = swiotlb_unmap_sg_attrs, - .map_page = swiotlb_map_page, - .unmap_page = swiotlb_unmap_page, - .dma_supported = dma_direct_supported, -}; diff --git a/lib/test_bitfield.c b/lib/test_bitfield.c new file mode 100644 index 000000000000..5b8f4108662d --- /dev/null +++ b/lib/test_bitfield.c @@ -0,0 +1,168 @@ +// SPDX-License-Identifier: GPL-2.0+ +/* + * Test cases for bitfield helpers. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/bitfield.h> + +#define CHECK_ENC_GET_U(tp, v, field, res) do { \ + { \ + u##tp _res; \ + \ + _res = u##tp##_encode_bits(v, field); \ + if (_res != res) { \ + pr_warn("u" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != " #res "\n",\ + (u64)_res); \ + return -EINVAL; \ + } \ + if (u##tp##_get_bits(_res, field) != v) \ + return -EINVAL; \ + } \ + } while (0) + +#define CHECK_ENC_GET_LE(tp, v, field, res) do { \ + { \ + __le##tp _res; \ + \ + _res = le##tp##_encode_bits(v, field); \ + if (_res != cpu_to_le##tp(res)) { \ + pr_warn("le" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ + (u64)le##tp##_to_cpu(_res), \ + (u64)(res)); \ + return -EINVAL; \ + } \ + if (le##tp##_get_bits(_res, field) != v) \ + return -EINVAL; \ + } \ + } while (0) + +#define CHECK_ENC_GET_BE(tp, v, field, res) do { \ + { \ + __be##tp _res; \ + \ + _res = be##tp##_encode_bits(v, field); \ + if (_res != cpu_to_be##tp(res)) { \ + pr_warn("be" #tp "_encode_bits(" #v ", " #field ") is 0x%llx != 0x%llx\n",\ + (u64)be##tp##_to_cpu(_res), \ + (u64)(res)); \ + return -EINVAL; \ + } \ + if (be##tp##_get_bits(_res, field) != v) \ + return -EINVAL; \ + } \ + } while (0) + +#define CHECK_ENC_GET(tp, v, field, res) do { \ + CHECK_ENC_GET_U(tp, v, field, res); \ + CHECK_ENC_GET_LE(tp, v, field, res); \ + CHECK_ENC_GET_BE(tp, v, field, res); \ + } while (0) + +static int test_constants(void) +{ + /* + * NOTE + * This whole function compiles (or at least should, if everything + * is going according to plan) to nothing after optimisation. + */ + + CHECK_ENC_GET(16, 1, 0x000f, 0x0001); + CHECK_ENC_GET(16, 3, 0x00f0, 0x0030); + CHECK_ENC_GET(16, 5, 0x0f00, 0x0500); + CHECK_ENC_GET(16, 7, 0xf000, 0x7000); + CHECK_ENC_GET(16, 14, 0x000f, 0x000e); + CHECK_ENC_GET(16, 15, 0x00f0, 0x00f0); + + CHECK_ENC_GET_U(8, 1, 0x0f, 0x01); + CHECK_ENC_GET_U(8, 3, 0xf0, 0x30); + CHECK_ENC_GET_U(8, 14, 0x0f, 0x0e); + CHECK_ENC_GET_U(8, 15, 0xf0, 0xf0); + + CHECK_ENC_GET(32, 1, 0x00000f00, 0x00000100); + CHECK_ENC_GET(32, 3, 0x0000f000, 0x00003000); + CHECK_ENC_GET(32, 5, 0x000f0000, 0x00050000); + CHECK_ENC_GET(32, 7, 0x00f00000, 0x00700000); + CHECK_ENC_GET(32, 14, 0x0f000000, 0x0e000000); + CHECK_ENC_GET(32, 15, 0xf0000000, 0xf0000000); + + CHECK_ENC_GET(64, 1, 0x00000f0000000000ull, 0x0000010000000000ull); + CHECK_ENC_GET(64, 3, 0x0000f00000000000ull, 0x0000300000000000ull); + CHECK_ENC_GET(64, 5, 0x000f000000000000ull, 0x0005000000000000ull); + CHECK_ENC_GET(64, 7, 0x00f0000000000000ull, 0x0070000000000000ull); + CHECK_ENC_GET(64, 14, 0x0f00000000000000ull, 0x0e00000000000000ull); + CHECK_ENC_GET(64, 15, 0xf000000000000000ull, 0xf000000000000000ull); + + return 0; +} + +#define CHECK(tp, mask) do { \ + u64 v; \ + \ + for (v = 0; v < 1 << hweight32(mask); v++) \ + if (tp##_encode_bits(v, mask) != v << __ffs64(mask)) \ + return -EINVAL; \ + } while (0) + +static int test_variables(void) +{ + CHECK(u8, 0x0f); + CHECK(u8, 0xf0); + CHECK(u8, 0x38); + + CHECK(u16, 0x0038); + CHECK(u16, 0x0380); + CHECK(u16, 0x3800); + CHECK(u16, 0x8000); + + CHECK(u32, 0x80000000); + CHECK(u32, 0x7f000000); + CHECK(u32, 0x07e00000); + CHECK(u32, 0x00018000); + + CHECK(u64, 0x8000000000000000ull); + CHECK(u64, 0x7f00000000000000ull); + CHECK(u64, 0x0001800000000000ull); + CHECK(u64, 0x0000000080000000ull); + CHECK(u64, 0x000000007f000000ull); + CHECK(u64, 0x0000000018000000ull); + CHECK(u64, 0x0000001f8000000ull); + + return 0; +} + +static int __init test_bitfields(void) +{ + int ret = test_constants(); + + if (ret) { + pr_warn("constant tests failed!\n"); + return ret; + } + + ret = test_variables(); + if (ret) { + pr_warn("variable tests failed!\n"); + return ret; + } + +#ifdef TEST_BITFIELD_COMPILE + /* these should fail compilation */ + CHECK_ENC_GET(16, 16, 0x0f00, 0x1000); + u32_encode_bits(7, 0x06000000); + + /* this should at least give a warning */ + u16_encode_bits(0, 0x60000); +#endif + + pr_info("tests passed\n"); + + return 0; +} +module_init(test_bitfields) + +MODULE_AUTHOR("Johannes Berg <johannes@sipsolutions.net>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_bpf.c b/lib/test_bpf.c index 60aedc879361..08d3d59dca17 100644 --- a/lib/test_bpf.c +++ b/lib/test_bpf.c @@ -5282,21 +5282,31 @@ static struct bpf_test tests[] = { { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Ctx heavy transformations", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { }, { { 1, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) }, { 10, !!(SKB_VLAN_TCI & VLAN_TAG_PRESENT) } }, .fill_helper = bpf_fill_maxinsns6, + .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Call heavy transformations", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_NO_DATA | FLAG_EXPECTED_FAIL, +#else CLASSIC | FLAG_NO_DATA, +#endif { }, { { 1, 0 }, { 10, 0 } }, .fill_helper = bpf_fill_maxinsns7, + .expected_errcode = -ENOTSUPP, }, { /* Mainly checking JIT here. */ "BPF_MAXINSNS: Jump heavy test", @@ -5347,18 +5357,28 @@ static struct bpf_test tests[] = { { "BPF_MAXINSNS: exec all MSH", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { 0xfa, 0xfb, 0xfc, 0xfd, }, { { 4, 0xababab83 } }, .fill_helper = bpf_fill_maxinsns13, + .expected_errcode = -ENOTSUPP, }, { "BPF_MAXINSNS: ld_abs+get_processor_id", { }, +#if defined(CONFIG_BPF_JIT_ALWAYS_ON) && defined(CONFIG_S390) + CLASSIC | FLAG_EXPECTED_FAIL, +#else CLASSIC, +#endif { }, { { 1, 0xbee } }, .fill_helper = bpf_fill_ld_abs_get_processor_id, + .expected_errcode = -ENOTSUPP, }, /* * LD_IND / LD_ABS on fragmented SKBs diff --git a/lib/test_overflow.c b/lib/test_overflow.c index 2278fe05a1b0..fc680562d8b6 100644 --- a/lib/test_overflow.c +++ b/lib/test_overflow.c @@ -252,7 +252,8 @@ static int __init test_ ## t ## _overflow(void) { \ int err = 0; \ unsigned i; \ \ - pr_info("%-3s: %zu tests\n", #t, ARRAY_SIZE(t ## _tests)); \ + pr_info("%-3s: %zu arithmetic tests\n", #t, \ + ARRAY_SIZE(t ## _tests)); \ for (i = 0; i < ARRAY_SIZE(t ## _tests); ++i) \ err |= do_test_ ## t(&t ## _tests[i]); \ return err; \ @@ -287,6 +288,200 @@ static int __init test_overflow_calculation(void) return err; } +static int __init test_overflow_shift(void) +{ + int err = 0; + +/* Args are: value, shift, type, expected result, overflow expected */ +#define TEST_ONE_SHIFT(a, s, t, expect, of) ({ \ + int __failed = 0; \ + typeof(a) __a = (a); \ + typeof(s) __s = (s); \ + t __e = (expect); \ + t __d; \ + bool __of = check_shl_overflow(__a, __s, &__d); \ + if (__of != of) { \ + pr_warn("expected (%s)(%s << %s) to%s overflow\n", \ + #t, #a, #s, of ? "" : " not"); \ + __failed = 1; \ + } else if (!__of && __d != __e) { \ + pr_warn("expected (%s)(%s << %s) == %s\n", \ + #t, #a, #s, #expect); \ + if ((t)-1 < 0) \ + pr_warn("got %lld\n", (s64)__d); \ + else \ + pr_warn("got %llu\n", (u64)__d); \ + __failed = 1; \ + } \ + if (!__failed) \ + pr_info("ok: (%s)(%s << %s) == %s\n", #t, #a, #s, \ + of ? "overflow" : #expect); \ + __failed; \ +}) + + /* Sane shifts. */ + err |= TEST_ONE_SHIFT(1, 0, u8, 1 << 0, false); + err |= TEST_ONE_SHIFT(1, 4, u8, 1 << 4, false); + err |= TEST_ONE_SHIFT(1, 7, u8, 1 << 7, false); + err |= TEST_ONE_SHIFT(0xF, 4, u8, 0xF << 4, false); + err |= TEST_ONE_SHIFT(1, 0, u16, 1 << 0, false); + err |= TEST_ONE_SHIFT(1, 10, u16, 1 << 10, false); + err |= TEST_ONE_SHIFT(1, 15, u16, 1 << 15, false); + err |= TEST_ONE_SHIFT(0xFF, 8, u16, 0xFF << 8, false); + err |= TEST_ONE_SHIFT(1, 0, int, 1 << 0, false); + err |= TEST_ONE_SHIFT(1, 16, int, 1 << 16, false); + err |= TEST_ONE_SHIFT(1, 30, int, 1 << 30, false); + err |= TEST_ONE_SHIFT(1, 0, s32, 1 << 0, false); + err |= TEST_ONE_SHIFT(1, 16, s32, 1 << 16, false); + err |= TEST_ONE_SHIFT(1, 30, s32, 1 << 30, false); + err |= TEST_ONE_SHIFT(1, 0, unsigned int, 1U << 0, false); + err |= TEST_ONE_SHIFT(1, 20, unsigned int, 1U << 20, false); + err |= TEST_ONE_SHIFT(1, 31, unsigned int, 1U << 31, false); + err |= TEST_ONE_SHIFT(0xFFFFU, 16, unsigned int, 0xFFFFU << 16, false); + err |= TEST_ONE_SHIFT(1, 0, u32, 1U << 0, false); + err |= TEST_ONE_SHIFT(1, 20, u32, 1U << 20, false); + err |= TEST_ONE_SHIFT(1, 31, u32, 1U << 31, false); + err |= TEST_ONE_SHIFT(0xFFFFU, 16, u32, 0xFFFFU << 16, false); + err |= TEST_ONE_SHIFT(1, 0, u64, 1ULL << 0, false); + err |= TEST_ONE_SHIFT(1, 40, u64, 1ULL << 40, false); + err |= TEST_ONE_SHIFT(1, 63, u64, 1ULL << 63, false); + err |= TEST_ONE_SHIFT(0xFFFFFFFFULL, 32, u64, + 0xFFFFFFFFULL << 32, false); + + /* Sane shift: start and end with 0, without a too-wide shift. */ + err |= TEST_ONE_SHIFT(0, 7, u8, 0, false); + err |= TEST_ONE_SHIFT(0, 15, u16, 0, false); + err |= TEST_ONE_SHIFT(0, 31, unsigned int, 0, false); + err |= TEST_ONE_SHIFT(0, 31, u32, 0, false); + err |= TEST_ONE_SHIFT(0, 63, u64, 0, false); + + /* Sane shift: start and end with 0, without reaching signed bit. */ + err |= TEST_ONE_SHIFT(0, 6, s8, 0, false); + err |= TEST_ONE_SHIFT(0, 14, s16, 0, false); + err |= TEST_ONE_SHIFT(0, 30, int, 0, false); + err |= TEST_ONE_SHIFT(0, 30, s32, 0, false); + err |= TEST_ONE_SHIFT(0, 62, s64, 0, false); + + /* Overflow: shifted the bit off the end. */ + err |= TEST_ONE_SHIFT(1, 8, u8, 0, true); + err |= TEST_ONE_SHIFT(1, 16, u16, 0, true); + err |= TEST_ONE_SHIFT(1, 32, unsigned int, 0, true); + err |= TEST_ONE_SHIFT(1, 32, u32, 0, true); + err |= TEST_ONE_SHIFT(1, 64, u64, 0, true); + + /* Overflow: shifted into the signed bit. */ + err |= TEST_ONE_SHIFT(1, 7, s8, 0, true); + err |= TEST_ONE_SHIFT(1, 15, s16, 0, true); + err |= TEST_ONE_SHIFT(1, 31, int, 0, true); + err |= TEST_ONE_SHIFT(1, 31, s32, 0, true); + err |= TEST_ONE_SHIFT(1, 63, s64, 0, true); + + /* Overflow: high bit falls off unsigned types. */ + /* 10010110 */ + err |= TEST_ONE_SHIFT(150, 1, u8, 0, true); + /* 1000100010010110 */ + err |= TEST_ONE_SHIFT(34966, 1, u16, 0, true); + /* 10000100000010001000100010010110 */ + err |= TEST_ONE_SHIFT(2215151766U, 1, u32, 0, true); + err |= TEST_ONE_SHIFT(2215151766U, 1, unsigned int, 0, true); + /* 1000001000010000010000000100000010000100000010001000100010010110 */ + err |= TEST_ONE_SHIFT(9372061470395238550ULL, 1, u64, 0, true); + + /* Overflow: bit shifted into signed bit on signed types. */ + /* 01001011 */ + err |= TEST_ONE_SHIFT(75, 1, s8, 0, true); + /* 0100010001001011 */ + err |= TEST_ONE_SHIFT(17483, 1, s16, 0, true); + /* 01000010000001000100010001001011 */ + err |= TEST_ONE_SHIFT(1107575883, 1, s32, 0, true); + err |= TEST_ONE_SHIFT(1107575883, 1, int, 0, true); + /* 0100000100001000001000000010000001000010000001000100010001001011 */ + err |= TEST_ONE_SHIFT(4686030735197619275LL, 1, s64, 0, true); + + /* Overflow: bit shifted past signed bit on signed types. */ + /* 01001011 */ + err |= TEST_ONE_SHIFT(75, 2, s8, 0, true); + /* 0100010001001011 */ + err |= TEST_ONE_SHIFT(17483, 2, s16, 0, true); + /* 01000010000001000100010001001011 */ + err |= TEST_ONE_SHIFT(1107575883, 2, s32, 0, true); + err |= TEST_ONE_SHIFT(1107575883, 2, int, 0, true); + /* 0100000100001000001000000010000001000010000001000100010001001011 */ + err |= TEST_ONE_SHIFT(4686030735197619275LL, 2, s64, 0, true); + + /* Overflow: values larger than destination type. */ + err |= TEST_ONE_SHIFT(0x100, 0, u8, 0, true); + err |= TEST_ONE_SHIFT(0xFF, 0, s8, 0, true); + err |= TEST_ONE_SHIFT(0x10000U, 0, u16, 0, true); + err |= TEST_ONE_SHIFT(0xFFFFU, 0, s16, 0, true); + err |= TEST_ONE_SHIFT(0x100000000ULL, 0, u32, 0, true); + err |= TEST_ONE_SHIFT(0x100000000ULL, 0, unsigned int, 0, true); + err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, s32, 0, true); + err |= TEST_ONE_SHIFT(0xFFFFFFFFUL, 0, int, 0, true); + err |= TEST_ONE_SHIFT(0xFFFFFFFFFFFFFFFFULL, 0, s64, 0, true); + + /* Nonsense: negative initial value. */ + err |= TEST_ONE_SHIFT(-1, 0, s8, 0, true); + err |= TEST_ONE_SHIFT(-1, 0, u8, 0, true); + err |= TEST_ONE_SHIFT(-5, 0, s16, 0, true); + err |= TEST_ONE_SHIFT(-5, 0, u16, 0, true); + err |= TEST_ONE_SHIFT(-10, 0, int, 0, true); + err |= TEST_ONE_SHIFT(-10, 0, unsigned int, 0, true); + err |= TEST_ONE_SHIFT(-100, 0, s32, 0, true); + err |= TEST_ONE_SHIFT(-100, 0, u32, 0, true); + err |= TEST_ONE_SHIFT(-10000, 0, s64, 0, true); + err |= TEST_ONE_SHIFT(-10000, 0, u64, 0, true); + + /* Nonsense: negative shift values. */ + err |= TEST_ONE_SHIFT(0, -5, s8, 0, true); + err |= TEST_ONE_SHIFT(0, -5, u8, 0, true); + err |= TEST_ONE_SHIFT(0, -10, s16, 0, true); + err |= TEST_ONE_SHIFT(0, -10, u16, 0, true); + err |= TEST_ONE_SHIFT(0, -15, int, 0, true); + err |= TEST_ONE_SHIFT(0, -15, unsigned int, 0, true); + err |= TEST_ONE_SHIFT(0, -20, s32, 0, true); + err |= TEST_ONE_SHIFT(0, -20, u32, 0, true); + err |= TEST_ONE_SHIFT(0, -30, s64, 0, true); + err |= TEST_ONE_SHIFT(0, -30, u64, 0, true); + + /* Overflow: shifted at or beyond entire type's bit width. */ + err |= TEST_ONE_SHIFT(0, 8, u8, 0, true); + err |= TEST_ONE_SHIFT(0, 9, u8, 0, true); + err |= TEST_ONE_SHIFT(0, 8, s8, 0, true); + err |= TEST_ONE_SHIFT(0, 9, s8, 0, true); + err |= TEST_ONE_SHIFT(0, 16, u16, 0, true); + err |= TEST_ONE_SHIFT(0, 17, u16, 0, true); + err |= TEST_ONE_SHIFT(0, 16, s16, 0, true); + err |= TEST_ONE_SHIFT(0, 17, s16, 0, true); + err |= TEST_ONE_SHIFT(0, 32, u32, 0, true); + err |= TEST_ONE_SHIFT(0, 33, u32, 0, true); + err |= TEST_ONE_SHIFT(0, 32, int, 0, true); + err |= TEST_ONE_SHIFT(0, 33, int, 0, true); + err |= TEST_ONE_SHIFT(0, 32, s32, 0, true); + err |= TEST_ONE_SHIFT(0, 33, s32, 0, true); + err |= TEST_ONE_SHIFT(0, 64, u64, 0, true); + err |= TEST_ONE_SHIFT(0, 65, u64, 0, true); + err |= TEST_ONE_SHIFT(0, 64, s64, 0, true); + err |= TEST_ONE_SHIFT(0, 65, s64, 0, true); + + /* + * Corner case: for unsigned types, we fail when we've shifted + * through the entire width of bits. For signed types, we might + * want to match this behavior, but that would mean noticing if + * we shift through all but the signed bit, and this is not + * currently detected (but we'll notice an overflow into the + * signed bit). So, for now, we will test this condition but + * mark it as not expected to overflow. + */ + err |= TEST_ONE_SHIFT(0, 7, s8, 0, false); + err |= TEST_ONE_SHIFT(0, 15, s16, 0, false); + err |= TEST_ONE_SHIFT(0, 31, int, 0, false); + err |= TEST_ONE_SHIFT(0, 31, s32, 0, false); + err |= TEST_ONE_SHIFT(0, 63, s64, 0, false); + + return err; +} + /* * Deal with the various forms of allocator arguments. See comments above * the DEFINE_TEST_ALLOC() instances for mapping of the "bits". @@ -397,6 +592,7 @@ static int __init test_module_init(void) int err = 0; err |= test_overflow_calculation(); + err |= test_overflow_shift(); err |= test_overflow_allocation(); if (err) { diff --git a/lib/test_printf.c b/lib/test_printf.c index b2aa8f514844..53527ea822b5 100644 --- a/lib/test_printf.c +++ b/lib/test_printf.c @@ -206,6 +206,7 @@ test_string(void) #define PTR_WIDTH 16 #define PTR ((void *)0xffff0123456789abUL) #define PTR_STR "ffff0123456789ab" +#define PTR_VAL_NO_CRNG "(____ptrval____)" #define ZEROS "00000000" /* hex 32 zero bits */ static int __init @@ -216,7 +217,16 @@ plain_format(void) nchars = snprintf(buf, PLAIN_BUF_SIZE, "%p", PTR); - if (nchars != PTR_WIDTH || strncmp(buf, ZEROS, strlen(ZEROS)) != 0) + if (nchars != PTR_WIDTH) + return -1; + + if (strncmp(buf, PTR_VAL_NO_CRNG, PTR_WIDTH) == 0) { + pr_warn("crng possibly not yet initialized. plain 'p' buffer contains \"%s\"", + PTR_VAL_NO_CRNG); + return 0; + } + + if (strncmp(buf, ZEROS, strlen(ZEROS)) != 0) return -1; return 0; @@ -227,6 +237,7 @@ plain_format(void) #define PTR_WIDTH 8 #define PTR ((void *)0x456789ab) #define PTR_STR "456789ab" +#define PTR_VAL_NO_CRNG "(ptrval)" static int __init plain_format(void) @@ -245,7 +256,16 @@ plain_hash(void) nchars = snprintf(buf, PLAIN_BUF_SIZE, "%p", PTR); - if (nchars != PTR_WIDTH || strncmp(buf, PTR_STR, PTR_WIDTH) == 0) + if (nchars != PTR_WIDTH) + return -1; + + if (strncmp(buf, PTR_VAL_NO_CRNG, PTR_WIDTH) == 0) { + pr_warn("crng possibly not yet initialized. plain 'p' buffer contains \"%s\"", + PTR_VAL_NO_CRNG); + return 0; + } + + if (strncmp(buf, PTR_STR, PTR_WIDTH) == 0) return -1; return 0; @@ -260,13 +280,6 @@ plain(void) { int err; - /* - * Make sure crng is ready. Otherwise we get "(ptrval)" instead - * of a hashed address when printing '%p' in plain_hash() and - * plain_format(). - */ - wait_for_random_bytes(); - err = plain_hash(); if (err) { pr_warn("plain 'p' does not appear to be hashed\n"); diff --git a/lib/test_rhashtable.c b/lib/test_rhashtable.c index fb6968109113..82ac39ce5310 100644 --- a/lib/test_rhashtable.c +++ b/lib/test_rhashtable.c @@ -83,7 +83,7 @@ static u32 my_hashfn(const void *data, u32 len, u32 seed) { const struct test_obj_rhl *obj = data; - return (obj->value.id % 10) << RHT_HASH_RESERVED_SPACE; + return (obj->value.id % 10); } static int my_cmpfn(struct rhashtable_compare_arg *arg, const void *obj) @@ -99,7 +99,6 @@ static struct rhashtable_params test_rht_params = { .key_offset = offsetof(struct test_obj, value), .key_len = sizeof(struct test_obj_val), .hashfn = jhash, - .nulls_base = (3U << RHT_BASE_SHIFT), }; static struct rhashtable_params test_rht_params_dup = { @@ -296,8 +295,6 @@ static int __init test_rhltable(unsigned int entries) if (!obj_in_table) goto out_free; - /* nulls_base not supported in rhlist interface */ - test_rht_params.nulls_base = 0; err = rhltable_init(&rhlt, &test_rht_params); if (WARN_ON(err)) goto out_free; @@ -501,6 +498,8 @@ static unsigned int __init print_ht(struct rhltable *rhlt) unsigned int i, cnt = 0; ht = &rhlt->ht; + /* Take the mutex to avoid RCU warning */ + mutex_lock(&ht->mutex); tbl = rht_dereference(ht->tbl, ht); for (i = 0; i < tbl->size; i++) { struct rhash_head *pos, *next; @@ -534,6 +533,7 @@ static unsigned int __init print_ht(struct rhltable *rhlt) } } printk(KERN_ERR "\n---- ht: ----%s\n-------------\n", buff); + mutex_unlock(&ht->mutex); return cnt; } diff --git a/lib/vsprintf.c b/lib/vsprintf.c index a48aaa79d352..d5b3a3f95c01 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -1651,6 +1651,17 @@ char *device_node_string(char *buf, char *end, struct device_node *dn, return widen_string(buf, buf - buf_start, end, spec); } +/* Make pointers available for printing early in the boot sequence. */ +static int debug_boot_weak_hash __ro_after_init; + +static int __init debug_boot_weak_hash_enable(char *str) +{ + debug_boot_weak_hash = 1; + pr_info("debug_boot_weak_hash enabled\n"); + return 0; +} +early_param("debug_boot_weak_hash", debug_boot_weak_hash_enable); + static DEFINE_STATIC_KEY_TRUE(not_filled_random_ptr_key); static siphash_key_t ptr_key __read_mostly; @@ -1675,8 +1686,16 @@ static struct random_ready_callback random_ready = { static int __init initialize_ptr_random(void) { - int ret = add_random_ready_callback(&random_ready); + int key_size = sizeof(ptr_key); + int ret; + /* Use hw RNG if available. */ + if (get_random_bytes_arch(&ptr_key, key_size) == key_size) { + static_branch_disable(¬_filled_random_ptr_key); + return 0; + } + + ret = add_random_ready_callback(&random_ready); if (!ret) { return 0; } else if (ret == -EALREADY) { @@ -1695,6 +1714,12 @@ static char *ptr_to_id(char *buf, char *end, void *ptr, struct printf_spec spec) const char *str = sizeof(ptr) == 8 ? "(____ptrval____)" : "(ptrval)"; unsigned long hashval; + /* When debugging early boot use non-cryptographically secure hash. */ + if (unlikely(debug_boot_weak_hash)) { + hashval = hash_long((unsigned long)ptr, 32); + return pointer_string(buf, end, (const void *)hashval, spec); + } + if (static_branch_unlikely(¬_filled_random_ptr_key)) { spec.field_width = 2 * sizeof(ptr); /* string length must be less than default_width */ @@ -1942,6 +1967,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, case 'F': return device_node_string(buf, end, ptr, spec, fmt + 1); } + break; case 'x': return pointer_string(buf, end, ptr, spec); } diff --git a/lib/xz/xz_crc32.c b/lib/xz/xz_crc32.c index 34532d14fd4c..25a5d87e2e4c 100644 --- a/lib/xz/xz_crc32.c +++ b/lib/xz/xz_crc32.c @@ -15,6 +15,7 @@ * but they are bigger and use more memory for the lookup table. */ +#include <linux/crc32poly.h> #include "xz_private.h" /* @@ -29,7 +30,7 @@ STATIC_RW_DATA uint32_t xz_crc32_table[256]; XZ_EXTERN void xz_crc32_init(void) { - const uint32_t poly = 0xEDB88320; + const uint32_t poly = CRC32_POLY_LE; uint32_t i; uint32_t j; |