Diffstat (limited to 'lib')
40 files changed, 1076 insertions, 493 deletions
diff --git a/lib/Kconfig b/lib/Kconfig index 1e80cb3c77a9..4771fb3f4da4 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -351,9 +351,9 @@ config HAS_IOMEM select GENERIC_IO default y -config HAS_IOPORT +config HAS_IOPORT_MAP boolean - depends on HAS_IOMEM && !NO_IOPORT + depends on HAS_IOMEM && !NO_IOPORT_MAP default y config HAS_DMA diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index db25707aa41b..dd7f8858188a 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -119,7 +119,7 @@ menu "Compile-time checks and compiler options" config DEBUG_INFO bool "Compile the kernel with debug info" - depends on DEBUG_KERNEL + depends on DEBUG_KERNEL && !COMPILE_TEST help If you say Y here the resulting kernel image will include debugging info resulting in a larger kernel image. @@ -761,6 +761,15 @@ config PANIC_ON_OOPS_VALUE default 0 if !PANIC_ON_OOPS default 1 if PANIC_ON_OOPS +config PANIC_TIMEOUT + int "panic timeout" + default 0 + help + Set the timeout value (in seconds) until a reboot occurs when the + the kernel panics. If n = 0, then we wait forever. A timeout + value n > 0 will wait n seconds before rebooting, while a timeout + value n < 0 will reboot immediately. + config SCHED_DEBUG bool "Collect scheduler debugging info" depends on DEBUG_KERNEL && PROC_FS @@ -971,6 +980,21 @@ config DEBUG_LOCKING_API_SELFTESTS The following locking APIs are covered: spinlocks, rwlocks, mutexes and rwsems. +config LOCK_TORTURE_TEST + tristate "torture tests for locking" + depends on DEBUG_KERNEL + select TORTURE_TEST + default n + help + This option provides a kernel module that runs torture tests + on kernel locking primitives. The kernel module may be built + after the fact on the running kernel to be tested, if desired. + + Say Y here if you want kernel locking-primitive torture tests + to be built into the kernel. + Say M if you want these torture tests to build as a module. + Say N if you are unsure. + endmenu # lock debugging config TRACE_IRQFLAGS @@ -1132,9 +1156,14 @@ config SPARSE_RCU_POINTER Say N if you are unsure. +config TORTURE_TEST + tristate + default n + config RCU_TORTURE_TEST tristate "torture tests for RCU" depends on DEBUG_KERNEL + select TORTURE_TEST default n help This option provides a kernel module that runs torture tests @@ -1547,17 +1576,6 @@ config PROVIDE_OHCI1394_DMA_INIT See Documentation/debugging-via-ohci1394.txt for more information. -config FIREWIRE_OHCI_REMOTE_DMA - bool "Remote debugging over FireWire with firewire-ohci" - depends on FIREWIRE_OHCI - help - This option lets you use the FireWire bus for remote debugging - with help of the firewire-ohci driver. It enables unfiltered - remote DMA in firewire-ohci. - See Documentation/debugging-via-ohci1394.txt for more information. - - If unsure, say N. - config BUILD_DOCSRC bool "Build targets in Documentation/ tree" depends on HEADERS_CHECK @@ -1575,8 +1593,43 @@ config DMA_API_DEBUG With this option you will be able to detect common bugs in device drivers like double-freeing of DMA mappings or freeing mappings that were never allocated. - This option causes a performance degredation. Use only if you want - to debug device drivers. If unsure, say N. + + This also attempts to catch cases where a page owned by DMA is + accessed by the cpu in a way that could cause data corruption. For + example, this enables cow_user_page() to check that the source page is + not undergoing DMA. + + This option causes a performance degradation. Use only if you want to + debug device drivers and dma interactions. + + If unsure, say N. 
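The expanded DMA_API_DEBUG help above refers to the driver-side pattern the checker validates: every dma_map_*() call should be followed by a dma_mapping_error() check and paired with a matching unmap of the same size and direction. A minimal sketch of that pattern (hypothetical driver code, not part of this commit) looks like:

    #include <linux/dma-mapping.h>

    /* Hypothetical helper: map a buffer, check the mapping, unmap it.
     * With DMA_API_DEBUG enabled, the core warns about double frees,
     * missing dma_mapping_error() checks, or mismatched size/direction. */
    static int example_dma_roundtrip(struct device *dev, void *buf, size_t len)
    {
            dma_addr_t handle;

            handle = dma_map_single(dev, buf, len, DMA_TO_DEVICE);
            if (dma_mapping_error(dev, handle))
                    return -ENOMEM;

            /* ... hand 'handle' to the hardware and wait for completion ... */

            dma_unmap_single(dev, handle, len, DMA_TO_DEVICE);
            return 0;
    }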
+ +config TEST_MODULE + tristate "Test module loading with 'hello world' module" + default n + depends on m + help + This builds the "test_module" module that emits "Hello, world" + on printk when loaded. It is designed to be used for basic + evaluation of the module loading subsystem (for example when + validating module verification). It lacks any extra dependencies, + and will not normally be loaded by the system unless explicitly + requested by name. + + If unsure, say N. + +config TEST_USER_COPY + tristate "Test user/kernel boundary protections" + default n + depends on m + help + This builds the "test_user_copy" module that runs sanity checks + on the copy_to/from_user infrastructure, making sure basic + user/kernel boundary testing is working. If it fails to load, + a regression has been detected in the user/kernel memory boundary + protections. + + If unsure, say N. source "samples/Kconfig" diff --git a/lib/Makefile b/lib/Makefile index 972552b39cf5..0cd7b68e1382 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -26,11 +26,13 @@ obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ bust_spinlocks.o hexdump.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iovec.o clz_ctz.o \ bsearch.o find_last_bit.o find_next_bit.o llist.o memweight.o kfifo.o \ - percpu-refcount.o percpu_ida.o + percpu-refcount.o percpu_ida.o hash.o obj-y += string_helpers.o obj-$(CONFIG_TEST_STRING_HELPERS) += test-string_helpers.o obj-y += kstrtox.o obj-$(CONFIG_TEST_KSTRTOX) += test-kstrtox.o +obj-$(CONFIG_TEST_MODULE) += test_module.o +obj-$(CONFIG_TEST_USER_COPY) += test_user_copy.o ifeq ($(CONFIG_DEBUG_KOBJECT),y) CFLAGS_kobject.o += -DDEBUG @@ -43,6 +45,7 @@ obj-$(CONFIG_HAS_IOMEM) += iomap_copy.o devres.o obj-$(CONFIG_CHECK_SIGNATURE) += check_signature.o obj-$(CONFIG_DEBUG_LOCKING_API_SELFTESTS) += locking-selftest.o +GCOV_PROFILE_hweight.o := n CFLAGS_hweight.o = $(subst $(quote),,$(CONFIG_ARCH_HWEIGHT_CFLAGS)) obj-$(CONFIG_GENERIC_HWEIGHT) += hweight.o diff --git a/lib/assoc_array.c b/lib/assoc_array.c index 1b6a44f1ec3e..c0b1007011e1 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -157,7 +157,7 @@ enum assoc_array_walk_status { assoc_array_walk_tree_empty, assoc_array_walk_found_terminal_node, assoc_array_walk_found_wrong_shortcut, -} status; +}; struct assoc_array_walk_result { struct { diff --git a/lib/average.c b/lib/average.c index 99a67e662b3c..114d1beae0c7 100644 --- a/lib/average.c +++ b/lib/average.c @@ -53,8 +53,10 @@ EXPORT_SYMBOL(ewma_init); */ struct ewma *ewma_add(struct ewma *avg, unsigned long val) { - avg->internal = avg->internal ? - (((avg->internal << avg->weight) - avg->internal) + + unsigned long internal = ACCESS_ONCE(avg->internal); + + ACCESS_ONCE(avg->internal) = internal ? + (((internal << avg->weight) - internal) + (val << avg->factor)) >> avg->weight : (val << avg->factor); return avg; diff --git a/lib/clz_ctz.c b/lib/clz_ctz.c index a8f8379eb49f..2e11e48446ab 100644 --- a/lib/clz_ctz.c +++ b/lib/clz_ctz.c @@ -6,6 +6,9 @@ * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as * published by the Free Software Foundation. + * The functions in this file aren't called directly, but are required by + * GCC builtins such as __builtin_ctz, and therefore they can't be removed + * despite appearing unreferenced in kernel source. * * __c[lt]z[sd]i2 can be overridden by linking arch-specific versions. 
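The TEST_MODULE entry above describes a module that does nothing but print "Hello, world" at load time so that module loading and signature verification can be exercised. A minimal sketch of such a module (illustrative only; not necessarily byte-for-byte the lib/test_module.c added by this series) is:

    #include <linux/init.h>
    #include <linux/module.h>
    #include <linux/printk.h>

    static int __init test_module_init(void)
    {
            pr_warn("Hello, world\n");
            return 0;
    }
    module_init(test_module_init);

    static void __exit test_module_exit(void)
    {
            pr_warn("Goodbye\n");
    }
    module_exit(test_module_exit);

    MODULE_LICENSE("GPL");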
*/ @@ -13,18 +16,22 @@ #include <linux/export.h> #include <linux/kernel.h> +int __weak __ctzsi2(int val); int __weak __ctzsi2(int val) { return __ffs(val); } EXPORT_SYMBOL(__ctzsi2); +int __weak __clzsi2(int val); int __weak __clzsi2(int val) { return 32 - fls(val); } EXPORT_SYMBOL(__clzsi2); +int __weak __clzdi2(long val); +int __weak __ctzdi2(long val); #if BITS_PER_LONG == 32 int __weak __clzdi2(long val) diff --git a/lib/cmdline.c b/lib/cmdline.c index eb6791188cf5..d4932f745e92 100644 --- a/lib/cmdline.c +++ b/lib/cmdline.c @@ -49,13 +49,13 @@ static int get_range(char **str, int *pint) * 3 - hyphen found to denote a range */ -int get_option (char **str, int *pint) +int get_option(char **str, int *pint) { char *cur = *str; if (!cur || !(*cur)) return 0; - *pint = simple_strtol (cur, str, 0); + *pint = simple_strtol(cur, str, 0); if (cur == *str) return 0; if (**str == ',') { @@ -67,6 +67,7 @@ int get_option (char **str, int *pint) return 1; } +EXPORT_SYMBOL(get_option); /** * get_options - Parse a string into a list of integers @@ -84,13 +85,13 @@ int get_option (char **str, int *pint) * the parse to end (typically a null terminator, if @str is * completely parseable). */ - + char *get_options(const char *str, int nints, int *ints) { int res, i = 1; while (i < nints) { - res = get_option ((char **)&str, ints + i); + res = get_option((char **)&str, ints + i); if (res == 0) break; if (res == 3) { @@ -112,6 +113,7 @@ char *get_options(const char *str, int nints, int *ints) ints[0] = i - 1; return (char *)str; } +EXPORT_SYMBOL(get_options); /** * memparse - parse a string with mem suffixes into a number @@ -152,8 +154,4 @@ unsigned long long memparse(const char *ptr, char **retptr) return ret; } - - EXPORT_SYMBOL(memparse); -EXPORT_SYMBOL(get_option); -EXPORT_SYMBOL(get_options); diff --git a/lib/cpumask.c b/lib/cpumask.c index d327b87c99b7..b810b753c607 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -140,7 +140,7 @@ EXPORT_SYMBOL(zalloc_cpumask_var); */ void __init alloc_bootmem_cpumask_var(cpumask_var_t *mask) { - *mask = alloc_bootmem(cpumask_size()); + *mask = memblock_virt_alloc(cpumask_size(), 0); } /** @@ -161,6 +161,6 @@ EXPORT_SYMBOL(free_cpumask_var); */ void __init free_bootmem_cpumask_var(cpumask_var_t mask) { - free_bootmem(__pa(mask), cpumask_size()); + memblock_free_early(__pa(mask), cpumask_size()); } #endif diff --git a/lib/decompress.c b/lib/decompress.c index 4d1cd0397aab..86069d74c062 100644 --- a/lib/decompress.c +++ b/lib/decompress.c @@ -16,6 +16,7 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/init.h> +#include <linux/printk.h> #ifndef CONFIG_DECOMPRESS_GZIP # define gunzip NULL @@ -61,6 +62,8 @@ decompress_fn __init decompress_method(const unsigned char *inbuf, int len, if (len < 2) return NULL; /* Need at least this much... 
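The get_option()/get_options() helpers exported above parse comma-separated integer lists with optional ranges (for example "1-3,5"), storing the count in ints[0]. A short, hypothetical usage sketch for a boot parameter:

    #include <linux/init.h>
    #include <linux/kernel.h>

    /* Hypothetical handler for a boot parameter like example=1-3,5:
     * ints[0] receives the number of values actually parsed. */
    static int __init example_setup(char *str)
    {
            int ints[8];    /* slot 0 holds the count */

            get_options(str, ARRAY_SIZE(ints), ints);
            pr_info("parsed %d value(s), first=%d\n", ints[0],
                    ints[0] ? ints[1] : -1);
            return 1;
    }
    __setup("example=", example_setup);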
*/ + pr_debug("Compressed data magic: %#.2x %#.2x\n", inbuf[0], inbuf[1]); + for (cf = compressed_formats; cf->name; cf++) { if (!memcmp(inbuf, cf->magic, 2)) break; diff --git a/lib/decompress_inflate.c b/lib/decompress_inflate.c index d619b28c456f..0edfd742a154 100644 --- a/lib/decompress_inflate.c +++ b/lib/decompress_inflate.c @@ -19,6 +19,7 @@ #include "zlib_inflate/inflate.h" #include "zlib_inflate/infutil.h" +#include <linux/decompress/inflate.h> #endif /* STATIC */ diff --git a/lib/decompress_unlz4.c b/lib/decompress_unlz4.c index 3e67cfad16ad..7d1e83caf8ad 100644 --- a/lib/decompress_unlz4.c +++ b/lib/decompress_unlz4.c @@ -141,6 +141,7 @@ STATIC inline int INIT unlz4(u8 *input, int in_len, goto exit_2; } + ret = -1; if (flush && flush(outp, dest_len) != dest_len) goto exit_2; if (output) diff --git a/lib/devres.c b/lib/devres.c index 823533138fa0..2f16c133fd36 100644 --- a/lib/devres.c +++ b/lib/devres.c @@ -81,11 +81,13 @@ EXPORT_SYMBOL(devm_ioremap_nocache); void devm_iounmap(struct device *dev, void __iomem *addr) { WARN_ON(devres_destroy(dev, devm_ioremap_release, devm_ioremap_match, - (void *)addr)); + (__force void *)addr)); iounmap(addr); } EXPORT_SYMBOL(devm_iounmap); +#define IOMEM_ERR_PTR(err) (__force void __iomem *)ERR_PTR(err) + /** * devm_ioremap_resource() - check, request region, and ioremap resource * @dev: generic device to handle the resource for @@ -114,7 +116,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) if (!res || resource_type(res) != IORESOURCE_MEM) { dev_err(dev, "invalid resource\n"); - return ERR_PTR(-EINVAL); + return IOMEM_ERR_PTR(-EINVAL); } size = resource_size(res); @@ -122,7 +124,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) if (!devm_request_mem_region(dev, res->start, size, name)) { dev_err(dev, "can't request region for resource %pR\n", res); - return ERR_PTR(-EBUSY); + return IOMEM_ERR_PTR(-EBUSY); } if (res->flags & IORESOURCE_CACHEABLE) @@ -133,7 +135,7 @@ void __iomem *devm_ioremap_resource(struct device *dev, struct resource *res) if (!dest_ptr) { dev_err(dev, "ioremap failed for resource %pR\n", res); devm_release_mem_region(dev, res->start, size); - dest_ptr = ERR_PTR(-ENOMEM); + dest_ptr = IOMEM_ERR_PTR(-ENOMEM); } return dest_ptr; @@ -168,7 +170,7 @@ void __iomem *devm_request_and_ioremap(struct device *device, } EXPORT_SYMBOL(devm_request_and_ioremap); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* * Generic iomap devres */ @@ -224,10 +226,10 @@ void devm_ioport_unmap(struct device *dev, void __iomem *addr) { ioport_unmap(addr); WARN_ON(devres_destroy(dev, devm_ioport_map_release, - devm_ioport_map_match, (void *)addr)); + devm_ioport_map_match, (__force void *)addr)); } EXPORT_SYMBOL(devm_ioport_unmap); -#endif /* CONFIG_HAS_IOPORT */ +#endif /* CONFIG_HAS_IOPORT_MAP */ #ifdef CONFIG_PCI /* diff --git a/lib/dma-debug.c b/lib/dma-debug.c index d87a17a819d0..98f2d7e91a91 100644 --- a/lib/dma-debug.c +++ b/lib/dma-debug.c @@ -53,11 +53,26 @@ enum map_err_types { #define DMA_DEBUG_STACKTRACE_ENTRIES 5 +/** + * struct dma_debug_entry - track a dma_map* or dma_alloc_coherent mapping + * @list: node on pre-allocated free_entries list + * @dev: 'dev' argument to dma_map_{page|single|sg} or dma_alloc_coherent + * @type: single, page, sg, coherent + * @pfn: page frame of the start address + * @offset: offset of mapping relative to pfn + * @size: length of the mapping + * @direction: enum dma_data_direction + * @sg_call_ents: 'nents' from dma_map_sg + * 
@sg_mapped_ents: 'mapped_ents' from dma_map_sg + * @map_err_type: track whether dma_mapping_error() was checked + * @stacktrace: support backtraces when a violation is detected + */ struct dma_debug_entry { struct list_head list; struct device *dev; int type; - phys_addr_t paddr; + unsigned long pfn; + size_t offset; u64 dev_addr; u64 size; int direction; @@ -372,6 +387,11 @@ static void hash_bucket_del(struct dma_debug_entry *entry) list_del(&entry->list); } +static unsigned long long phys_addr(struct dma_debug_entry *entry) +{ + return page_to_phys(pfn_to_page(entry->pfn)) + entry->offset; +} + /* * Dump mapping entries for debugging purposes */ @@ -389,9 +409,9 @@ void debug_dma_dump_mappings(struct device *dev) list_for_each_entry(entry, &bucket->list, list) { if (!dev || dev == entry->dev) { dev_info(entry->dev, - "%s idx %d P=%Lx D=%Lx L=%Lx %s %s\n", + "%s idx %d P=%Lx N=%lx D=%Lx L=%Lx %s %s\n", type2name[entry->type], idx, - (unsigned long long)entry->paddr, + phys_addr(entry), entry->pfn, entry->dev_addr, entry->size, dir2name[entry->direction], maperr2str[entry->map_err_type]); @@ -404,6 +424,176 @@ void debug_dma_dump_mappings(struct device *dev) EXPORT_SYMBOL(debug_dma_dump_mappings); /* + * For each mapping (initial cacheline in the case of + * dma_alloc_coherent/dma_map_page, initial cacheline in each page of a + * scatterlist, or the cacheline specified in dma_map_single) insert + * into this tree using the cacheline as the key. At + * dma_unmap_{single|sg|page} or dma_free_coherent delete the entry. If + * the entry already exists at insertion time add a tag as a reference + * count for the overlapping mappings. For now, the overlap tracking + * just ensures that 'unmaps' balance 'maps' before marking the + * cacheline idle, but we should also be flagging overlaps as an API + * violation. + * + * Memory usage is mostly constrained by the maximum number of available + * dma-debug entries in that we need a free dma_debug_entry before + * inserting into the tree. In the case of dma_map_page and + * dma_alloc_coherent there is only one dma_debug_entry and one + * dma_active_cacheline entry to track per event. dma_map_sg(), on the + * other hand, consumes a single dma_debug_entry, but inserts 'nents' + * entries into the tree. + * + * At any time debug_dma_assert_idle() can be called to trigger a + * warning if any cachelines in the given page are in the active set. 
+ */ +static RADIX_TREE(dma_active_cacheline, GFP_NOWAIT); +static DEFINE_SPINLOCK(radix_lock); +#define ACTIVE_CACHELINE_MAX_OVERLAP ((1 << RADIX_TREE_MAX_TAGS) - 1) +#define CACHELINE_PER_PAGE_SHIFT (PAGE_SHIFT - L1_CACHE_SHIFT) +#define CACHELINES_PER_PAGE (1 << CACHELINE_PER_PAGE_SHIFT) + +static phys_addr_t to_cacheline_number(struct dma_debug_entry *entry) +{ + return (entry->pfn << CACHELINE_PER_PAGE_SHIFT) + + (entry->offset >> L1_CACHE_SHIFT); +} + +static int active_cacheline_read_overlap(phys_addr_t cln) +{ + int overlap = 0, i; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (radix_tree_tag_get(&dma_active_cacheline, cln, i)) + overlap |= 1 << i; + return overlap; +} + +static int active_cacheline_set_overlap(phys_addr_t cln, int overlap) +{ + int i; + + if (overlap > ACTIVE_CACHELINE_MAX_OVERLAP || overlap < 0) + return overlap; + + for (i = RADIX_TREE_MAX_TAGS - 1; i >= 0; i--) + if (overlap & 1 << i) + radix_tree_tag_set(&dma_active_cacheline, cln, i); + else + radix_tree_tag_clear(&dma_active_cacheline, cln, i); + + return overlap; +} + +static void active_cacheline_inc_overlap(phys_addr_t cln) +{ + int overlap = active_cacheline_read_overlap(cln); + + overlap = active_cacheline_set_overlap(cln, ++overlap); + + /* If we overflowed the overlap counter then we're potentially + * leaking dma-mappings. Otherwise, if maps and unmaps are + * balanced then this overflow may cause false negatives in + * debug_dma_assert_idle() as the cacheline may be marked idle + * prematurely. + */ + WARN_ONCE(overlap > ACTIVE_CACHELINE_MAX_OVERLAP, + "DMA-API: exceeded %d overlapping mappings of cacheline %pa\n", + ACTIVE_CACHELINE_MAX_OVERLAP, &cln); +} + +static int active_cacheline_dec_overlap(phys_addr_t cln) +{ + int overlap = active_cacheline_read_overlap(cln); + + return active_cacheline_set_overlap(cln, --overlap); +} + +static int active_cacheline_insert(struct dma_debug_entry *entry) +{ + phys_addr_t cln = to_cacheline_number(entry); + unsigned long flags; + int rc; + + /* If the device is not writing memory then we don't have any + * concerns about the cpu consuming stale data. This mitigates + * legitimate usages of overlapping mappings. + */ + if (entry->direction == DMA_TO_DEVICE) + return 0; + + spin_lock_irqsave(&radix_lock, flags); + rc = radix_tree_insert(&dma_active_cacheline, cln, entry); + if (rc == -EEXIST) + active_cacheline_inc_overlap(cln); + spin_unlock_irqrestore(&radix_lock, flags); + + return rc; +} + +static void active_cacheline_remove(struct dma_debug_entry *entry) +{ + phys_addr_t cln = to_cacheline_number(entry); + unsigned long flags; + + /* ...mirror the insert case */ + if (entry->direction == DMA_TO_DEVICE) + return; + + spin_lock_irqsave(&radix_lock, flags); + /* since we are counting overlaps the final put of the + * cacheline will occur when the overlap count is 0. + * active_cacheline_dec_overlap() returns -1 in that case + */ + if (active_cacheline_dec_overlap(cln) < 0) + radix_tree_delete(&dma_active_cacheline, cln); + spin_unlock_irqrestore(&radix_lock, flags); +} + +/** + * debug_dma_assert_idle() - assert that a page is not undergoing dma + * @page: page to lookup in the dma_active_cacheline tree + * + * Place a call to this routine in cases where the cpu touching the page + * before the dma completes (page is dma_unmapped) will lead to data + * corruption. 
+ */ +void debug_dma_assert_idle(struct page *page) +{ + static struct dma_debug_entry *ents[CACHELINES_PER_PAGE]; + struct dma_debug_entry *entry = NULL; + void **results = (void **) &ents; + unsigned int nents, i; + unsigned long flags; + phys_addr_t cln; + + if (!page) + return; + + cln = (phys_addr_t) page_to_pfn(page) << CACHELINE_PER_PAGE_SHIFT; + spin_lock_irqsave(&radix_lock, flags); + nents = radix_tree_gang_lookup(&dma_active_cacheline, results, cln, + CACHELINES_PER_PAGE); + for (i = 0; i < nents; i++) { + phys_addr_t ent_cln = to_cacheline_number(ents[i]); + + if (ent_cln == cln) { + entry = ents[i]; + break; + } else if (ent_cln >= cln + CACHELINES_PER_PAGE) + break; + } + spin_unlock_irqrestore(&radix_lock, flags); + + if (!entry) + return; + + cln = to_cacheline_number(entry); + err_printk(entry->dev, entry, + "DMA-API: cpu touching an active dma mapped cacheline [cln=%pa]\n", + &cln); +} + +/* * Wrapper function for adding an entry to the hash. * This function takes care of locking itself. */ @@ -411,10 +601,21 @@ static void add_dma_entry(struct dma_debug_entry *entry) { struct hash_bucket *bucket; unsigned long flags; + int rc; bucket = get_hash_bucket(entry, &flags); hash_bucket_add(bucket, entry); put_hash_bucket(bucket, &flags); + + rc = active_cacheline_insert(entry); + if (rc == -ENOMEM) { + pr_err("DMA-API: cacheline tracking ENOMEM, dma-debug disabled\n"); + global_disable = true; + } + + /* TODO: report -EEXIST errors here as overlapping mappings are + * not supported by the DMA API + */ } static struct dma_debug_entry *__dma_entry_alloc(void) @@ -469,6 +670,8 @@ static void dma_entry_free(struct dma_debug_entry *entry) { unsigned long flags; + active_cacheline_remove(entry); + /* * add to beginning of the list - this way the entries are * more likely cache hot when they are reallocated. 
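The comment and Kconfig help above describe placing debug_dma_assert_idle() wherever the CPU is about to touch a page that may still be under DMA (the series wires it into cow_user_page()). A hedged sketch of such a call site, using a made-up copy helper and assuming the declaration lands in linux/dma-debug.h alongside this lib/dma-debug.c change:

    #include <linux/dma-debug.h>
    #include <linux/highmem.h>
    #include <linux/mm.h>

    /* Hypothetical helper: copy a page with the CPU. With DMA_API_DEBUG
     * enabled, debug_dma_assert_idle() warns if any cacheline of @src is
     * still part of an active (non-DMA_TO_DEVICE) mapping. */
    static void example_copy_page(struct page *dst, struct page *src,
                                  unsigned long vaddr,
                                  struct vm_area_struct *vma)
    {
            debug_dma_assert_idle(src);
            copy_user_highpage(dst, src, vaddr, vma);
    }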
@@ -895,15 +1098,15 @@ static void check_unmap(struct dma_debug_entry *ref) ref->dev_addr, ref->size, type2name[entry->type], type2name[ref->type]); } else if ((entry->type == dma_debug_coherent) && - (ref->paddr != entry->paddr)) { + (phys_addr(ref) != phys_addr(entry))) { err_printk(ref->dev, entry, "DMA-API: device driver frees " "DMA memory with different CPU address " "[device address=0x%016llx] [size=%llu bytes] " "[cpu alloc address=0x%016llx] " "[cpu free address=0x%016llx]", ref->dev_addr, ref->size, - (unsigned long long)entry->paddr, - (unsigned long long)ref->paddr); + phys_addr(entry), + phys_addr(ref)); } if (ref->sg_call_ents && ref->type == dma_debug_sg && @@ -1052,7 +1255,8 @@ void debug_dma_map_page(struct device *dev, struct page *page, size_t offset, entry->dev = dev; entry->type = dma_debug_page; - entry->paddr = page_to_phys(page) + offset; + entry->pfn = page_to_pfn(page); + entry->offset = offset, entry->dev_addr = dma_addr; entry->size = size; entry->direction = direction; @@ -1148,7 +1352,8 @@ void debug_dma_map_sg(struct device *dev, struct scatterlist *sg, entry->type = dma_debug_sg; entry->dev = dev; - entry->paddr = sg_phys(s); + entry->pfn = page_to_pfn(sg_page(s)); + entry->offset = s->offset, entry->size = sg_dma_len(s); entry->dev_addr = sg_dma_address(s); entry->direction = direction; @@ -1198,7 +1403,8 @@ void debug_dma_unmap_sg(struct device *dev, struct scatterlist *sglist, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = dir, @@ -1233,7 +1439,8 @@ void debug_dma_alloc_coherent(struct device *dev, size_t size, entry->type = dma_debug_coherent; entry->dev = dev; - entry->paddr = virt_to_phys(virt); + entry->pfn = page_to_pfn(virt_to_page(virt)); + entry->offset = (size_t) virt & PAGE_MASK; entry->size = size; entry->dev_addr = dma_addr; entry->direction = DMA_BIDIRECTIONAL; @@ -1248,7 +1455,8 @@ void debug_dma_free_coherent(struct device *dev, size_t size, struct dma_debug_entry ref = { .type = dma_debug_coherent, .dev = dev, - .paddr = virt_to_phys(virt), + .pfn = page_to_pfn(virt_to_page(virt)), + .offset = (size_t) virt & PAGE_MASK, .dev_addr = addr, .size = size, .direction = DMA_BIDIRECTIONAL, @@ -1356,7 +1564,8 @@ void debug_dma_sync_sg_for_cpu(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, @@ -1388,7 +1597,8 @@ void debug_dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, struct dma_debug_entry ref = { .type = dma_debug_sg, .dev = dev, - .paddr = sg_phys(s), + .pfn = page_to_pfn(sg_page(s)), + .offset = s->offset, .dev_addr = sg_dma_address(s), .size = sg_dma_len(s), .direction = direction, diff --git a/lib/dynamic_debug.c b/lib/dynamic_debug.c index c37aeacd7651..7288e38e1757 100644 --- a/lib/dynamic_debug.c +++ b/lib/dynamic_debug.c @@ -8,6 +8,7 @@ * By Greg Banks <gnb@melbourne.sgi.com> * Copyright (c) 2008 Silicon Graphics Inc. All Rights Reserved. * Copyright (C) 2011 Bart Van Assche. All Rights Reserved. 
+ * Copyright (C) 2013 Du, Changbin <changbin.du@gmail.com> */ #define pr_fmt(fmt) KBUILD_MODNAME ":%s: " fmt, __func__ @@ -24,6 +25,7 @@ #include <linux/sysctl.h> #include <linux/ctype.h> #include <linux/string.h> +#include <linux/parser.h> #include <linux/string_helpers.h> #include <linux/uaccess.h> #include <linux/dynamic_debug.h> @@ -147,7 +149,8 @@ static int ddebug_change(const struct ddebug_query *query, list_for_each_entry(dt, &ddebug_tables, link) { /* match against the module name */ - if (query->module && strcmp(query->module, dt->mod_name)) + if (query->module && + !match_wildcard(query->module, dt->mod_name)) continue; for (i = 0; i < dt->num_ddebugs; i++) { @@ -155,14 +158,16 @@ static int ddebug_change(const struct ddebug_query *query, /* match against the source filename */ if (query->filename && - strcmp(query->filename, dp->filename) && - strcmp(query->filename, kbasename(dp->filename)) && - strcmp(query->filename, trim_prefix(dp->filename))) + !match_wildcard(query->filename, dp->filename) && + !match_wildcard(query->filename, + kbasename(dp->filename)) && + !match_wildcard(query->filename, + trim_prefix(dp->filename))) continue; /* match against the function */ if (query->function && - strcmp(query->function, dp->function)) + !match_wildcard(query->function, dp->function)) continue; /* match against the format */ @@ -263,14 +268,12 @@ static int ddebug_tokenize(char *buf, char *words[], int maxwords) */ static inline int parse_lineno(const char *str, unsigned int *val) { - char *end = NULL; BUG_ON(str == NULL); if (*str == '\0') { *val = 0; return 0; } - *val = simple_strtoul(str, &end, 10); - if (end == NULL || end == str || *end != '\0') { + if (kstrtouint(str, 10, val) < 0) { pr_err("bad line-number: %s\n", str); return -EINVAL; } @@ -343,14 +346,14 @@ static int ddebug_parse_query(char *words[], int nwords, } if (last) *last++ = '\0'; - if (parse_lineno(first, &query->first_lineno) < 0) { - pr_err("line-number is <0\n"); + if (parse_lineno(first, &query->first_lineno) < 0) return -EINVAL; - } if (last) { /* range <first>-<last> */ - if (parse_lineno(last, &query->last_lineno) - < query->first_lineno) { + if (parse_lineno(last, &query->last_lineno) < 0) + return -EINVAL; + + if (query->last_lineno < query->first_lineno) { pr_err("last-line:%d < 1st-line:%d\n", query->last_lineno, query->first_lineno); diff --git a/lib/flex_array.c b/lib/flex_array.c index 6948a6692fc4..2eed22fa507c 100644 --- a/lib/flex_array.c +++ b/lib/flex_array.c @@ -90,8 +90,8 @@ struct flex_array *flex_array_alloc(int element_size, unsigned int total, { struct flex_array *ret; int elems_per_part = 0; - int reciprocal_elems = 0; int max_size = 0; + struct reciprocal_value reciprocal_elems = { 0 }; if (element_size) { elems_per_part = FLEX_ARRAY_ELEMENTS_PER_PART(element_size); @@ -119,6 +119,11 @@ EXPORT_SYMBOL(flex_array_alloc); static int fa_element_to_part_nr(struct flex_array *fa, unsigned int element_nr) { + /* + * if element_size == 0 we don't get here, so we never touch + * the zeroed fa->reciprocal_elems, which would yield invalid + * results + */ return reciprocal_divide(element_nr, fa->reciprocal_elems); } diff --git a/lib/genalloc.c b/lib/genalloc.c index dda31168844f..bdb9a456bcbb 100644 --- a/lib/genalloc.c +++ b/lib/genalloc.c @@ -316,7 +316,7 @@ EXPORT_SYMBOL(gen_pool_alloc); * gen_pool_dma_alloc - allocate special memory from the pool for DMA usage * @pool: pool to allocate from * @size: number of bytes to allocate from the pool - * @dma: dma-view physical address + * @dma: 
dma-view physical address return value. Use NULL if unneeded. * * Allocate the requested number of bytes from the specified pool. * Uses the pool allocation function (with first-fit algorithm by default). @@ -334,7 +334,8 @@ void *gen_pool_dma_alloc(struct gen_pool *pool, size_t size, dma_addr_t *dma) if (!vaddr) return NULL; - *dma = gen_pool_virt_to_phys(pool, vaddr); + if (dma) + *dma = gen_pool_virt_to_phys(pool, vaddr); return (void *)vaddr; } diff --git a/lib/hash.c b/lib/hash.c new file mode 100644 index 000000000000..fea973f4bd57 --- /dev/null +++ b/lib/hash.c @@ -0,0 +1,39 @@ +/* General purpose hashing library + * + * That's a start of a kernel hashing library, which can be extended + * with further algorithms in future. arch_fast_hash{2,}() will + * eventually resolve to an architecture optimized implementation. + * + * Copyright 2013 Francesco Fusco <ffusco@redhat.com> + * Copyright 2013 Daniel Borkmann <dborkman@redhat.com> + * Copyright 2013 Thomas Graf <tgraf@redhat.com> + * Licensed under the GNU General Public License, version 2.0 (GPLv2) + */ + +#include <linux/jhash.h> +#include <linux/hash.h> +#include <linux/cache.h> + +static struct fast_hash_ops arch_hash_ops __read_mostly = { + .hash = jhash, + .hash2 = jhash2, +}; + +u32 arch_fast_hash(const void *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash); + +u32 arch_fast_hash2(const u32 *data, u32 len, u32 seed) +{ + return arch_hash_ops.hash2(data, len, seed); +} +EXPORT_SYMBOL_GPL(arch_fast_hash2); + +static int __init hashlib_init(void) +{ + setup_arch_fast_hash(&arch_hash_ops); + return 0; +} +early_initcall(hashlib_init); diff --git a/lib/idr.c b/lib/idr.c index bfe4db4e165f..2642fa8e424d 100644 --- a/lib/idr.c +++ b/lib/idr.c @@ -196,7 +196,7 @@ static void idr_mark_full(struct idr_layer **pa, int id) } } -int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) +static int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) { while (idp->id_free_cnt < MAX_IDR_FREE) { struct idr_layer *new; @@ -207,7 +207,6 @@ int __idr_pre_get(struct idr *idp, gfp_t gfp_mask) } return 1; } -EXPORT_SYMBOL(__idr_pre_get); /** * sub_alloc - try to allocate an id without growing the tree depth @@ -374,20 +373,6 @@ static void idr_fill_slot(struct idr *idr, void *ptr, int id, idr_mark_full(pa, id); } -int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id) -{ - struct idr_layer *pa[MAX_IDR_LEVEL + 1]; - int rv; - - rv = idr_get_empty_slot(idp, starting_id, pa, 0, idp); - if (rv < 0) - return rv == -ENOMEM ? -EAGAIN : rv; - - idr_fill_slot(idp, ptr, rv, pa); - *id = rv; - return 0; -} -EXPORT_SYMBOL(__idr_get_new_above); /** * idr_preload - preload for idr_alloc() @@ -548,7 +533,7 @@ static void sub_remove(struct idr *idp, int shift, int id) n = id & IDR_MASK; if (likely(p != NULL && test_bit(n, p->bitmap))) { __clear_bit(n, p->bitmap); - rcu_assign_pointer(p->ary[n], NULL); + RCU_INIT_POINTER(p->ary[n], NULL); to_free = NULL; while(*paa && ! 
--((**paa)->count)){ if (to_free) @@ -607,7 +592,7 @@ void idr_remove(struct idr *idp, int id) } EXPORT_SYMBOL(idr_remove); -void __idr_remove_all(struct idr *idp) +static void __idr_remove_all(struct idr *idp) { int n, id, max; int bt_mask; @@ -617,7 +602,7 @@ void __idr_remove_all(struct idr *idp) n = idp->layers * IDR_BITS; p = idp->top; - rcu_assign_pointer(idp->top, NULL); + RCU_INIT_POINTER(idp->top, NULL); max = idr_max(idp->layers); id = 0; @@ -640,7 +625,6 @@ void __idr_remove_all(struct idr *idp) } idp->layers = 0; } -EXPORT_SYMBOL(__idr_remove_all); /** * idr_destroy - release all cached layers within an idr tree @@ -869,6 +853,16 @@ void idr_init(struct idr *idp) } EXPORT_SYMBOL(idr_init); +static int idr_has_entry(int id, void *p, void *data) +{ + return 1; +} + +bool idr_is_empty(struct idr *idp) +{ + return !idr_for_each(idp, idr_has_entry, NULL); +} +EXPORT_SYMBOL(idr_is_empty); /** * DOC: IDA description diff --git a/lib/iomap.c b/lib/iomap.c index 2c08f36862eb..fc3dcb4b238e 100644 --- a/lib/iomap.c +++ b/lib/iomap.c @@ -224,7 +224,7 @@ EXPORT_SYMBOL(iowrite8_rep); EXPORT_SYMBOL(iowrite16_rep); EXPORT_SYMBOL(iowrite32_rep); -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP /* Create a virtual mapping cookie for an IO port range */ void __iomem *ioport_map(unsigned long port, unsigned int nr) { @@ -239,7 +239,7 @@ void ioport_unmap(void __iomem *addr) } EXPORT_SYMBOL(ioport_map); EXPORT_SYMBOL(ioport_unmap); -#endif /* CONFIG_HAS_IOPORT */ +#endif /* CONFIG_HAS_IOPORT_MAP */ #ifdef CONFIG_PCI /* Hide the details if this is a MMIO or PIO address space and just do what diff --git a/lib/kobject.c b/lib/kobject.c index 5b4b8886435e..58751bb80a7c 100644 --- a/lib/kobject.c +++ b/lib/kobject.c @@ -13,11 +13,11 @@ */ #include <linux/kobject.h> -#include <linux/kobj_completion.h> #include <linux/string.h> #include <linux/export.h> #include <linux/stat.h> #include <linux/slab.h> +#include <linux/random.h> /** * kobject_namespace - return @kobj's namespace tag @@ -65,13 +65,17 @@ static int populate_dir(struct kobject *kobj) static int create_dir(struct kobject *kobj) { + const struct kobj_ns_type_operations *ops; int error; error = sysfs_create_dir_ns(kobj, kobject_namespace(kobj)); - if (!error) { - error = populate_dir(kobj); - if (error) - sysfs_remove_dir(kobj); + if (error) + return error; + + error = populate_dir(kobj); + if (error) { + sysfs_remove_dir(kobj); + return error; } /* @@ -80,7 +84,20 @@ static int create_dir(struct kobject *kobj) */ sysfs_get(kobj->sd); - return error; + /* + * If @kobj has ns_ops, its children need to be filtered based on + * their namespace tags. Enable namespace support on @kobj->sd. + */ + ops = kobj_child_ns_ops(kobj); + if (ops) { + BUG_ON(ops->type <= KOBJ_NS_TYPE_NONE); + BUG_ON(ops->type >= KOBJ_NS_TYPES); + BUG_ON(!kobj_ns_type_registered(ops->type)); + + sysfs_enable_ns(kobj->sd); + } + + return 0; } static int get_kobj_path_length(struct kobject *kobj) @@ -247,8 +264,10 @@ int kobject_set_name_vargs(struct kobject *kobj, const char *fmt, return 0; kobj->name = kvasprintf(GFP_KERNEL, fmt, vargs); - if (!kobj->name) + if (!kobj->name) { + kobj->name = old_name; return -ENOMEM; + } /* ewww... some of these buggers have '/' in the name ... */ while ((s = strchr(kobj->name, '/'))) @@ -346,7 +365,7 @@ static int kobject_add_varg(struct kobject *kobj, struct kobject *parent, * * If @parent is set, then the parent of the @kobj will be set to it. 
* If @parent is NULL, then the parent of the @kobj will be set to the - * kobject associted with the kset assigned to this kobject. If no kset + * kobject associated with the kset assigned to this kobject. If no kset * is assigned to the kobject, then the kobject will be located in the * root of the sysfs tree. * @@ -536,7 +555,7 @@ out: */ void kobject_del(struct kobject *kobj) { - struct sysfs_dirent *sd; + struct kernfs_node *sd; if (!kobj) return; @@ -625,10 +644,12 @@ static void kobject_release(struct kref *kref) { struct kobject *kobj = container_of(kref, struct kobject, kref); #ifdef CONFIG_DEBUG_KOBJECT_RELEASE - pr_info("kobject: '%s' (%p): %s, parent %p (delayed)\n", - kobject_name(kobj), kobj, __func__, kobj->parent); + unsigned long delay = HZ + HZ * (get_random_int() & 0x3); + pr_info("kobject: '%s' (%p): %s, parent %p (delayed %ld)\n", + kobject_name(kobj), kobj, __func__, kobj->parent, delay); INIT_DELAYED_WORK(&kobj->release, kobject_delayed_cleanup); - schedule_delayed_work(&kobj->release, HZ); + + schedule_delayed_work(&kobj->release, delay); #else kobject_cleanup(kobj); #endif @@ -758,55 +779,7 @@ const struct sysfs_ops kobj_sysfs_ops = { .show = kobj_attr_show, .store = kobj_attr_store, }; - -/** - * kobj_completion_init - initialize a kobj_completion object. - * @kc: kobj_completion - * @ktype: type of kobject to initialize - * - * kobj_completion structures can be embedded within structures with different - * lifetime rules. During the release of the enclosing object, we can - * wait on the release of the kobject so that we don't free it while it's - * still busy. - */ -void kobj_completion_init(struct kobj_completion *kc, struct kobj_type *ktype) -{ - init_completion(&kc->kc_unregister); - kobject_init(&kc->kc_kobj, ktype); -} -EXPORT_SYMBOL_GPL(kobj_completion_init); - -/** - * kobj_completion_release - release a kobj_completion object - * @kobj: kobject embedded in kobj_completion - * - * Used with kobject_release to notify waiters that the kobject has been - * released. - */ -void kobj_completion_release(struct kobject *kobj) -{ - struct kobj_completion *kc = kobj_to_kobj_completion(kobj); - complete(&kc->kc_unregister); -} -EXPORT_SYMBOL_GPL(kobj_completion_release); - -/** - * kobj_completion_del_and_wait - release the kobject and wait for it - * @kc: kobj_completion object to release - * - * Delete the kobject from sysfs and drop the reference count. Then wait - * until any other outstanding references are also dropped. This routine - * is only necessary once other references may have been taken on the - * kobject. Typically this happens when the kobject has been published - * to sysfs via kobject_add. - */ -void kobj_completion_del_and_wait(struct kobj_completion *kc) -{ - kobject_del(&kc->kc_kobj); - kobject_put(&kc->kc_kobj); - wait_for_completion(&kc->kc_unregister); -} -EXPORT_SYMBOL_GPL(kobj_completion_del_and_wait); +EXPORT_SYMBOL_GPL(kobj_sysfs_ops); /** * kset_register - initialize and add a kset. 
@@ -835,6 +808,7 @@ void kset_unregister(struct kset *k) { if (!k) return; + kobject_del(&k->kobj); kobject_put(&k->kobj); } diff --git a/lib/kobject_uevent.c b/lib/kobject_uevent.c index 52e5abbc41db..4e3bd71bd949 100644 --- a/lib/kobject_uevent.c +++ b/lib/kobject_uevent.c @@ -88,11 +88,17 @@ out: #ifdef CONFIG_NET static int kobj_bcast_filter(struct sock *dsk, struct sk_buff *skb, void *data) { - struct kobject *kobj = data; + struct kobject *kobj = data, *ksobj; const struct kobj_ns_type_operations *ops; ops = kobj_ns_ops(kobj); - if (ops) { + if (!ops && kobj->kset) { + ksobj = &kobj->kset->kobj; + if (ksobj->parent != NULL) + ops = kobj_ns_ops(ksobj->parent); + } + + if (ops && ops->netlink_ns && kobj->ktype->namespace) { const void *sock_ns, *ns; ns = kobj->ktype->namespace(kobj); sock_ns = ops->netlink_ns(dsk); @@ -118,6 +124,30 @@ static int kobj_usermode_filter(struct kobject *kobj) return 0; } +static int init_uevent_argv(struct kobj_uevent_env *env, const char *subsystem) +{ + int len; + + len = strlcpy(&env->buf[env->buflen], subsystem, + sizeof(env->buf) - env->buflen); + if (len >= (sizeof(env->buf) - env->buflen)) { + WARN(1, KERN_ERR "init_uevent_argv: buffer size too small\n"); + return -ENOMEM; + } + + env->argv[0] = uevent_helper; + env->argv[1] = &env->buf[env->buflen]; + env->argv[2] = NULL; + + env->buflen += len + 1; + return 0; +} + +static void cleanup_uevent_env(struct subprocess_info *info) +{ + kfree(info->data); +} + /** * kobject_uevent_env - send an uevent with environmental data * @@ -295,11 +325,8 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, /* call uevent_helper, usually only enabled during early boot */ if (uevent_helper[0] && !kobj_usermode_filter(kobj)) { - char *argv [3]; + struct subprocess_info *info; - argv [0] = uevent_helper; - argv [1] = (char *)subsystem; - argv [2] = NULL; retval = add_uevent_var(env, "HOME=/"); if (retval) goto exit; @@ -307,9 +334,18 @@ int kobject_uevent_env(struct kobject *kobj, enum kobject_action action, "PATH=/sbin:/bin:/usr/sbin:/usr/bin"); if (retval) goto exit; + retval = init_uevent_argv(env, subsystem); + if (retval) + goto exit; - retval = call_usermodehelper(argv[0], argv, - env->envp, UMH_WAIT_EXEC); + retval = -ENOMEM; + info = call_usermodehelper_setup(env->argv[0], env->argv, + env->envp, GFP_KERNEL, + NULL, cleanup_uevent_env, env); + if (info) { + retval = call_usermodehelper_exec(info, UMH_NO_WAIT); + env = NULL; /* freed by cleanup_uevent_env */ + } } exit: diff --git a/lib/kstrtox.c b/lib/kstrtox.c index f78ae0c0c4e2..ec8da78df9be 100644 --- a/lib/kstrtox.c +++ b/lib/kstrtox.c @@ -92,7 +92,6 @@ static int _kstrtoull(const char *s, unsigned int base, unsigned long long *res) rv = _parse_integer(s, base, &_res); if (rv & KSTRTOX_OVERFLOW) return -ERANGE; - rv &= ~KSTRTOX_OVERFLOW; if (rv == 0) return -EINVAL; s += rv; diff --git a/lib/nlattr.c b/lib/nlattr.c index 18eca7809b08..fc6754720ced 100644 --- a/lib/nlattr.c +++ b/lib/nlattr.c @@ -303,9 +303,15 @@ int nla_memcmp(const struct nlattr *nla, const void *data, */ int nla_strcmp(const struct nlattr *nla, const char *str) { - int len = strlen(str) + 1; - int d = nla_len(nla) - len; + int len = strlen(str); + char *buf = nla_data(nla); + int attrlen = nla_len(nla); + int d; + if (attrlen > 0 && buf[attrlen - 1] == '\0') + attrlen--; + + d = attrlen - len; if (d == 0) d = memcmp(nla_data(nla), str, len); diff --git a/lib/parser.c b/lib/parser.c index 807b2aaa33fa..b6d11631231b 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ 
-113,6 +113,7 @@ int match_token(char *s, const match_table_t table, substring_t args[]) return p->token; } +EXPORT_SYMBOL(match_token); /** * match_number: scan a number in the given base from a substring_t @@ -163,6 +164,7 @@ int match_int(substring_t *s, int *result) { return match_number(s, result, 0); } +EXPORT_SYMBOL(match_int); /** * match_octal: - scan an octal representation of an integer from a substring_t @@ -177,6 +179,7 @@ int match_octal(substring_t *s, int *result) { return match_number(s, result, 8); } +EXPORT_SYMBOL(match_octal); /** * match_hex: - scan a hex representation of an integer from a substring_t @@ -191,6 +194,58 @@ int match_hex(substring_t *s, int *result) { return match_number(s, result, 16); } +EXPORT_SYMBOL(match_hex); + +/** + * match_wildcard: - parse if a string matches given wildcard pattern + * @pattern: wildcard pattern + * @str: the string to be parsed + * + * Description: Parse the string @str to check if matches wildcard + * pattern @pattern. The pattern may contain two type wildcardes: + * '*' - matches zero or more characters + * '?' - matches one character + * If it's matched, return true, else return false. + */ +bool match_wildcard(const char *pattern, const char *str) +{ + const char *s = str; + const char *p = pattern; + bool star = false; + + while (*s) { + switch (*p) { + case '?': + s++; + p++; + break; + case '*': + star = true; + str = s; + if (!*++p) + return true; + pattern = p; + break; + default: + if (*s == *p) { + s++; + p++; + } else { + if (!star) + return false; + str++; + s = str; + p = pattern; + } + break; + } + } + + if (*p == '*') + ++p; + return !*p; +} +EXPORT_SYMBOL(match_wildcard); /** * match_strlcpy: - Copy the characters from a substring_t to a sized buffer @@ -213,6 +268,7 @@ size_t match_strlcpy(char *dest, const substring_t *src, size_t size) } return ret; } +EXPORT_SYMBOL(match_strlcpy); /** * match_strdup: - allocate a new string with the contents of a substring_t @@ -230,10 +286,4 @@ char *match_strdup(const substring_t *s) match_strlcpy(p, s, sz); return p; } - -EXPORT_SYMBOL(match_token); -EXPORT_SYMBOL(match_int); -EXPORT_SYMBOL(match_octal); -EXPORT_SYMBOL(match_hex); -EXPORT_SYMBOL(match_strlcpy); EXPORT_SYMBOL(match_strdup); diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index 1a53d497a8c5..963b7034a51b 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -120,6 +120,9 @@ static void percpu_ref_kill_rcu(struct rcu_head *rcu) atomic_add((int) count - PCPU_COUNT_BIAS, &ref->count); + WARN_ONCE(atomic_read(&ref->count) <= 0, "percpu ref <= 0 (%i)", + atomic_read(&ref->count)); + /* @ref is viewed as dead on all CPUs, send out kill confirmation */ if (ref->confirm_kill) ref->confirm_kill(ref); diff --git a/lib/percpu_counter.c b/lib/percpu_counter.c index 8280a5dd1727..7dd33577b905 100644 --- a/lib/percpu_counter.c +++ b/lib/percpu_counter.c @@ -169,7 +169,7 @@ static int percpu_counter_hotcpu_callback(struct notifier_block *nb, struct percpu_counter *fbc; compute_batch_value(); - if (action != CPU_DEAD) + if (action != CPU_DEAD && action != CPU_DEAD_FROZEN) return NOTIFY_OK; cpu = (unsigned long)hcpu; diff --git a/lib/percpu_ida.c b/lib/percpu_ida.c index 9d054bf91d0f..93d145e5539c 100644 --- a/lib/percpu_ida.c +++ b/lib/percpu_ida.c @@ -54,9 +54,7 @@ static inline void move_tags(unsigned *dst, unsigned *dst_nr, /* * Try to steal tags from a remote cpu's percpu freelist. 
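The new match_wildcard() helper above is what dynamic_debug now uses to match module, file and function names: '*' matches any run of characters and '?' matches exactly one. A tiny, hypothetical illustration of those semantics:

    #include <linux/bug.h>
    #include <linux/parser.h>

    static void example_wildcards(void)
    {
            WARN_ON(!match_wildcard("usb*", "usbcore"));    /* true  */
            WARN_ON(!match_wildcard("e100?", "e1000"));     /* true  */
            WARN_ON(match_wildcard("e100?", "e100"));       /* false: '?' needs a char */
    }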
* - * We first check how many percpu freelists have tags - we don't steal tags - * unless enough percpu freelists have tags on them that it's possible more than - * half the total tags could be stuck on remote percpu freelists. + * We first check how many percpu freelists have tags * * Then we iterate through the cpus until we find some tags - we don't attempt * to find the "best" cpu to steal from, to keep cacheline bouncing to a @@ -69,8 +67,7 @@ static inline void steal_tags(struct percpu_ida *pool, struct percpu_ida_cpu *remote; for (cpus_have_tags = cpumask_weight(&pool->cpus_have_tags); - cpus_have_tags * pool->percpu_max_size > pool->nr_tags / 2; - cpus_have_tags--) { + cpus_have_tags; cpus_have_tags--) { cpu = cpumask_next(cpu, &pool->cpus_have_tags); if (cpu >= nr_cpu_ids) { @@ -132,22 +129,22 @@ static inline unsigned alloc_local_tag(struct percpu_ida_cpu *tags) /** * percpu_ida_alloc - allocate a tag * @pool: pool to allocate from - * @gfp: gfp flags + * @state: task state for prepare_to_wait * * Returns a tag - an integer in the range [0..nr_tags) (passed to * tag_pool_init()), or otherwise -ENOSPC on allocation failure. * * Safe to be called from interrupt context (assuming it isn't passed - * __GFP_WAIT, of course). + * TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE, of course). * * @gfp indicates whether or not to wait until a free id is available (it's not * used for internal memory allocations); thus if passed __GFP_WAIT we may sleep * however long it takes until another thread frees an id (same semantics as a * mempool). * - * Will not fail if passed __GFP_WAIT. + * Will not fail if passed TASK_UNINTERRUPTIBLE | TASK_INTERRUPTIBLE. */ -int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) +int percpu_ida_alloc(struct percpu_ida *pool, int state) { DEFINE_WAIT(wait); struct percpu_ida_cpu *tags; @@ -174,7 +171,8 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) * * global lock held and irqs disabled, don't need percpu lock */ - prepare_to_wait(&pool->wait, &wait, TASK_UNINTERRUPTIBLE); + if (state != TASK_RUNNING) + prepare_to_wait(&pool->wait, &wait, state); if (!tags->nr_free) alloc_global_tags(pool, tags); @@ -191,16 +189,22 @@ int percpu_ida_alloc(struct percpu_ida *pool, gfp_t gfp) spin_unlock(&pool->lock); local_irq_restore(flags); - if (tag >= 0 || !(gfp & __GFP_WAIT)) + if (tag >= 0 || state == TASK_RUNNING) break; + if (signal_pending_state(state, current)) { + tag = -ERESTARTSYS; + break; + } + schedule(); local_irq_save(flags); tags = this_cpu_ptr(pool->tag_cpu); } + if (state != TASK_RUNNING) + finish_wait(&pool->wait, &wait); - finish_wait(&pool->wait, &wait); return tag; } EXPORT_SYMBOL_GPL(percpu_ida_alloc); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 7811ed3b4e70..9599aa72d7a0 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -35,33 +35,6 @@ #include <linux/hardirq.h> /* in_interrupt() */ -#ifdef __KERNEL__ -#define RADIX_TREE_MAP_SHIFT (CONFIG_BASE_SMALL ? 
4 : 6) -#else -#define RADIX_TREE_MAP_SHIFT 3 /* For more stressful testing */ -#endif - -#define RADIX_TREE_MAP_SIZE (1UL << RADIX_TREE_MAP_SHIFT) -#define RADIX_TREE_MAP_MASK (RADIX_TREE_MAP_SIZE-1) - -#define RADIX_TREE_TAG_LONGS \ - ((RADIX_TREE_MAP_SIZE + BITS_PER_LONG - 1) / BITS_PER_LONG) - -struct radix_tree_node { - unsigned int height; /* Height from the bottom */ - unsigned int count; - union { - struct radix_tree_node *parent; /* Used when ascending tree */ - struct rcu_head rcu_head; /* Used when freeing node */ - }; - void __rcu *slots[RADIX_TREE_MAP_SIZE]; - unsigned long tags[RADIX_TREE_MAX_TAGS][RADIX_TREE_TAG_LONGS]; -}; - -#define RADIX_TREE_INDEX_BITS (8 /* CHAR_BIT */ * sizeof(unsigned long)) -#define RADIX_TREE_MAX_PATH (DIV_ROUND_UP(RADIX_TREE_INDEX_BITS, \ - RADIX_TREE_MAP_SHIFT)) - /* * The height_to_maxindex array needs to be one deeper than the maximum * path as height 0 holds only 1 entry. @@ -369,7 +342,8 @@ static int radix_tree_extend(struct radix_tree_root *root, unsigned long index) /* Increase the height. */ newheight = root->height+1; - node->height = newheight; + BUG_ON(newheight & ~RADIX_TREE_HEIGHT_MASK); + node->path = newheight; node->count = 1; node->parent = NULL; slot = root->rnode; @@ -387,23 +361,28 @@ out: } /** - * radix_tree_insert - insert into a radix tree + * __radix_tree_create - create a slot in a radix tree * @root: radix tree root * @index: index key - * @item: item to insert + * @nodep: returns node + * @slotp: returns slot * - * Insert an item into the radix tree at position @index. + * Create, if necessary, and return the node and slot for an item + * at position @index in the radix tree @root. + * + * Until there is more than one item in the tree, no nodes are + * allocated and @root->rnode is used as a direct slot instead of + * pointing to a node, in which case *@nodep will be NULL. + * + * Returns -ENOMEM, or 0 for success. */ -int radix_tree_insert(struct radix_tree_root *root, - unsigned long index, void *item) +int __radix_tree_create(struct radix_tree_root *root, unsigned long index, + struct radix_tree_node **nodep, void ***slotp) { struct radix_tree_node *node = NULL, *slot; - unsigned int height, shift; - int offset; + unsigned int height, shift, offset; int error; - BUG_ON(radix_tree_is_indirect_ptr(item)); - /* Make sure the tree is high enough. */ if (index > radix_tree_maxindex(root->height)) { error = radix_tree_extend(root, index); @@ -422,11 +401,12 @@ int radix_tree_insert(struct radix_tree_root *root, /* Have to add a child node. */ if (!(slot = radix_tree_node_alloc(root))) return -ENOMEM; - slot->height = height; + slot->path = height; slot->parent = node; if (node) { rcu_assign_pointer(node->slots[offset], slot); node->count++; + slot->path |= offset << RADIX_TREE_HEIGHT_SHIFT; } else rcu_assign_pointer(root->rnode, ptr_to_indirect(slot)); } @@ -439,16 +419,42 @@ int radix_tree_insert(struct radix_tree_root *root, height--; } - if (slot != NULL) + if (nodep) + *nodep = node; + if (slotp) + *slotp = node ? node->slots + offset : (void **)&root->rnode; + return 0; +} + +/** + * radix_tree_insert - insert into a radix tree + * @root: radix tree root + * @index: index key + * @item: item to insert + * + * Insert an item into the radix tree at position @index. 
+ */ +int radix_tree_insert(struct radix_tree_root *root, + unsigned long index, void *item) +{ + struct radix_tree_node *node; + void **slot; + int error; + + BUG_ON(radix_tree_is_indirect_ptr(item)); + + error = __radix_tree_create(root, index, &node, &slot); + if (error) + return error; + if (*slot != NULL) return -EEXIST; + rcu_assign_pointer(*slot, item); if (node) { node->count++; - rcu_assign_pointer(node->slots[offset], item); - BUG_ON(tag_get(node, 0, offset)); - BUG_ON(tag_get(node, 1, offset)); + BUG_ON(tag_get(node, 0, index & RADIX_TREE_MAP_MASK)); + BUG_ON(tag_get(node, 1, index & RADIX_TREE_MAP_MASK)); } else { - rcu_assign_pointer(root->rnode, item); BUG_ON(root_tag_get(root, 0)); BUG_ON(root_tag_get(root, 1)); } @@ -457,15 +463,26 @@ int radix_tree_insert(struct radix_tree_root *root, } EXPORT_SYMBOL(radix_tree_insert); -/* - * is_slot == 1 : search for the slot. - * is_slot == 0 : search for the node. +/** + * __radix_tree_lookup - lookup an item in a radix tree + * @root: radix tree root + * @index: index key + * @nodep: returns node + * @slotp: returns slot + * + * Lookup and return the item at position @index in the radix + * tree @root. + * + * Until there is more than one item in the tree, no nodes are + * allocated and @root->rnode is used as a direct slot instead of + * pointing to a node, in which case *@nodep will be NULL. */ -static void *radix_tree_lookup_element(struct radix_tree_root *root, - unsigned long index, int is_slot) +void *__radix_tree_lookup(struct radix_tree_root *root, unsigned long index, + struct radix_tree_node **nodep, void ***slotp) { + struct radix_tree_node *node, *parent; unsigned int height, shift; - struct radix_tree_node *node, **slot; + void **slot; node = rcu_dereference_raw(root->rnode); if (node == NULL) @@ -474,19 +491,24 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, if (!radix_tree_is_indirect_ptr(node)) { if (index > 0) return NULL; - return is_slot ? (void *)&root->rnode : node; + + if (nodep) + *nodep = NULL; + if (slotp) + *slotp = (void **)&root->rnode; + return node; } node = indirect_to_ptr(node); - height = node->height; + height = node->path & RADIX_TREE_HEIGHT_MASK; if (index > radix_tree_maxindex(height)) return NULL; shift = (height-1) * RADIX_TREE_MAP_SHIFT; do { - slot = (struct radix_tree_node **) - (node->slots + ((index>>shift) & RADIX_TREE_MAP_MASK)); + parent = node; + slot = node->slots + ((index >> shift) & RADIX_TREE_MAP_MASK); node = rcu_dereference_raw(*slot); if (node == NULL) return NULL; @@ -495,7 +517,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, height--; } while (height > 0); - return is_slot ? 
(void *)slot : indirect_to_ptr(node); + if (nodep) + *nodep = parent; + if (slotp) + *slotp = slot; + return node; } /** @@ -513,7 +539,11 @@ static void *radix_tree_lookup_element(struct radix_tree_root *root, */ void **radix_tree_lookup_slot(struct radix_tree_root *root, unsigned long index) { - return (void **)radix_tree_lookup_element(root, index, 1); + void **slot; + + if (!__radix_tree_lookup(root, index, NULL, &slot)) + return NULL; + return slot; } EXPORT_SYMBOL(radix_tree_lookup_slot); @@ -531,7 +561,7 @@ EXPORT_SYMBOL(radix_tree_lookup_slot); */ void *radix_tree_lookup(struct radix_tree_root *root, unsigned long index) { - return radix_tree_lookup_element(root, index, 0); + return __radix_tree_lookup(root, index, NULL, NULL); } EXPORT_SYMBOL(radix_tree_lookup); @@ -676,7 +706,7 @@ int radix_tree_tag_get(struct radix_tree_root *root, return (index == 0); node = indirect_to_ptr(node); - height = node->height; + height = node->path & RADIX_TREE_HEIGHT_MASK; if (index > radix_tree_maxindex(height)) return 0; @@ -713,7 +743,7 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, { unsigned shift, tag = flags & RADIX_TREE_ITER_TAG_MASK; struct radix_tree_node *rnode, *node; - unsigned long index, offset; + unsigned long index, offset, height; if ((flags & RADIX_TREE_ITER_TAGGED) && !root_tag_get(root, tag)) return NULL; @@ -744,7 +774,8 @@ void **radix_tree_next_chunk(struct radix_tree_root *root, return NULL; restart: - shift = (rnode->height - 1) * RADIX_TREE_MAP_SHIFT; + height = rnode->path & RADIX_TREE_HEIGHT_MASK; + shift = (height - 1) * RADIX_TREE_MAP_SHIFT; offset = index >> shift; /* Index outside of the tree */ @@ -946,81 +977,6 @@ next: } EXPORT_SYMBOL(radix_tree_range_tag_if_tagged); - -/** - * radix_tree_next_hole - find the next hole (not-present entry) - * @root: tree root - * @index: index key - * @max_scan: maximum range to search - * - * Search the set [index, min(index+max_scan-1, MAX_INDEX)] for the lowest - * indexed hole. - * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'return - index >= max_scan' - * will be true). In rare cases of index wrap-around, 0 will be returned. - * - * radix_tree_next_hole may be called under rcu_read_lock. However, like - * radix_tree_gang_lookup, this will not atomically search a snapshot of - * the tree at a single point in time. For example, if a hole is created - * at index 5, then subsequently a hole is created at index 10, - * radix_tree_next_hole covering both indexes may return 10 if called - * under rcu_read_lock. - */ -unsigned long radix_tree_next_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan) -{ - unsigned long i; - - for (i = 0; i < max_scan; i++) { - if (!radix_tree_lookup(root, index)) - break; - index++; - if (index == 0) - break; - } - - return index; -} -EXPORT_SYMBOL(radix_tree_next_hole); - -/** - * radix_tree_prev_hole - find the prev hole (not-present entry) - * @root: tree root - * @index: index key - * @max_scan: maximum range to search - * - * Search backwards in the range [max(index-max_scan+1, 0), index] - * for the first hole. - * - * Returns: the index of the hole if found, otherwise returns an index - * outside of the set specified (in which case 'index - return >= max_scan' - * will be true). In rare cases of wrap-around, ULONG_MAX will be returned. - * - * radix_tree_next_hole may be called under rcu_read_lock. 
However, like - * radix_tree_gang_lookup, this will not atomically search a snapshot of - * the tree at a single point in time. For example, if a hole is created - * at index 10, then subsequently a hole is created at index 5, - * radix_tree_prev_hole covering both indexes may return 5 if called under - * rcu_read_lock. - */ -unsigned long radix_tree_prev_hole(struct radix_tree_root *root, - unsigned long index, unsigned long max_scan) -{ - unsigned long i; - - for (i = 0; i < max_scan; i++) { - if (!radix_tree_lookup(root, index)) - break; - index--; - if (index == ULONG_MAX) - break; - } - - return index; -} -EXPORT_SYMBOL(radix_tree_prev_hole); - /** * radix_tree_gang_lookup - perform multiple lookup on a radix tree * @root: radix tree root @@ -1189,7 +1145,7 @@ static unsigned long __locate(struct radix_tree_node *slot, void *item, unsigned int shift, height; unsigned long i; - height = slot->height; + height = slot->path & RADIX_TREE_HEIGHT_MASK; shift = (height-1) * RADIX_TREE_MAP_SHIFT; for ( ; height > 1; height--) { @@ -1252,9 +1208,12 @@ unsigned long radix_tree_locate_item(struct radix_tree_root *root, void *item) } node = indirect_to_ptr(node); - max_index = radix_tree_maxindex(node->height); - if (cur_index > max_index) + max_index = radix_tree_maxindex(node->path & + RADIX_TREE_HEIGHT_MASK); + if (cur_index > max_index) { + rcu_read_unlock(); break; + } cur_index = __locate(node, item, cur_index, &found_index); rcu_read_unlock(); @@ -1335,48 +1294,90 @@ static inline void radix_tree_shrink(struct radix_tree_root *root) } /** - * radix_tree_delete - delete an item from a radix tree + * __radix_tree_delete_node - try to free node after clearing a slot * @root: radix tree root * @index: index key + * @node: node containing @index * - * Remove the item at @index from the radix tree rooted at @root. + * After clearing the slot at @index in @node from radix tree + * rooted at @root, call this function to attempt freeing the + * node and shrinking the tree. * - * Returns the address of the deleted item, or NULL if it was not present. + * Returns %true if @node was freed, %false otherwise. */ -void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +bool __radix_tree_delete_node(struct radix_tree_root *root, + struct radix_tree_node *node) { - struct radix_tree_node *node = NULL; - struct radix_tree_node *slot = NULL; - struct radix_tree_node *to_free; - unsigned int height, shift; + bool deleted = false; + + do { + struct radix_tree_node *parent; + + if (node->count) { + if (node == indirect_to_ptr(root->rnode)) { + radix_tree_shrink(root); + if (root->height == 0) + deleted = true; + } + return deleted; + } + + parent = node->parent; + if (parent) { + unsigned int offset; + + offset = node->path >> RADIX_TREE_HEIGHT_SHIFT; + parent->slots[offset] = NULL; + parent->count--; + } else { + root_tag_clear_all(root); + root->height = 0; + root->rnode = NULL; + } + + radix_tree_node_free(node); + deleted = true; + + node = parent; + } while (node); + + return deleted; +} + +/** + * radix_tree_delete_item - delete an item from a radix tree + * @root: radix tree root + * @index: index key + * @item: expected item + * + * Remove @item at @index from the radix tree rooted at @root. + * + * Returns the address of the deleted item, or NULL if it was not present + * or the entry at the given @index was not @item. 
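The radix_tree_delete_item() documented above lets a caller remove an entry only if it still holds the value the caller expects, rather than deleting whatever happens to be at the index. A short usage sketch under an assumed external lock (the lock and function name here are illustrative, not part of the patch):

	#include <linux/radix-tree.h>
	#include <linux/spinlock.h>

	/* Remove @index only if it still holds @expected; NULL otherwise. */
	static void *delete_if_match(struct radix_tree_root *root,
				     spinlock_t *lock,
				     unsigned long index, void *expected)
	{
		void *old;

		spin_lock(lock);
		old = radix_tree_delete_item(root, index, expected);
		spin_unlock(lock);

		return old;
	}

Passing a NULL @item falls back to the old unconditional behaviour, which is exactly how the reworked radix_tree_delete() below uses it.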
+ */ +void *radix_tree_delete_item(struct radix_tree_root *root, + unsigned long index, void *item) +{ + struct radix_tree_node *node; + unsigned int offset; + void **slot; + void *entry; int tag; - int uninitialized_var(offset); - height = root->height; - if (index > radix_tree_maxindex(height)) - goto out; + entry = __radix_tree_lookup(root, index, &node, &slot); + if (!entry) + return NULL; - slot = root->rnode; - if (height == 0) { + if (item && entry != item) + return NULL; + + if (!node) { root_tag_clear_all(root); root->rnode = NULL; - goto out; + return entry; } - slot = indirect_to_ptr(slot); - shift = height * RADIX_TREE_MAP_SHIFT; - do { - if (slot == NULL) - goto out; - - shift -= RADIX_TREE_MAP_SHIFT; - offset = (index >> shift) & RADIX_TREE_MAP_MASK; - node = slot; - slot = slot->slots[offset]; - } while (shift); - - if (slot == NULL) - goto out; + offset = index & RADIX_TREE_MAP_MASK; /* * Clear all tags associated with the item to be deleted. @@ -1387,40 +1388,27 @@ void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) radix_tree_tag_clear(root, index, tag); } - to_free = NULL; - /* Now free the nodes we do not need anymore */ - while (node) { - node->slots[offset] = NULL; - node->count--; - /* - * Queue the node for deferred freeing after the - * last reference to it disappears (set NULL, above). - */ - if (to_free) - radix_tree_node_free(to_free); - - if (node->count) { - if (node == indirect_to_ptr(root->rnode)) - radix_tree_shrink(root); - goto out; - } - - /* Node with zero slots in use so free it */ - to_free = node; + node->slots[offset] = NULL; + node->count--; - index >>= RADIX_TREE_MAP_SHIFT; - offset = index & RADIX_TREE_MAP_MASK; - node = node->parent; - } + __radix_tree_delete_node(root, node); - root_tag_clear_all(root); - root->height = 0; - root->rnode = NULL; - if (to_free) - radix_tree_node_free(to_free); + return entry; +} +EXPORT_SYMBOL(radix_tree_delete_item); -out: - return slot; +/** + * radix_tree_delete - delete an item from a radix tree + * @root: radix tree root + * @index: index key + * + * Remove the item at @index from the radix tree rooted at @root. + * + * Returns the address of the deleted item, or NULL if it was not present. + */ +void *radix_tree_delete(struct radix_tree_root *root, unsigned long index) +{ + return radix_tree_delete_item(root, index, NULL); } EXPORT_SYMBOL(radix_tree_delete); @@ -1436,9 +1424,12 @@ int radix_tree_tagged(struct radix_tree_root *root, unsigned int tag) EXPORT_SYMBOL(radix_tree_tagged); static void -radix_tree_node_ctor(void *node) +radix_tree_node_ctor(void *arg) { - memset(node, 0, sizeof(struct radix_tree_node)); + struct radix_tree_node *node = arg; + + memset(node, 0, sizeof(*node)); + INIT_LIST_HEAD(&node->private_list); } static __init unsigned long __maxindex(unsigned int height) diff --git a/lib/random32.c b/lib/random32.c index 1e5b2df44291..fa5da61ce7ad 100644 --- a/lib/random32.c +++ b/lib/random32.c @@ -1,37 +1,35 @@ /* - This is a maximally equidistributed combined Tausworthe generator - based on code from GNU Scientific Library 1.5 (30 Jun 2004) - - lfsr113 version: - - x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n) - - s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n << 6) ^ s1_n) >> 13)) - s2_{n+1} = (((s2_n & 4294967288) << 2) ^ (((s2_n << 2) ^ s2_n) >> 27)) - s3_{n+1} = (((s3_n & 4294967280) << 7) ^ (((s3_n << 13) ^ s3_n) >> 21)) - s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n << 3) ^ s4_n) >> 12)) - - The period of this generator is about 2^113 (see erratum paper). 
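The four recurrences quoted in the generator comment above are easy to exercise outside the kernel. The following stand-alone program is only an illustration of the lfsr113 step the comment describes, seeded with the smallest values the erratum permits; it is not the in-kernel code path:

	#include <stdint.h>
	#include <stdio.h>

	struct taus113_state {
		uint32_t s1, s2, s3, s4;
	};

	/* One lfsr113 step: the four recurrences from the comment above. */
	static uint32_t taus113_next(struct taus113_state *s)
	{
		s->s1 = ((s->s1 & 4294967294U) << 18) ^ (((s->s1 << 6) ^ s->s1) >> 13);
		s->s2 = ((s->s2 & 4294967288U) << 2)  ^ (((s->s2 << 2) ^ s->s2) >> 27);
		s->s3 = ((s->s3 & 4294967280U) << 7)  ^ (((s->s3 << 13) ^ s->s3) >> 21);
		s->s4 = ((s->s4 & 4294967168U) << 13) ^ (((s->s4 << 3) ^ s->s4) >> 12);

		return s->s1 ^ s->s2 ^ s->s3 ^ s->s4;
	}

	int main(void)
	{
		/* Smallest seeds satisfying s1 > 1, s2 > 7, s3 > 15, s4 > 127. */
		struct taus113_state s = { 2, 8, 16, 128 };
		int i;

		for (i = 0; i < 4; i++)
			printf("%08x\n", (unsigned)taus113_next(&s));

		return 0;
	}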
- - From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe - Generators", Mathematics of Computation, 65, 213 (1996), 203--213: - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps - ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps - - There is an erratum in the paper "Tables of Maximally - Equidistributed Combined LFSR Generators", Mathematics of - Computation, 68, 225 (1999), 261--269: - http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps - - ... the k_j most significant bits of z_j must be non- - zero, for each j. (Note: this restriction also applies to the - computer code given in [4], but was mistakenly not mentioned in - that paper.) - - This affects the seeding procedure by imposing the requirement - s1 > 1, s2 > 7, s3 > 15, s4 > 127. - -*/ + * This is a maximally equidistributed combined Tausworthe generator + * based on code from GNU Scientific Library 1.5 (30 Jun 2004) + * + * lfsr113 version: + * + * x_n = (s1_n ^ s2_n ^ s3_n ^ s4_n) + * + * s1_{n+1} = (((s1_n & 4294967294) << 18) ^ (((s1_n << 6) ^ s1_n) >> 13)) + * s2_{n+1} = (((s2_n & 4294967288) << 2) ^ (((s2_n << 2) ^ s2_n) >> 27)) + * s3_{n+1} = (((s3_n & 4294967280) << 7) ^ (((s3_n << 13) ^ s3_n) >> 21)) + * s4_{n+1} = (((s4_n & 4294967168) << 13) ^ (((s4_n << 3) ^ s4_n) >> 12)) + * + * The period of this generator is about 2^113 (see erratum paper). + * + * From: P. L'Ecuyer, "Maximally Equidistributed Combined Tausworthe + * Generators", Mathematics of Computation, 65, 213 (1996), 203--213: + * http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme.ps + * ftp://ftp.iro.umontreal.ca/pub/simulation/lecuyer/papers/tausme.ps + * + * There is an erratum in the paper "Tables of Maximally Equidistributed + * Combined LFSR Generators", Mathematics of Computation, 68, 225 (1999), + * 261--269: http://www.iro.umontreal.ca/~lecuyer/myftp/papers/tausme2.ps + * + * ... the k_j most significant bits of z_j must be non-zero, + * for each j. (Note: this restriction also applies to the + * computer code given in [4], but was mistakenly not mentioned + * in that paper.) + * + * This affects the seeding procedure by imposing the requirement + * s1 > 1, s2 > 7, s3 > 15, s4 > 127. + */ #include <linux/types.h> #include <linux/percpu.h> @@ -75,15 +73,17 @@ EXPORT_SYMBOL(prandom_u32_state); */ u32 prandom_u32(void) { - unsigned long r; struct rnd_state *state = &get_cpu_var(net_rand_state); - r = prandom_u32_state(state); + u32 res; + + res = prandom_u32_state(state); put_cpu_var(state); - return r; + + return res; } EXPORT_SYMBOL(prandom_u32); -/* +/** * prandom_bytes_state - get the requested number of pseudo-random bytes * * @state: pointer to state structure holding seeded state. @@ -204,6 +204,7 @@ static int __init prandom_init(void) prandom_seed_very_weak(state, (i + jiffies) ^ random_get_entropy()); prandom_warmup(state); } + return 0; } core_initcall(prandom_init); @@ -244,10 +245,22 @@ static void __prandom_reseed(bool late) static bool latch = false; static DEFINE_SPINLOCK(lock); + /* Asking for random bytes might result in bytes getting + * moved into the nonblocking pool and thus marking it + * as initialized. In this case we would double back into + * this function and attempt to do a late reseed. + * Ignore the pointless attempt to reseed again if we're + * already waiting for bytes when the nonblocking pool + * got initialized. 
+ */ + /* only allow initial seeding (late == false) once */ - spin_lock_irqsave(&lock, flags); + if (!spin_trylock_irqsave(&lock, flags)) + return; + if (latch && !late) goto out; + latch = true; for_each_possible_cpu(i) { diff --git a/lib/rbtree_test.c b/lib/rbtree_test.c index 31dd4ccd3baa..8b3c9dc88262 100644 --- a/lib/rbtree_test.c +++ b/lib/rbtree_test.c @@ -8,8 +8,8 @@ #define CHECK_LOOPS 100 struct test_node { - struct rb_node rb; u32 key; + struct rb_node rb; /* following fields used for testing augmented rbtree functionality */ u32 val; @@ -114,6 +114,16 @@ static int black_path_count(struct rb_node *rb) return count; } +static void check_postorder_foreach(int nr_nodes) +{ + struct test_node *cur, *n; + int count = 0; + rbtree_postorder_for_each_entry_safe(cur, n, &root, rb) + count++; + + WARN_ON_ONCE(count != nr_nodes); +} + static void check_postorder(int nr_nodes) { struct rb_node *rb; @@ -148,6 +158,7 @@ static void check(int nr_nodes) WARN_ON_ONCE(count < (1 << black_path_count(rb_last(&root))) - 1); check_postorder(nr_nodes); + check_postorder_foreach(nr_nodes); } static void check_augmented(int nr_nodes) diff --git a/lib/reciprocal_div.c b/lib/reciprocal_div.c index 75510e94f7d0..464152410c51 100644 --- a/lib/reciprocal_div.c +++ b/lib/reciprocal_div.c @@ -1,11 +1,27 @@ +#include <linux/kernel.h> #include <asm/div64.h> #include <linux/reciprocal_div.h> #include <linux/export.h> -u32 reciprocal_value(u32 k) +/* + * For a description of the algorithm please have a look at + * include/linux/reciprocal_div.h + */ + +struct reciprocal_value reciprocal_value(u32 d) { - u64 val = (1LL << 32) + (k - 1); - do_div(val, k); - return (u32)val; + struct reciprocal_value R; + u64 m; + int l; + + l = fls(d - 1); + m = ((1ULL << 32) * ((1ULL << l) - d)); + do_div(m, d); + ++m; + R.m = (u32)m; + R.sh1 = min(l, 1); + R.sh2 = max(l - 1, 0); + + return R; } EXPORT_SYMBOL(reciprocal_value); diff --git a/lib/scatterlist.c b/lib/scatterlist.c index d16fa295ae1d..3a8e8e8fb2a5 100644 --- a/lib/scatterlist.c +++ b/lib/scatterlist.c @@ -495,7 +495,7 @@ static bool sg_miter_get_next_page(struct sg_mapping_iter *miter) * true if @miter contains the valid mapping. false if end of sg * list is reached. 
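The reciprocal_value() rework a few hunks above replaces the single magic multiplier with the (m, sh1, sh2) triple used by Granlund/Montgomery-style division. The stand-alone sketch below mirrors that construction and shows the matching multiply-and-shift divide step; the divide routine is written out independently here and should be read as an illustration, not as a copy of include/linux/reciprocal_div.h:

	#include <stdint.h>
	#include <stdio.h>

	struct recip {
		uint32_t m;
		uint8_t sh1, sh2;
	};

	/* fls(): position of the most significant set bit, 0 for x == 0. */
	static int fls32(uint32_t x)
	{
		int r = 0;

		while (x) {
			r++;
			x >>= 1;
		}
		return r;
	}

	/* Same construction as the new reciprocal_value(); @d must be non-zero. */
	static struct recip recip_value(uint32_t d)
	{
		struct recip R;
		int l = fls32(d - 1);
		uint64_t m = ((1ULL << 32) * ((1ULL << l) - d)) / d + 1;

		R.m = (uint32_t)m;
		R.sh1 = l > 1 ? 1 : l;		/* min(l, 1) */
		R.sh2 = l > 0 ? l - 1 : 0;	/* max(l - 1, 0) */
		return R;
	}

	/* Divide @a by the divisor encoded in @R using only multiply and shifts. */
	static uint32_t recip_divide(uint32_t a, struct recip R)
	{
		uint32_t t = (uint32_t)(((uint64_t)a * R.m) >> 32);

		return (t + ((a - t) >> R.sh1)) >> R.sh2;
	}

	int main(void)
	{
		unsigned int d = 7, a = 1000000;
		struct recip R = recip_value(d);

		printf("%u / %u = %u (expected %u)\n",
		       a, d, (unsigned)recip_divide(a, R), a / d);
		return 0;
	}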
*/ -static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) +bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) { sg_miter_stop(miter); @@ -513,6 +513,7 @@ static bool sg_miter_skip(struct sg_mapping_iter *miter, off_t offset) return true; } +EXPORT_SYMBOL(sg_miter_skip); /** * sg_miter_next - proceed mapping iterator to the next mapping diff --git a/lib/show_mem.c b/lib/show_mem.c index 5847a4921b8e..09225796991a 100644 --- a/lib/show_mem.c +++ b/lib/show_mem.c @@ -17,9 +17,6 @@ void show_mem(unsigned int filter) printk("Mem-Info:\n"); show_free_areas(filter); - if (filter & SHOW_MEM_FILTER_PAGE_COUNT) - return; - for_each_online_pgdat(pgdat) { unsigned long flags; int zoneid; @@ -46,4 +43,7 @@ void show_mem(unsigned int filter) printk("%lu pages in pagetable cache\n", quicklist_total_size()); #endif +#ifdef CONFIG_MEMORY_FAILURE + printk("%lu pages hwpoisoned\n", atomic_long_read(&num_poisoned_pages)); +#endif } diff --git a/lib/smp_processor_id.c b/lib/smp_processor_id.c index 04abe53f12a1..1afec32de6f2 100644 --- a/lib/smp_processor_id.c +++ b/lib/smp_processor_id.c @@ -7,7 +7,8 @@ #include <linux/kallsyms.h> #include <linux/sched.h> -notrace unsigned int debug_smp_processor_id(void) +notrace static unsigned int check_preemption_disabled(const char *what1, + const char *what2) { int this_cpu = raw_smp_processor_id(); @@ -38,9 +39,9 @@ notrace unsigned int debug_smp_processor_id(void) if (!printk_ratelimit()) goto out_enable; - printk(KERN_ERR "BUG: using smp_processor_id() in preemptible [%08x] " - "code: %s/%d\n", - preempt_count() - 1, current->comm, current->pid); + printk(KERN_ERR "BUG: using %s%s() in preemptible [%08x] code: %s/%d\n", + what1, what2, preempt_count() - 1, current->comm, current->pid); + print_symbol("caller is %s\n", (long)__builtin_return_address(0)); dump_stack(); @@ -50,5 +51,14 @@ out: return this_cpu; } +notrace unsigned int debug_smp_processor_id(void) +{ + return check_preemption_disabled("smp_processor_id", ""); +} EXPORT_SYMBOL(debug_smp_processor_id); +notrace void __this_cpu_preempt_check(const char *op) +{ + check_preemption_disabled("__this_cpu_", op); +} +EXPORT_SYMBOL(__this_cpu_preempt_check); diff --git a/lib/string.c b/lib/string.c index e5878de4f101..9b1f9062a202 100644 --- a/lib/string.c +++ b/lib/string.c @@ -648,7 +648,7 @@ EXPORT_SYMBOL(memmove); * @count: The size of the area. */ #undef memcmp -int memcmp(const void *cs, const void *ct, size_t count) +__visible int memcmp(const void *cs, const void *ct, size_t count) { const unsigned char *su1, *su2; int res = 0; diff --git a/lib/swiotlb.c b/lib/swiotlb.c index e4399fa65ad6..b604b831f4d1 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -172,8 +172,9 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) /* * Get the overflow emergency buffer */ - v_overflow_buffer = alloc_bootmem_low_pages_nopanic( - PAGE_ALIGN(io_tlb_overflow)); + v_overflow_buffer = memblock_virt_alloc_low_nopanic( + PAGE_ALIGN(io_tlb_overflow), + PAGE_SIZE); if (!v_overflow_buffer) return -ENOMEM; @@ -184,11 +185,15 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) * to find contiguous free memory regions of size up to IO_TLB_SEGSIZE * between io_tlb_start and io_tlb_end. 
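The smp_processor_id() refactor above funnels debug_smp_processor_id() and the new __this_cpu_preempt_check() through a single check_preemption_disabled() helper. The bug class it reports is reading a CPU id while preemptible; a small illustration of the broken pattern and the usual get_cpu()/put_cpu() fix (the function names are made up for the example):

	#include <linux/smp.h>
	#include <linux/preempt.h>

	static int pick_cpu_buggy(void)
	{
		/* Preemptible context: the debug check above may print the splat. */
		return smp_processor_id();
	}

	static int pick_cpu_fixed(void)
	{
		int cpu = get_cpu();	/* disables preemption */

		/* ... work that must stay on @cpu goes here ... */
		put_cpu();
		return cpu;
	}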
*/ - io_tlb_list = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); + io_tlb_list = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(int)), + PAGE_SIZE); for (i = 0; i < io_tlb_nslabs; i++) io_tlb_list[i] = IO_TLB_SEGSIZE - OFFSET(i, IO_TLB_SEGSIZE); io_tlb_index = 0; - io_tlb_orig_addr = alloc_bootmem_pages(PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); + io_tlb_orig_addr = memblock_virt_alloc( + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t)), + PAGE_SIZE); if (verbose) swiotlb_print_info(); @@ -215,13 +220,13 @@ swiotlb_init(int verbose) bytes = io_tlb_nslabs << IO_TLB_SHIFT; /* Get IO TLB memory from the low pages */ - vstart = alloc_bootmem_low_pages_nopanic(PAGE_ALIGN(bytes)); + vstart = memblock_virt_alloc_low_nopanic(PAGE_ALIGN(bytes), PAGE_SIZE); if (vstart && !swiotlb_init_with_tbl(vstart, io_tlb_nslabs, verbose)) return; if (io_tlb_start) - free_bootmem(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free_early(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); pr_warn("Cannot allocate SWIOTLB buffer"); no_iotlb_memory = true; } @@ -357,14 +362,14 @@ void __init swiotlb_free(void) free_pages((unsigned long)phys_to_virt(io_tlb_start), get_order(io_tlb_nslabs << IO_TLB_SHIFT)); } else { - free_bootmem_late(io_tlb_overflow_buffer, - PAGE_ALIGN(io_tlb_overflow)); - free_bootmem_late(__pa(io_tlb_orig_addr), - PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); - free_bootmem_late(__pa(io_tlb_list), - PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); - free_bootmem_late(io_tlb_start, - PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); + memblock_free_late(io_tlb_overflow_buffer, + PAGE_ALIGN(io_tlb_overflow)); + memblock_free_late(__pa(io_tlb_orig_addr), + PAGE_ALIGN(io_tlb_nslabs * sizeof(phys_addr_t))); + memblock_free_late(__pa(io_tlb_list), + PAGE_ALIGN(io_tlb_nslabs * sizeof(int))); + memblock_free_late(io_tlb_start, + PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } io_tlb_nslabs = 0; } @@ -505,7 +510,8 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, not_found: spin_unlock_irqrestore(&io_tlb_lock, flags); - dev_warn(hwdev, "swiotlb buffer is full\n"); + if (printk_ratelimit()) + dev_warn(hwdev, "swiotlb buffer is full (sz: %zd bytes)\n", size); return SWIOTLB_MAP_ERROR; found: spin_unlock_irqrestore(&io_tlb_lock, flags); diff --git a/lib/syscall.c b/lib/syscall.c index 58710eefeac8..e30e03932480 100644 --- a/lib/syscall.c +++ b/lib/syscall.c @@ -72,4 +72,3 @@ int task_current_syscall(struct task_struct *target, long *callno, return 0; } -EXPORT_SYMBOL_GPL(task_current_syscall); diff --git a/lib/test_module.c b/lib/test_module.c new file mode 100644 index 000000000000..319b66f1ff61 --- /dev/null +++ b/lib/test_module.c @@ -0,0 +1,33 @@ +/* + * This module emits "Hello, world" on printk when loaded. + * + * It is designed to be used for basic evaluation of the module loading + * subsystem (for example when validating module signing/verification). It + * lacks any extra dependencies, and will not normally be loaded by the + * system unless explicitly requested by name. 
+ */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/init.h> +#include <linux/module.h> +#include <linux/printk.h> + +static int __init test_module_init(void) +{ + pr_warn("Hello, world\n"); + + return 0; +} + +module_init(test_module_init); + +static void __exit test_module_exit(void) +{ + pr_warn("Goodbye\n"); +} + +module_exit(test_module_exit); + +MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/test_user_copy.c b/lib/test_user_copy.c new file mode 100644 index 000000000000..0ecef3e4690e --- /dev/null +++ b/lib/test_user_copy.c @@ -0,0 +1,110 @@ +/* + * Kernel module for testing copy_to/from_user infrastructure. + * + * Copyright 2013 Google Inc. All Rights Reserved + * + * Authors: + * Kees Cook <keescook@chromium.org> + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt + +#include <linux/mman.h> +#include <linux/module.h> +#include <linux/sched.h> +#include <linux/slab.h> +#include <linux/uaccess.h> +#include <linux/vmalloc.h> + +#define test(condition, msg) \ +({ \ + int cond = (condition); \ + if (cond) \ + pr_warn("%s\n", msg); \ + cond; \ +}) + +static int __init test_user_copy_init(void) +{ + int ret = 0; + char *kmem; + char __user *usermem; + char *bad_usermem; + unsigned long user_addr; + unsigned long value = 0x5A; + + kmem = kmalloc(PAGE_SIZE * 2, GFP_KERNEL); + if (!kmem) + return -ENOMEM; + + user_addr = vm_mmap(NULL, 0, PAGE_SIZE * 2, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, 0); + if (user_addr >= (unsigned long)(TASK_SIZE)) { + pr_warn("Failed to allocate user memory\n"); + kfree(kmem); + return -ENOMEM; + } + + usermem = (char __user *)user_addr; + bad_usermem = (char *)user_addr; + + /* Legitimate usage: none of these should fail. */ + ret |= test(copy_from_user(kmem, usermem, PAGE_SIZE), + "legitimate copy_from_user failed"); + ret |= test(copy_to_user(usermem, kmem, PAGE_SIZE), + "legitimate copy_to_user failed"); + ret |= test(get_user(value, (unsigned long __user *)usermem), + "legitimate get_user failed"); + ret |= test(put_user(value, (unsigned long __user *)usermem), + "legitimate put_user failed"); + + /* Invalid usage: none of these should succeed. 
*/ + ret |= test(!copy_from_user(kmem, (char __user *)(kmem + PAGE_SIZE), + PAGE_SIZE), + "illegal all-kernel copy_from_user passed"); + ret |= test(!copy_from_user(bad_usermem, (char __user *)kmem, + PAGE_SIZE), + "illegal reversed copy_from_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, kmem + PAGE_SIZE, + PAGE_SIZE), + "illegal all-kernel copy_to_user passed"); + ret |= test(!copy_to_user((char __user *)kmem, bad_usermem, + PAGE_SIZE), + "illegal reversed copy_to_user passed"); + ret |= test(!get_user(value, (unsigned long __user *)kmem), + "illegal get_user passed"); + ret |= test(!put_user(value, (unsigned long __user *)kmem), + "illegal put_user passed"); + + vm_munmap(user_addr, PAGE_SIZE * 2); + kfree(kmem); + + if (ret == 0) { + pr_info("tests passed.\n"); + return 0; + } + + return -EINVAL; +} + +module_init(test_user_copy_init); + +static void __exit test_user_copy_exit(void) +{ + pr_info("unloaded.\n"); +} + +module_exit(test_user_copy_exit); + +MODULE_AUTHOR("Kees Cook <keescook@chromium.org>"); +MODULE_LICENSE("GPL"); diff --git a/lib/vsprintf.c b/lib/vsprintf.c index 10909c571494..0648291cdafe 100644 --- a/lib/vsprintf.c +++ b/lib/vsprintf.c @@ -364,7 +364,6 @@ enum format_type { FORMAT_TYPE_SHORT, FORMAT_TYPE_UINT, FORMAT_TYPE_INT, - FORMAT_TYPE_NRCHARS, FORMAT_TYPE_SIZE_T, FORMAT_TYPE_PTRDIFF }; @@ -719,10 +718,15 @@ char *resource_string(char *buf, char *end, struct resource *res, specp = &mem_spec; decode = 0; } - p = number(p, pend, res->start, *specp); - if (res->start != res->end) { - *p++ = '-'; - p = number(p, pend, res->end, *specp); + if (decode && res->flags & IORESOURCE_UNSET) { + p = string(p, pend, "size ", str_spec); + p = number(p, pend, resource_size(res), *specp); + } else { + p = number(p, pend, res->start, *specp); + if (res->start != res->end) { + *p++ = '-'; + p = number(p, pend, res->end, *specp); + } } if (decode) { if (res->flags & IORESOURCE_MEM_64) @@ -1155,6 +1159,30 @@ char *netdev_feature_string(char *buf, char *end, const u8 *addr, return number(buf, end, *(const netdev_features_t *)addr, spec); } +static noinline_for_stack +char *address_val(char *buf, char *end, const void *addr, + struct printf_spec spec, const char *fmt) +{ + unsigned long long num; + + spec.flags |= SPECIAL | SMALL | ZEROPAD; + spec.base = 16; + + switch (fmt[1]) { + case 'd': + num = *(const dma_addr_t *)addr; + spec.field_width = sizeof(dma_addr_t) * 2 + 2; + break; + case 'p': + default: + num = *(const phys_addr_t *)addr; + spec.field_width = sizeof(phys_addr_t) * 2 + 2; + break; + } + + return number(buf, end, num, spec); +} + int kptr_restrict __read_mostly; /* @@ -1218,7 +1246,8 @@ int kptr_restrict __read_mostly; * N no separator * The maximum supported length is 64 bytes of the input. Consider * to use print_hex_dump() for the larger input. 
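The test_user_copy checks above lean on the uaccess return conventions: copy_to_user() and copy_from_user() return the number of bytes that could not be copied (0 on success), while get_user() and put_user() return 0 or -EFAULT. A hypothetical helper showing how callers normally turn that into an error code:

	#include <linux/uaccess.h>
	#include <linux/errno.h>
	#include <linux/types.h>

	/* Copy a u32 in from user space, converting "bytes left" to -EFAULT. */
	static int fetch_u32(u32 *dst, const void __user *src)
	{
		if (copy_from_user(dst, src, sizeof(*dst)))
			return -EFAULT;
		return 0;
	}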
- * - 'a' For a phys_addr_t type and its derivative types (passed by reference) + * - 'a[pd]' For address types [p] phys_addr_t, [d] dma_addr_t and derivatives + * (default assumed to be phys_addr_t, passed by reference) * - 'd[234]' For a dentry name (optionally 2-4 last components) * - 'D[234]' Same as 'd' but for a struct file * @@ -1353,11 +1382,7 @@ char *pointer(const char *fmt, char *buf, char *end, void *ptr, } break; case 'a': - spec.flags |= SPECIAL | SMALL | ZEROPAD; - spec.field_width = sizeof(phys_addr_t) * 2 + 2; - spec.base = 16; - return number(buf, end, - (unsigned long long) *((phys_addr_t *)ptr), spec); + return address_val(buf, end, ptr, spec, fmt); case 'd': return dentry_name(buf, end, ptr, spec, fmt); case 'D': @@ -1512,10 +1537,6 @@ qualifier: return fmt - start; /* skip alnum */ - case 'n': - spec->type = FORMAT_TYPE_NRCHARS; - return ++fmt - start; - case '%': spec->type = FORMAT_TYPE_PERCENT_CHAR; return ++fmt - start; @@ -1538,6 +1559,15 @@ qualifier: case 'u': break; + case 'n': + /* + * Since %n poses a greater security risk than utility, treat + * it as an invalid format specifier. Warn about its use so + * that new instances don't get added. + */ + WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", fmt); + /* Fall-through */ + default: spec->type = FORMAT_TYPE_INVALID; return fmt - start; @@ -1711,20 +1741,6 @@ int vsnprintf(char *buf, size_t size, const char *fmt, va_list args) ++str; break; - case FORMAT_TYPE_NRCHARS: { - /* - * Since %n poses a greater security risk than - * utility, ignore %n and skip its argument. - */ - void *skip_arg; - - WARN_ONCE(1, "Please remove ignored %%n in '%s'\n", - old_fmt); - - skip_arg = va_arg(args, void *); - break; - } - default: switch (spec.type) { case FORMAT_TYPE_LONG_LONG: @@ -1999,19 +2015,6 @@ do { \ fmt++; break; - case FORMAT_TYPE_NRCHARS: { - /* skip %n 's argument */ - u8 qualifier = spec.qualifier; - void *skip_arg; - if (qualifier == 'l') - skip_arg = va_arg(args, long *); - else if (_tolower(qualifier) == 'z') - skip_arg = va_arg(args, size_t *); - else - skip_arg = va_arg(args, int *); - break; - } - default: switch (spec.type) { @@ -2170,10 +2173,6 @@ int bstr_printf(char *buf, size_t size, const char *fmt, const u32 *bin_buf) ++str; break; - case FORMAT_TYPE_NRCHARS: - /* skip */ - break; - default: { unsigned long long num; |
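Two user-visible vsprintf changes above are worth a short illustration: %pa grows a 'd' suffix for dma_addr_t (both variants still take the address by reference, so the format works for any configured width of phys_addr_t or dma_addr_t), and %n is now rejected as an invalid specifier with a one-time warning instead of being parsed and skipped. A usage sketch for the extended %pa; the function and its arguments are illustrative:

	#include <linux/printk.h>
	#include <linux/types.h>

	static void report_mapping(phys_addr_t phys, dma_addr_t dma)
	{
		/* Each address prints as zero-padded hex with a 0x prefix. */
		pr_info("mapped phys %pa -> dma %pad\n", &phys, &dma);
	}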