diff options
Diffstat (limited to 'lib')
-rw-r--r-- | lib/Kconfig.debug | 22 | ||||
-rw-r--r-- | lib/Makefile | 4 | ||||
-rw-r--r-- | lib/bug.c | 15 | ||||
-rw-r--r-- | lib/cpumask.c | 52 | ||||
-rw-r--r-- | lib/crypto/blake2s-selftest.c | 25 | ||||
-rw-r--r-- | lib/find_bit.c | 9 | ||||
-rw-r--r-- | lib/group_cpus.c | 428 | ||||
-rw-r--r-- | lib/iov_iter.c | 299 | ||||
-rw-r--r-- | lib/mpi/mpicoder.c | 3 | ||||
-rw-r--r-- | lib/nmi_backtrace.c | 2 | ||||
-rw-r--r-- | lib/parser.c | 39 | ||||
-rw-r--r-- | lib/sbitmap.c | 102 | ||||
-rw-r--r-- | lib/string.c | 10 | ||||
-rw-r--r-- | lib/ubsan.c | 73 | ||||
-rw-r--r-- | lib/ubsan.h | 32 | ||||
-rw-r--r-- | lib/usercopy.c | 7 |
16 files changed, 993 insertions, 129 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 6a7ce60c8412..1dd4bd7dc271 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -389,6 +389,15 @@ config PAHOLE_HAS_BTF_TAG btf_decl_tag) or not. Currently only clang compiler implements these attributes, so make the config depend on CC_IS_CLANG. +config PAHOLE_HAS_LANG_EXCLUDE + def_bool PAHOLE_VERSION >= 124 + help + Support for the --lang_exclude flag which makes pahole exclude + compilation units from the supplied language. Used in Kbuild to + omit Rust CUs which are not supported in version 1.24 of pahole, + otherwise it would emit malformed kernel and module binaries when + using DEBUG_INFO_BTF_MODULES. + config DEBUG_INFO_BTF_MODULES def_bool y depends on DEBUG_INFO_BTF && MODULES && PAHOLE_HAS_SPLIT_BTF @@ -1553,6 +1562,17 @@ config TRACE_IRQFLAGS_NMI depends on TRACE_IRQFLAGS depends on TRACE_IRQFLAGS_NMI_SUPPORT +config NMI_CHECK_CPU + bool "Debugging for CPUs failing to respond to backtrace requests" + depends on DEBUG_KERNEL + depends on X86 + default n + help + Enables debug prints when a CPU fails to respond to a given + backtrace NMI. These prints provide some reasons why a CPU + might legitimately be failing to respond, for example, if it + is offline of if ignore_nmis is set. + config DEBUG_IRQFLAGS bool "Debug IRQ flag manipulation" help @@ -2895,6 +2915,4 @@ config RUST_BUILD_ASSERT_ALLOW endmenu # "Rust" -source "Documentation/Kconfig" - endmenu # Kernel hacking diff --git a/lib/Makefile b/lib/Makefile index b25a324fa326..a269af847e2e 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -348,9 +348,7 @@ quiet_cmd_build_OID_registry = GEN $@ clean-files += oid_registry_data.c obj-$(CONFIG_UCS2_STRING) += ucs2_string.o -ifneq ($(CONFIG_UBSAN_TRAP),y) obj-$(CONFIG_UBSAN) += ubsan.o -endif UBSAN_SANITIZE_ubsan.o := n KASAN_SANITIZE_ubsan.o := n @@ -361,6 +359,8 @@ obj-$(CONFIG_SBITMAP) += sbitmap.o obj-$(CONFIG_PARMAN) += parman.o +obj-y += group_cpus.o + # GCC library routines obj-$(CONFIG_GENERIC_LIB_ASHLDI3) += ashldi3.o obj-$(CONFIG_GENERIC_LIB_ASHRDI3) += ashrdi3.o diff --git a/lib/bug.c b/lib/bug.c index c223a2575b72..e0ff21989990 100644 --- a/lib/bug.c +++ b/lib/bug.c @@ -47,6 +47,7 @@ #include <linux/sched.h> #include <linux/rculist.h> #include <linux/ftrace.h> +#include <linux/context_tracking.h> extern struct bug_entry __start___bug_table[], __stop___bug_table[]; @@ -153,7 +154,7 @@ struct bug_entry *find_bug(unsigned long bugaddr) return module_find_bug(bugaddr); } -enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) +static enum bug_trap_type __report_bug(unsigned long bugaddr, struct pt_regs *regs) { struct bug_entry *bug; const char *file; @@ -209,6 +210,18 @@ enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) return BUG_TRAP_TYPE_BUG; } +enum bug_trap_type report_bug(unsigned long bugaddr, struct pt_regs *regs) +{ + enum bug_trap_type ret; + bool rcu = false; + + rcu = warn_rcu_enter(); + ret = __report_bug(bugaddr, regs); + warn_rcu_exit(rcu); + + return ret; +} + static void clear_once_table(struct bug_entry *start, struct bug_entry *end) { struct bug_entry *bug; diff --git a/lib/cpumask.c b/lib/cpumask.c index c7c392514fd3..e7258836b60b 100644 --- a/lib/cpumask.c +++ b/lib/cpumask.c @@ -110,15 +110,33 @@ void __init free_bootmem_cpumask_var(cpumask_var_t mask) #endif /** - * cpumask_local_spread - select the i'th cpu with local numa cpu's first + * cpumask_local_spread - select the i'th cpu based on NUMA distances * @i: index number * @node: local numa_node * - * This function selects an online CPU according to a numa aware policy; - * local cpus are returned first, followed by non-local ones, then it - * wraps around. + * Returns online CPU according to a numa aware policy; local cpus are returned + * first, followed by non-local ones, then it wraps around. * - * It's not very efficient, but useful for setup. + * For those who wants to enumerate all CPUs based on their NUMA distances, + * i.e. call this function in a loop, like: + * + * for (i = 0; i < num_online_cpus(); i++) { + * cpu = cpumask_local_spread(i, node); + * do_something(cpu); + * } + * + * There's a better alternative based on for_each()-like iterators: + * + * for_each_numa_hop_mask(mask, node) { + * for_each_cpu_andnot(cpu, mask, prev) + * do_something(cpu); + * prev = mask; + * } + * + * It's simpler and more verbose than above. Complexity of iterator-based + * enumeration is O(sched_domains_numa_levels * nr_cpu_ids), while + * cpumask_local_spread() when called for each cpu is + * O(sched_domains_numa_levels * nr_cpu_ids * log(nr_cpu_ids)). */ unsigned int cpumask_local_spread(unsigned int i, int node) { @@ -127,24 +145,12 @@ unsigned int cpumask_local_spread(unsigned int i, int node) /* Wrap: we always want a cpu. */ i %= num_online_cpus(); - if (node == NUMA_NO_NODE) { - cpu = cpumask_nth(i, cpu_online_mask); - if (cpu < nr_cpu_ids) - return cpu; - } else { - /* NUMA first. */ - cpu = cpumask_nth_and(i, cpu_online_mask, cpumask_of_node(node)); - if (cpu < nr_cpu_ids) - return cpu; - - i -= cpumask_weight_and(cpu_online_mask, cpumask_of_node(node)); - - /* Skip NUMA nodes, done above. */ - cpu = cpumask_nth_andnot(i, cpu_online_mask, cpumask_of_node(node)); - if (cpu < nr_cpu_ids) - return cpu; - } - BUG(); + cpu = (node == NUMA_NO_NODE) ? + cpumask_nth(i, cpu_online_mask) : + sched_numa_find_nth_cpu(cpu_online_mask, i, node); + + WARN_ON(cpu >= nr_cpu_ids); + return cpu; } EXPORT_SYMBOL(cpumask_local_spread); diff --git a/lib/crypto/blake2s-selftest.c b/lib/crypto/blake2s-selftest.c index 7d77dea15587..d0634ed6a937 100644 --- a/lib/crypto/blake2s-selftest.c +++ b/lib/crypto/blake2s-selftest.c @@ -545,7 +545,7 @@ static const u8 blake2s_testvecs[][BLAKE2S_HASH_SIZE] __initconst = { 0xd6, 0x98, 0x6b, 0x07, 0x10, 0x65, 0x52, 0x65, }, }; -bool __init blake2s_selftest(void) +static bool __init noinline_for_stack blake2s_digest_test(void) { u8 key[BLAKE2S_KEY_SIZE]; u8 buf[ARRAY_SIZE(blake2s_testvecs)]; @@ -589,11 +589,20 @@ bool __init blake2s_selftest(void) } } + return success; +} + +static bool __init noinline_for_stack blake2s_random_test(void) +{ + struct blake2s_state state; + bool success = true; + int i, l; + for (i = 0; i < 32; ++i) { enum { TEST_ALIGNMENT = 16 }; - u8 unaligned_block[BLAKE2S_BLOCK_SIZE + TEST_ALIGNMENT - 1] + u8 blocks[BLAKE2S_BLOCK_SIZE * 2 + TEST_ALIGNMENT - 1] __aligned(TEST_ALIGNMENT); - u8 blocks[BLAKE2S_BLOCK_SIZE * 2]; + u8 *unaligned_block = blocks + BLAKE2S_BLOCK_SIZE; struct blake2s_state state1, state2; get_random_bytes(blocks, sizeof(blocks)); @@ -630,3 +639,13 @@ bool __init blake2s_selftest(void) return success; } + +bool __init blake2s_selftest(void) +{ + bool success; + + success = blake2s_digest_test(); + success &= blake2s_random_test(); + + return success; +} diff --git a/lib/find_bit.c b/lib/find_bit.c index 18bc0a7ac8ee..c10920e66788 100644 --- a/lib/find_bit.c +++ b/lib/find_bit.c @@ -155,6 +155,15 @@ unsigned long __find_nth_andnot_bit(const unsigned long *addr1, const unsigned l } EXPORT_SYMBOL(__find_nth_andnot_bit); +unsigned long __find_nth_and_andnot_bit(const unsigned long *addr1, + const unsigned long *addr2, + const unsigned long *addr3, + unsigned long size, unsigned long n) +{ + return FIND_NTH_BIT(addr1[idx] & addr2[idx] & ~addr3[idx], size, n); +} +EXPORT_SYMBOL(__find_nth_and_andnot_bit); + #ifndef find_next_and_bit unsigned long _find_next_and_bit(const unsigned long *addr1, const unsigned long *addr2, unsigned long nbits, unsigned long start) diff --git a/lib/group_cpus.c b/lib/group_cpus.c new file mode 100644 index 000000000000..9c837a35fef7 --- /dev/null +++ b/lib/group_cpus.c @@ -0,0 +1,428 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * Copyright (C) 2016 Thomas Gleixner. + * Copyright (C) 2016-2017 Christoph Hellwig. + */ +#include <linux/kernel.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/sort.h> +#include <linux/group_cpus.h> + +#ifdef CONFIG_SMP + +static void grp_spread_init_one(struct cpumask *irqmsk, struct cpumask *nmsk, + unsigned int cpus_per_grp) +{ + const struct cpumask *siblmsk; + int cpu, sibl; + + for ( ; cpus_per_grp > 0; ) { + cpu = cpumask_first(nmsk); + + /* Should not happen, but I'm too lazy to think about it */ + if (cpu >= nr_cpu_ids) + return; + + cpumask_clear_cpu(cpu, nmsk); + cpumask_set_cpu(cpu, irqmsk); + cpus_per_grp--; + + /* If the cpu has siblings, use them first */ + siblmsk = topology_sibling_cpumask(cpu); + for (sibl = -1; cpus_per_grp > 0; ) { + sibl = cpumask_next(sibl, siblmsk); + if (sibl >= nr_cpu_ids) + break; + if (!cpumask_test_and_clear_cpu(sibl, nmsk)) + continue; + cpumask_set_cpu(sibl, irqmsk); + cpus_per_grp--; + } + } +} + +static cpumask_var_t *alloc_node_to_cpumask(void) +{ + cpumask_var_t *masks; + int node; + + masks = kcalloc(nr_node_ids, sizeof(cpumask_var_t), GFP_KERNEL); + if (!masks) + return NULL; + + for (node = 0; node < nr_node_ids; node++) { + if (!zalloc_cpumask_var(&masks[node], GFP_KERNEL)) + goto out_unwind; + } + + return masks; + +out_unwind: + while (--node >= 0) + free_cpumask_var(masks[node]); + kfree(masks); + return NULL; +} + +static void free_node_to_cpumask(cpumask_var_t *masks) +{ + int node; + + for (node = 0; node < nr_node_ids; node++) + free_cpumask_var(masks[node]); + kfree(masks); +} + +static void build_node_to_cpumask(cpumask_var_t *masks) +{ + int cpu; + + for_each_possible_cpu(cpu) + cpumask_set_cpu(cpu, masks[cpu_to_node(cpu)]); +} + +static int get_nodes_in_cpumask(cpumask_var_t *node_to_cpumask, + const struct cpumask *mask, nodemask_t *nodemsk) +{ + int n, nodes = 0; + + /* Calculate the number of nodes in the supplied affinity mask */ + for_each_node(n) { + if (cpumask_intersects(mask, node_to_cpumask[n])) { + node_set(n, *nodemsk); + nodes++; + } + } + return nodes; +} + +struct node_groups { + unsigned id; + + union { + unsigned ngroups; + unsigned ncpus; + }; +}; + +static int ncpus_cmp_func(const void *l, const void *r) +{ + const struct node_groups *ln = l; + const struct node_groups *rn = r; + + return ln->ncpus - rn->ncpus; +} + +/* + * Allocate group number for each node, so that for each node: + * + * 1) the allocated number is >= 1 + * + * 2) the allocated number is <= active CPU number of this node + * + * The actual allocated total groups may be less than @numgrps when + * active total CPU number is less than @numgrps. + * + * Active CPUs means the CPUs in '@cpu_mask AND @node_to_cpumask[]' + * for each node. + */ +static void alloc_nodes_groups(unsigned int numgrps, + cpumask_var_t *node_to_cpumask, + const struct cpumask *cpu_mask, + const nodemask_t nodemsk, + struct cpumask *nmsk, + struct node_groups *node_groups) +{ + unsigned n, remaining_ncpus = 0; + + for (n = 0; n < nr_node_ids; n++) { + node_groups[n].id = n; + node_groups[n].ncpus = UINT_MAX; + } + + for_each_node_mask(n, nodemsk) { + unsigned ncpus; + + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + ncpus = cpumask_weight(nmsk); + + if (!ncpus) + continue; + remaining_ncpus += ncpus; + node_groups[n].ncpus = ncpus; + } + + numgrps = min_t(unsigned, remaining_ncpus, numgrps); + + sort(node_groups, nr_node_ids, sizeof(node_groups[0]), + ncpus_cmp_func, NULL); + + /* + * Allocate groups for each node according to the ratio of this + * node's nr_cpus to remaining un-assigned ncpus. 'numgrps' is + * bigger than number of active numa nodes. Always start the + * allocation from the node with minimized nr_cpus. + * + * This way guarantees that each active node gets allocated at + * least one group, and the theory is simple: over-allocation + * is only done when this node is assigned by one group, so + * other nodes will be allocated >= 1 groups, since 'numgrps' is + * bigger than number of numa nodes. + * + * One perfect invariant is that number of allocated groups for + * each node is <= CPU count of this node: + * + * 1) suppose there are two nodes: A and B + * ncpu(X) is CPU count of node X + * grps(X) is the group count allocated to node X via this + * algorithm + * + * ncpu(A) <= ncpu(B) + * ncpu(A) + ncpu(B) = N + * grps(A) + grps(B) = G + * + * grps(A) = max(1, round_down(G * ncpu(A) / N)) + * grps(B) = G - grps(A) + * + * both N and G are integer, and 2 <= G <= N, suppose + * G = N - delta, and 0 <= delta <= N - 2 + * + * 2) obviously grps(A) <= ncpu(A) because: + * + * if grps(A) is 1, then grps(A) <= ncpu(A) given + * ncpu(A) >= 1 + * + * otherwise, + * grps(A) <= G * ncpu(A) / N <= ncpu(A), given G <= N + * + * 3) prove how grps(B) <= ncpu(B): + * + * if round_down(G * ncpu(A) / N) == 0, vecs(B) won't be + * over-allocated, so grps(B) <= ncpu(B), + * + * otherwise: + * + * grps(A) = + * round_down(G * ncpu(A) / N) = + * round_down((N - delta) * ncpu(A) / N) = + * round_down((N * ncpu(A) - delta * ncpu(A)) / N) >= + * round_down((N * ncpu(A) - delta * N) / N) = + * cpu(A) - delta + * + * then: + * + * grps(A) - G >= ncpu(A) - delta - G + * => + * G - grps(A) <= G + delta - ncpu(A) + * => + * grps(B) <= N - ncpu(A) + * => + * grps(B) <= cpu(B) + * + * For nodes >= 3, it can be thought as one node and another big + * node given that is exactly what this algorithm is implemented, + * and we always re-calculate 'remaining_ncpus' & 'numgrps', and + * finally for each node X: grps(X) <= ncpu(X). + * + */ + for (n = 0; n < nr_node_ids; n++) { + unsigned ngroups, ncpus; + + if (node_groups[n].ncpus == UINT_MAX) + continue; + + WARN_ON_ONCE(numgrps == 0); + + ncpus = node_groups[n].ncpus; + ngroups = max_t(unsigned, 1, + numgrps * ncpus / remaining_ncpus); + WARN_ON_ONCE(ngroups > ncpus); + + node_groups[n].ngroups = ngroups; + + remaining_ncpus -= ncpus; + numgrps -= ngroups; + } +} + +static int __group_cpus_evenly(unsigned int startgrp, unsigned int numgrps, + cpumask_var_t *node_to_cpumask, + const struct cpumask *cpu_mask, + struct cpumask *nmsk, struct cpumask *masks) +{ + unsigned int i, n, nodes, cpus_per_grp, extra_grps, done = 0; + unsigned int last_grp = numgrps; + unsigned int curgrp = startgrp; + nodemask_t nodemsk = NODE_MASK_NONE; + struct node_groups *node_groups; + + if (cpumask_empty(cpu_mask)) + return 0; + + nodes = get_nodes_in_cpumask(node_to_cpumask, cpu_mask, &nodemsk); + + /* + * If the number of nodes in the mask is greater than or equal the + * number of groups we just spread the groups across the nodes. + */ + if (numgrps <= nodes) { + for_each_node_mask(n, nodemsk) { + /* Ensure that only CPUs which are in both masks are set */ + cpumask_and(nmsk, cpu_mask, node_to_cpumask[n]); + cpumask_or(&masks[curgrp], &masks[curgrp], nmsk); + if (++curgrp == last_grp) + curgrp = 0; + } + return numgrps; + } + + node_groups = kcalloc(nr_node_ids, + sizeof(struct node_groups), + GFP_KERNEL); + if (!node_groups) + return -ENOMEM; + + /* allocate group number for each node */ + alloc_nodes_groups(numgrps, node_to_cpumask, cpu_mask, + nodemsk, nmsk, node_groups); + for (i = 0; i < nr_node_ids; i++) { + unsigned int ncpus, v; + struct node_groups *nv = &node_groups[i]; + + if (nv->ngroups == UINT_MAX) + continue; + + /* Get the cpus on this node which are in the mask */ + cpumask_and(nmsk, cpu_mask, node_to_cpumask[nv->id]); + ncpus = cpumask_weight(nmsk); + if (!ncpus) + continue; + + WARN_ON_ONCE(nv->ngroups > ncpus); + + /* Account for rounding errors */ + extra_grps = ncpus - nv->ngroups * (ncpus / nv->ngroups); + + /* Spread allocated groups on CPUs of the current node */ + for (v = 0; v < nv->ngroups; v++, curgrp++) { + cpus_per_grp = ncpus / nv->ngroups; + + /* Account for extra groups to compensate rounding errors */ + if (extra_grps) { + cpus_per_grp++; + --extra_grps; + } + + /* + * wrapping has to be considered given 'startgrp' + * may start anywhere + */ + if (curgrp >= last_grp) + curgrp = 0; + grp_spread_init_one(&masks[curgrp], nmsk, + cpus_per_grp); + } + done += nv->ngroups; + } + kfree(node_groups); + return done; +} + +/** + * group_cpus_evenly - Group all CPUs evenly per NUMA/CPU locality + * @numgrps: number of groups + * + * Return: cpumask array if successful, NULL otherwise. And each element + * includes CPUs assigned to this group + * + * Try to put close CPUs from viewpoint of CPU and NUMA locality into + * same group, and run two-stage grouping: + * 1) allocate present CPUs on these groups evenly first + * 2) allocate other possible CPUs on these groups evenly + * + * We guarantee in the resulted grouping that all CPUs are covered, and + * no same CPU is assigned to multiple groups + */ +struct cpumask *group_cpus_evenly(unsigned int numgrps) +{ + unsigned int curgrp = 0, nr_present = 0, nr_others = 0; + cpumask_var_t *node_to_cpumask; + cpumask_var_t nmsk, npresmsk; + int ret = -ENOMEM; + struct cpumask *masks = NULL; + + if (!zalloc_cpumask_var(&nmsk, GFP_KERNEL)) + return NULL; + + if (!zalloc_cpumask_var(&npresmsk, GFP_KERNEL)) + goto fail_nmsk; + + node_to_cpumask = alloc_node_to_cpumask(); + if (!node_to_cpumask) + goto fail_npresmsk; + + masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); + if (!masks) + goto fail_node_to_cpumask; + + /* Stabilize the cpumasks */ + cpus_read_lock(); + build_node_to_cpumask(node_to_cpumask); + + /* grouping present CPUs first */ + ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, + cpu_present_mask, nmsk, masks); + if (ret < 0) + goto fail_build_affinity; + nr_present = ret; + + /* + * Allocate non present CPUs starting from the next group to be + * handled. If the grouping of present CPUs already exhausted the + * group space, assign the non present CPUs to the already + * allocated out groups. + */ + if (nr_present >= numgrps) + curgrp = 0; + else + curgrp = nr_present; + cpumask_andnot(npresmsk, cpu_possible_mask, cpu_present_mask); + ret = __group_cpus_evenly(curgrp, numgrps, node_to_cpumask, + npresmsk, nmsk, masks); + if (ret >= 0) + nr_others = ret; + + fail_build_affinity: + cpus_read_unlock(); + + if (ret >= 0) + WARN_ON(nr_present + nr_others < numgrps); + + fail_node_to_cpumask: + free_node_to_cpumask(node_to_cpumask); + + fail_npresmsk: + free_cpumask_var(npresmsk); + + fail_nmsk: + free_cpumask_var(nmsk); + if (ret < 0) { + kfree(masks); + return NULL; + } + return masks; +} +#else /* CONFIG_SMP */ +struct cpumask *group_cpus_evenly(unsigned int numgrps) +{ + struct cpumask *masks = kcalloc(numgrps, sizeof(*masks), GFP_KERNEL); + + if (!masks) + return NULL; + + /* assign all CPUs(cpu 0) to the 1st group only */ + cpumask_copy(&masks[0], cpu_possible_mask); + return masks; +} +#endif /* CONFIG_SMP */ diff --git a/lib/iov_iter.c b/lib/iov_iter.c index f9a3ff37ecd1..274014e4eafe 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -186,12 +186,6 @@ static int copyin(void *to, const void __user *from, size_t n) return res; } -static inline struct pipe_buffer *pipe_buf(const struct pipe_inode_info *pipe, - unsigned int slot) -{ - return &pipe->bufs[slot & (pipe->ring_size - 1)]; -} - #ifdef PIPE_PARANOIA static bool sanity(const struct iov_iter *i) { @@ -1432,9 +1426,9 @@ static struct page *first_bvec_segment(const struct iov_iter *i, static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, unsigned int maxpages, size_t *start, - unsigned int gup_flags) + iov_iter_extraction_t extraction_flags) { - unsigned int n; + unsigned int n, gup_flags = 0; if (maxsize > i->count) maxsize = i->count; @@ -1442,6 +1436,8 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, return 0; if (maxsize > MAX_RW_COUNT) maxsize = MAX_RW_COUNT; + if (extraction_flags & ITER_ALLOW_P2PDMA) + gup_flags |= FOLL_PCI_P2PDMA; if (likely(user_backed_iter(i))) { unsigned long addr; @@ -1495,14 +1491,14 @@ static ssize_t __iov_iter_get_pages_alloc(struct iov_iter *i, ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, - size_t *start, unsigned gup_flags) + size_t *start, iov_iter_extraction_t extraction_flags) { if (!maxpages) return 0; BUG_ON(!pages); return __iov_iter_get_pages_alloc(i, &pages, maxsize, maxpages, - start, gup_flags); + start, extraction_flags); } EXPORT_SYMBOL_GPL(iov_iter_get_pages); @@ -1515,14 +1511,14 @@ EXPORT_SYMBOL(iov_iter_get_pages2); ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, struct page ***pages, size_t maxsize, - size_t *start, unsigned gup_flags) + size_t *start, iov_iter_extraction_t extraction_flags) { ssize_t len; *pages = NULL; len = __iov_iter_get_pages_alloc(i, pages, maxsize, ~0U, start, - gup_flags); + extraction_flags); if (len <= 0) { kvfree(*pages); *pages = NULL; @@ -1877,6 +1873,17 @@ int import_single_range(int rw, void __user *buf, size_t len, } EXPORT_SYMBOL(import_single_range); +int import_ubuf(int rw, void __user *buf, size_t len, struct iov_iter *i) +{ + if (len > MAX_RW_COUNT) + len = MAX_RW_COUNT; + if (unlikely(!access_ok(buf, len))) + return -EFAULT; + + iov_iter_ubuf(i, rw, buf, len); + return 0; +} + /** * iov_iter_restore() - Restore a &struct iov_iter to the same state as when * iov_iter_save_state() was called. @@ -1891,8 +1898,8 @@ EXPORT_SYMBOL(import_single_range); */ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) { - if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i)) && - !iov_iter_is_kvec(i) && !iter_is_ubuf(i)) + if (WARN_ON_ONCE(!iov_iter_is_bvec(i) && !iter_is_iovec(i) && + !iter_is_ubuf(i)) && !iov_iter_is_kvec(i)) return; i->iov_offset = state->iov_offset; i->count = state->count; @@ -1914,3 +1921,267 @@ void iov_iter_restore(struct iov_iter *i, struct iov_iter_state *state) i->iov -= state->nr_segs - i->nr_segs; i->nr_segs = state->nr_segs; } + +/* + * Extract a list of contiguous pages from an ITER_XARRAY iterator. This does not + * get references on the pages, nor does it get a pin on them. + */ +static ssize_t iov_iter_extract_xarray_pages(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + struct page *page, **p; + unsigned int nr = 0, offset; + loff_t pos = i->xarray_start + i->iov_offset; + pgoff_t index = pos >> PAGE_SHIFT; + XA_STATE(xas, i->xarray, index); + + offset = pos & ~PAGE_MASK; + *offset0 = offset; + + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + p = *pages; + + rcu_read_lock(); + for (page = xas_load(&xas); page; page = xas_next(&xas)) { + if (xas_retry(&xas, page)) + continue; + + /* Has the page moved or been split? */ + if (unlikely(page != xas_reload(&xas))) { + xas_reset(&xas); + continue; + } + + p[nr++] = find_subpage(page, xas.xa_index); + if (nr == maxpages) + break; + } + rcu_read_unlock(); + + maxsize = min_t(size_t, nr * PAGE_SIZE - offset, maxsize); + iov_iter_advance(i, maxsize); + return maxsize; +} + +/* + * Extract a list of contiguous pages from an ITER_BVEC iterator. This does + * not get references on the pages, nor does it get a pin on them. + */ +static ssize_t iov_iter_extract_bvec_pages(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + struct page **p, *page; + size_t skip = i->iov_offset, offset; + int k; + + for (;;) { + if (i->nr_segs == 0) + return 0; + maxsize = min(maxsize, i->bvec->bv_len - skip); + if (maxsize) + break; + i->iov_offset = 0; + i->nr_segs--; + i->bvec++; + skip = 0; + } + + skip += i->bvec->bv_offset; + page = i->bvec->bv_page + skip / PAGE_SIZE; + offset = skip % PAGE_SIZE; + *offset0 = offset; + + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + p = *pages; + for (k = 0; k < maxpages; k++) + p[k] = page + k; + + maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset); + iov_iter_advance(i, maxsize); + return maxsize; +} + +/* + * Extract a list of virtually contiguous pages from an ITER_KVEC iterator. + * This does not get references on the pages, nor does it get a pin on them. + */ +static ssize_t iov_iter_extract_kvec_pages(struct iov_iter *i, + struct page ***pages, size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + struct page **p, *page; + const void *kaddr; + size_t skip = i->iov_offset, offset, len; + int k; + + for (;;) { + if (i->nr_segs == 0) + return 0; + maxsize = min(maxsize, i->kvec->iov_len - skip); + if (maxsize) + break; + i->iov_offset = 0; + i->nr_segs--; + i->kvec++; + skip = 0; + } + + kaddr = i->kvec->iov_base + skip; + offset = (unsigned long)kaddr & ~PAGE_MASK; + *offset0 = offset; + + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + p = *pages; + + kaddr -= offset; + len = offset + maxsize; + for (k = 0; k < maxpages; k++) { + size_t seg = min_t(size_t, len, PAGE_SIZE); + + if (is_vmalloc_or_module_addr(kaddr)) + page = vmalloc_to_page(kaddr); + else + page = virt_to_page(kaddr); + + p[k] = page; + len -= seg; + kaddr += PAGE_SIZE; + } + + maxsize = min_t(size_t, maxsize, maxpages * PAGE_SIZE - offset); + iov_iter_advance(i, maxsize); + return maxsize; +} + +/* + * Extract a list of contiguous pages from a user iterator and get a pin on + * each of them. This should only be used if the iterator is user-backed + * (IOBUF/UBUF). + * + * It does not get refs on the pages, but the pages must be unpinned by the + * caller once the transfer is complete. + * + * This is safe to be used where background IO/DMA *is* going to be modifying + * the buffer; using a pin rather than a ref makes forces fork() to give the + * child a copy of the page. + */ +static ssize_t iov_iter_extract_user_pages(struct iov_iter *i, + struct page ***pages, + size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + unsigned long addr; + unsigned int gup_flags = 0; + size_t offset; + int res; + + if (i->data_source == ITER_DEST) + gup_flags |= FOLL_WRITE; + if (extraction_flags & ITER_ALLOW_P2PDMA) + gup_flags |= FOLL_PCI_P2PDMA; + if (i->nofault) + gup_flags |= FOLL_NOFAULT; + + addr = first_iovec_segment(i, &maxsize); + *offset0 = offset = addr % PAGE_SIZE; + addr &= PAGE_MASK; + maxpages = want_pages_array(pages, maxsize, offset, maxpages); + if (!maxpages) + return -ENOMEM; + res = pin_user_pages_fast(addr, maxpages, gup_flags, *pages); + if (unlikely(res <= 0)) + return res; + maxsize = min_t(size_t, maxsize, res * PAGE_SIZE - offset); + iov_iter_advance(i, maxsize); + return maxsize; +} + +/** + * iov_iter_extract_pages - Extract a list of contiguous pages from an iterator + * @i: The iterator to extract from + * @pages: Where to return the list of pages + * @maxsize: The maximum amount of iterator to extract + * @maxpages: The maximum size of the list of pages + * @extraction_flags: Flags to qualify request + * @offset0: Where to return the starting offset into (*@pages)[0] + * + * Extract a list of contiguous pages from the current point of the iterator, + * advancing the iterator. The maximum number of pages and the maximum amount + * of page contents can be set. + * + * If *@pages is NULL, a page list will be allocated to the required size and + * *@pages will be set to its base. If *@pages is not NULL, it will be assumed + * that the caller allocated a page list at least @maxpages in size and this + * will be filled in. + * + * @extraction_flags can have ITER_ALLOW_P2PDMA set to request peer-to-peer DMA + * be allowed on the pages extracted. + * + * The iov_iter_extract_will_pin() function can be used to query how cleanup + * should be performed. + * + * Extra refs or pins on the pages may be obtained as follows: + * + * (*) If the iterator is user-backed (ITER_IOVEC/ITER_UBUF), pins will be + * added to the pages, but refs will not be taken. + * iov_iter_extract_will_pin() will return true. + * + * (*) If the iterator is ITER_KVEC, ITER_BVEC or ITER_XARRAY, the pages are + * merely listed; no extra refs or pins are obtained. + * iov_iter_extract_will_pin() will return 0. + * + * Note also: + * + * (*) Use with ITER_DISCARD is not supported as that has no content. + * + * On success, the function sets *@pages to the new pagelist, if allocated, and + * sets *offset0 to the offset into the first page. + * + * It may also return -ENOMEM and -EFAULT. + */ +ssize_t iov_iter_extract_pages(struct iov_iter *i, + struct page ***pages, + size_t maxsize, + unsigned int maxpages, + iov_iter_extraction_t extraction_flags, + size_t *offset0) +{ + maxsize = min_t(size_t, min_t(size_t, maxsize, i->count), MAX_RW_COUNT); + if (!maxsize) + return 0; + + if (likely(user_backed_iter(i))) + return iov_iter_extract_user_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); + if (iov_iter_is_kvec(i)) + return iov_iter_extract_kvec_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); + if (iov_iter_is_bvec(i)) + return iov_iter_extract_bvec_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); + if (iov_iter_is_xarray(i)) + return iov_iter_extract_xarray_pages(i, pages, maxsize, + maxpages, extraction_flags, + offset0); + return -EFAULT; +} +EXPORT_SYMBOL_GPL(iov_iter_extract_pages); diff --git a/lib/mpi/mpicoder.c b/lib/mpi/mpicoder.c index 39c4c6731094..3cb6bd148fa9 100644 --- a/lib/mpi/mpicoder.c +++ b/lib/mpi/mpicoder.c @@ -504,7 +504,8 @@ MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int nbytes) while (sg_miter_next(&miter)) { buff = miter.addr; - len = miter.length; + len = min_t(unsigned, miter.length, nbytes); + nbytes -= len; for (x = 0; x < len; x++) { a <<= 8; diff --git a/lib/nmi_backtrace.c b/lib/nmi_backtrace.c index d01aec6ae15c..5274bbb026d7 100644 --- a/lib/nmi_backtrace.c +++ b/lib/nmi_backtrace.c @@ -64,6 +64,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, if (!cpumask_empty(to_cpumask(backtrace_mask))) { pr_info("Sending NMI from CPU %d to CPUs %*pbl:\n", this_cpu, nr_cpumask_bits, to_cpumask(backtrace_mask)); + nmi_backtrace_stall_snap(to_cpumask(backtrace_mask)); raise(to_cpumask(backtrace_mask)); } @@ -74,6 +75,7 @@ void nmi_trigger_cpumask_backtrace(const cpumask_t *mask, mdelay(1); touch_softlockup_watchdog(); } + nmi_backtrace_stall_check(to_cpumask(backtrace_mask)); /* * Force flush any remote buffers that might be stuck in IRQ context diff --git a/lib/parser.c b/lib/parser.c index bcb23484100e..2b5e2b480253 100644 --- a/lib/parser.c +++ b/lib/parser.c @@ -11,6 +11,15 @@ #include <linux/slab.h> #include <linux/string.h> +/* + * max size needed by different bases to express U64 + * HEX: "0xFFFFFFFFFFFFFFFF" --> 18 + * DEC: "18446744073709551615" --> 20 + * OCT: "01777777777777777777777" --> 23 + * pick the max one to define NUMBER_BUF_LEN + */ +#define NUMBER_BUF_LEN 24 + /** * match_one - Determines if a string matches a simple pattern * @s: the string to examine for presence of the pattern @@ -129,14 +138,12 @@ EXPORT_SYMBOL(match_token); static int match_number(substring_t *s, int *result, int base) { char *endp; - char *buf; + char buf[NUMBER_BUF_LEN]; int ret; long val; - buf = match_strdup(s); - if (!buf) - return -ENOMEM; - + if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) + return -ERANGE; ret = 0; val = simple_strtol(buf, &endp, base); if (endp == buf) @@ -145,7 +152,6 @@ static int match_number(substring_t *s, int *result, int base) ret = -ERANGE; else *result = (int) val; - kfree(buf); return ret; } @@ -163,18 +169,15 @@ static int match_number(substring_t *s, int *result, int base) */ static int match_u64int(substring_t *s, u64 *result, int base) { - char *buf; + char buf[NUMBER_BUF_LEN]; int ret; u64 val; - buf = match_strdup(s); - if (!buf) - return -ENOMEM; - + if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) + return -ERANGE; ret = kstrtoull(buf, base, &val); if (!ret) *result = val; - kfree(buf); return ret; } @@ -206,14 +209,12 @@ EXPORT_SYMBOL(match_int); */ int match_uint(substring_t *s, unsigned int *result) { - int err = -ENOMEM; - char *buf = match_strdup(s); + char buf[NUMBER_BUF_LEN]; - if (buf) { - err = kstrtouint(buf, 10, result); - kfree(buf); - } - return err; + if (match_strlcpy(buf, s, NUMBER_BUF_LEN) >= NUMBER_BUF_LEN) + return -ERANGE; + + return kstrtouint(buf, 10, result); } EXPORT_SYMBOL(match_uint); diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 1fcede228fa2..eff4e42c425a 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -167,15 +167,16 @@ static int __sbitmap_get_word(unsigned long *word, unsigned long depth, return nr; } -static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, - unsigned int alloc_hint) +static int sbitmap_find_bit_in_word(struct sbitmap_word *map, + unsigned int depth, + unsigned int alloc_hint, + bool wrap) { - struct sbitmap_word *map = &sb->map[index]; int nr; do { - nr = __sbitmap_get_word(&map->word, __map_depth(sb, index), - alloc_hint, !sb->round_robin); + nr = __sbitmap_get_word(&map->word, depth, + alloc_hint, wrap); if (nr != -1) break; if (!sbitmap_deferred_clear(map)) @@ -185,25 +186,22 @@ static int sbitmap_find_bit_in_index(struct sbitmap *sb, int index, return nr; } -static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) +static int sbitmap_find_bit(struct sbitmap *sb, + unsigned int depth, + unsigned int index, + unsigned int alloc_hint, + bool wrap) { - unsigned int i, index; + unsigned int i; int nr = -1; - index = SB_NR_TO_INDEX(sb, alloc_hint); - - /* - * Unless we're doing round robin tag allocation, just use the - * alloc_hint to find the right word index. No point in looping - * twice in find_next_zero_bit() for that case. - */ - if (sb->round_robin) - alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); - else - alloc_hint = 0; - for (i = 0; i < sb->map_nr; i++) { - nr = sbitmap_find_bit_in_index(sb, index, alloc_hint); + nr = sbitmap_find_bit_in_word(&sb->map[index], + min_t(unsigned int, + __map_depth(sb, index), + depth), + alloc_hint, wrap); + if (nr != -1) { nr += index << sb->shift; break; @@ -218,6 +216,26 @@ static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) return nr; } +static int __sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint) +{ + unsigned int index; + + index = SB_NR_TO_INDEX(sb, alloc_hint); + + /* + * Unless we're doing round robin tag allocation, just use the + * alloc_hint to find the right word index. No point in looping + * twice in find_next_zero_bit() for that case. + */ + if (sb->round_robin) + alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); + else + alloc_hint = 0; + + return sbitmap_find_bit(sb, UINT_MAX, index, alloc_hint, + !sb->round_robin); +} + int sbitmap_get(struct sbitmap *sb) { int nr; @@ -239,37 +257,12 @@ static int __sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, unsigned long shallow_depth) { - unsigned int i, index; - int nr = -1; + unsigned int index; index = SB_NR_TO_INDEX(sb, alloc_hint); + alloc_hint = SB_NR_TO_BIT(sb, alloc_hint); - for (i = 0; i < sb->map_nr; i++) { -again: - nr = __sbitmap_get_word(&sb->map[index].word, - min_t(unsigned int, - __map_depth(sb, index), - shallow_depth), - SB_NR_TO_BIT(sb, alloc_hint), true); - if (nr != -1) { - nr += index << sb->shift; - break; - } - - if (sbitmap_deferred_clear(&sb->map[index])) - goto again; - - /* Jump to next index. */ - index++; - alloc_hint = index << sb->shift; - - if (index >= sb->map_nr) { - index = 0; - alloc_hint = 0; - } - } - - return nr; + return sbitmap_find_bit(sb, shallow_depth, index, alloc_hint, true); } int sbitmap_get_shallow(struct sbitmap *sb, unsigned long shallow_depth) @@ -464,13 +457,10 @@ void sbitmap_queue_recalculate_wake_batch(struct sbitmap_queue *sbq, unsigned int users) { unsigned int wake_batch; - unsigned int min_batch; unsigned int depth = (sbq->sb.depth + users - 1) / users; - min_batch = sbq->sb.depth >= (4 * SBQ_WAIT_QUEUES) ? 4 : 1; - wake_batch = clamp_val(depth / SBQ_WAIT_QUEUES, - min_batch, SBQ_WAKE_BATCH); + 1, SBQ_WAKE_BATCH); WRITE_ONCE(sbq->wake_batch, wake_batch); } @@ -521,11 +511,9 @@ unsigned long __sbitmap_queue_get_batch(struct sbitmap_queue *sbq, int nr_tags, get_mask = ((1UL << nr_tags) - 1) << nr; val = READ_ONCE(map->word); - do { - if ((val & ~get_mask) != val) - goto next; - } while (!atomic_long_try_cmpxchg(ptr, &val, - get_mask | val)); + while (!atomic_long_try_cmpxchg(ptr, &val, + get_mask | val)) + ; get_mask = (get_mask & ~val) >> nr; if (get_mask) { *offset = nr + (index << sb->shift); diff --git a/lib/string.c b/lib/string.c index 4fb566ea610f..3d55ef890106 100644 --- a/lib/string.c +++ b/lib/string.c @@ -480,13 +480,11 @@ EXPORT_SYMBOL(strcspn); */ char *strpbrk(const char *cs, const char *ct) { - const char *sc1, *sc2; + const char *sc; - for (sc1 = cs; *sc1 != '\0'; ++sc1) { - for (sc2 = ct; *sc2 != '\0'; ++sc2) { - if (*sc1 == *sc2) - return (char *)sc1; - } + for (sc = cs; *sc != '\0'; ++sc) { + if (strchr(ct, *sc)) + return (char *)sc; } return NULL; } diff --git a/lib/ubsan.c b/lib/ubsan.c index 60c7099857a0..e2cc4a799312 100644 --- a/lib/ubsan.c +++ b/lib/ubsan.c @@ -14,10 +14,76 @@ #include <linux/types.h> #include <linux/sched.h> #include <linux/uaccess.h> +#include <linux/ubsan.h> #include <kunit/test-bug.h> #include "ubsan.h" +#ifdef CONFIG_UBSAN_TRAP +/* + * Only include matches for UBSAN checks that are actually compiled in. + * The mappings of struct SanitizerKind (the -fsanitize=xxx args) to + * enum SanitizerHandler (the traps) in Clang is in clang/lib/CodeGen/. + */ +const char *report_ubsan_failure(struct pt_regs *regs, u32 check_type) +{ + switch (check_type) { +#ifdef CONFIG_UBSAN_BOUNDS + /* + * SanitizerKind::ArrayBounds and SanitizerKind::LocalBounds + * emit SanitizerHandler::OutOfBounds. + */ + case ubsan_out_of_bounds: + return "UBSAN: array index out of bounds"; +#endif +#ifdef CONFIG_UBSAN_SHIFT + /* + * SanitizerKind::ShiftBase and SanitizerKind::ShiftExponent + * emit SanitizerHandler::ShiftOutOfBounds. + */ + case ubsan_shift_out_of_bounds: + return "UBSAN: shift out of bounds"; +#endif +#ifdef CONFIG_UBSAN_DIV_ZERO + /* + * SanitizerKind::IntegerDivideByZero emits + * SanitizerHandler::DivremOverflow. + */ + case ubsan_divrem_overflow: + return "UBSAN: divide/remainder overflow"; +#endif +#ifdef CONFIG_UBSAN_UNREACHABLE + /* + * SanitizerKind::Unreachable emits + * SanitizerHandler::BuiltinUnreachable. + */ + case ubsan_builtin_unreachable: + return "UBSAN: unreachable code"; +#endif +#if defined(CONFIG_UBSAN_BOOL) || defined(CONFIG_UBSAN_ENUM) + /* + * SanitizerKind::Bool and SanitizerKind::Enum emit + * SanitizerHandler::LoadInvalidValue. + */ + case ubsan_load_invalid_value: + return "UBSAN: loading invalid value"; +#endif +#ifdef CONFIG_UBSAN_ALIGNMENT + /* + * SanitizerKind::Alignment emits SanitizerHandler::TypeMismatch + * or SanitizerHandler::AlignmentAssumption. + */ + case ubsan_alignment_assumption: + return "UBSAN: alignment assumption"; + case ubsan_type_mismatch: + return "UBSAN: type mismatch"; +#endif + default: + return "UBSAN: unrecognized failure code"; + } +} + +#else static const char * const type_check_kinds[] = { "load of", "store to", @@ -339,9 +405,10 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val) { struct invalid_value_data *data = _data; char val_str[VALUE_LENGTH]; + unsigned long ua_flags = user_access_save(); if (suppress_report(&data->location)) - return; + goto out; ubsan_prologue(&data->location, "invalid-load"); @@ -351,6 +418,8 @@ void __ubsan_handle_load_invalid_value(void *_data, void *val) val_str, data->type->type_name); ubsan_epilogue(); +out: + user_access_restore(ua_flags); } EXPORT_SYMBOL(__ubsan_handle_load_invalid_value); @@ -384,3 +453,5 @@ void __ubsan_handle_alignment_assumption(void *_data, unsigned long ptr, ubsan_epilogue(); } EXPORT_SYMBOL(__ubsan_handle_alignment_assumption); + +#endif /* !CONFIG_UBSAN_TRAP */ diff --git a/lib/ubsan.h b/lib/ubsan.h index 9a0b71c5ff9f..cc5cb94895a6 100644 --- a/lib/ubsan.h +++ b/lib/ubsan.h @@ -2,6 +2,38 @@ #ifndef _LIB_UBSAN_H #define _LIB_UBSAN_H +/* + * ABI defined by Clang's UBSAN enum SanitizerHandler: + * https://github.com/llvm/llvm-project/blob/release/16.x/clang/lib/CodeGen/CodeGenFunction.h#L113 + */ +enum ubsan_checks { + ubsan_add_overflow, + ubsan_builtin_unreachable, + ubsan_cfi_check_fail, + ubsan_divrem_overflow, + ubsan_dynamic_type_cache_miss, + ubsan_float_cast_overflow, + ubsan_function_type_mismatch, + ubsan_implicit_conversion, + ubsan_invalid_builtin, + ubsan_invalid_objc_cast, + ubsan_load_invalid_value, + ubsan_missing_return, + ubsan_mul_overflow, + ubsan_negate_overflow, + ubsan_nullability_arg, + ubsan_nullability_return, + ubsan_nonnull_arg, + ubsan_nonnull_return, + ubsan_out_of_bounds, + ubsan_pointer_overflow, + ubsan_shift_out_of_bounds, + ubsan_sub_overflow, + ubsan_type_mismatch, + ubsan_alignment_assumption, + ubsan_vla_bound_not_positive, +}; + enum { type_kind_int = 0, type_kind_float = 1, diff --git a/lib/usercopy.c b/lib/usercopy.c index 1505a52f23a0..d29fe29c6849 100644 --- a/lib/usercopy.c +++ b/lib/usercopy.c @@ -3,6 +3,7 @@ #include <linux/fault-inject-usercopy.h> #include <linux/instrumented.h> #include <linux/uaccess.h> +#include <linux/nospec.h> /* out-of-line parts */ @@ -12,6 +13,12 @@ unsigned long _copy_from_user(void *to, const void __user *from, unsigned long n unsigned long res = n; might_fault(); if (!should_fail_usercopy() && likely(access_ok(from, n))) { + /* + * Ensure that bad access_ok() speculation will not + * lead to nasty side effects *after* the copy is + * finished: + */ + barrier_nospec(); instrument_copy_from_user_before(to, from, n); res = raw_copy_from_user(to, from, n); instrument_copy_from_user_after(to, from, n, res); |