Diffstat (limited to 'lib')
-rw-r--r--  lib/Kconfig.debug   |  42
-rw-r--r--  lib/Makefile        |   2
-rw-r--r--  lib/debugobjects.c  |  58
-rw-r--r--  lib/halfmd4.c       |  67
-rw-r--r--  lib/ioremap.c       |   1
-rw-r--r--  lib/iov_iter.c      |  54
-rw-r--r--  lib/radix-tree.c    |  11
-rw-r--r--  lib/sbitmap.c       | 139
-rw-r--r--  lib/swiotlb.c       |  58
-rw-r--r--  lib/timerqueue.c    |   3
10 files changed, 295 insertions, 140 deletions
diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index b06848a104e6..acedbe626d47 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -164,7 +164,7 @@ config DEBUG_INFO_REDUCED config DEBUG_INFO_SPLIT bool "Produce split debuginfo in .dwo files" - depends on DEBUG_INFO + depends on DEBUG_INFO && !FRV help Generate debug info into separate .dwo files. This significantly reduces the build directory size for builds with DEBUG_INFO, @@ -716,6 +716,19 @@ source "lib/Kconfig.kmemcheck" source "lib/Kconfig.kasan" +config DEBUG_REFCOUNT + bool "Verbose refcount checks" + help + Say Y here if you want reference counters (refcount_t and kref) to + generate WARNs on dubious usage. Without this refcount_t will still + be a saturating counter and avoid Use-After-Free by turning it into + a resource leak Denial-Of-Service. + + Use of this option will increase kernel text size but will alert the + admin of potential abuse. + + If in doubt, say "N". + endmenu # "Memory Debugging" config ARCH_HAS_KCOV @@ -980,20 +993,6 @@ config DEBUG_TIMEKEEPING If unsure, say N. -config TIMER_STATS - bool "Collect kernel timers statistics" - depends on DEBUG_KERNEL && PROC_FS - help - If you say Y here, additional code will be inserted into the - timer routines to collect statistics about kernel timers being - reprogrammed. The statistics can be read from /proc/timer_stats. - The statistics collection is started by writing 1 to /proc/timer_stats, - writing 0 stops it. This feature is useful to collect information - about timer usage patterns in kernel and userspace. This feature - is lightweight if enabled in the kernel config but not activated - (it defaults to deactivated on bootup and will only be activated - if some application like powertop activates it explicitly). - config DEBUG_PREEMPT bool "Debug preemptible kernel" depends on DEBUG_KERNEL && PREEMPT && TRACE_IRQFLAGS_SUPPORT @@ -1180,6 +1179,18 @@ config LOCK_TORTURE_TEST Say M if you want these torture tests to build as a module. Say N if you are unsure. +config WW_MUTEX_SELFTEST + tristate "Wait/wound mutex selftests" + help + This option provides a kernel module that runs tests on the + on the struct ww_mutex locking API. + + It is recommended to enable DEBUG_WW_MUTEX_SLOWPATH in conjunction + with this test harness. + + Say M if you want these self tests to build as a module. + Say N if you are unsure. 
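The DEBUG_REFCOUNT help text above leans on a property refcount_t keeps even without the extra warnings: on overflow the counter saturates and the object is deliberately leaked rather than freed while still referenced. As a rough illustration of that saturate-instead-of-wrap behaviour, here is a minimal userspace sketch; it is not the kernel's refcount_t implementation, and REF_SATURATED and the helper names are made up for the example.

/*
 * Minimal userspace sketch of a saturating reference counter, only to
 * illustrate the behaviour the DEBUG_REFCOUNT help text describes.
 * This is NOT the kernel's refcount_t; REF_SATURATED and the helper
 * names are invented for the example.
 */
#include <stdio.h>
#include <stdatomic.h>

#define REF_SATURATED  0x7fffffffu      /* arbitrary "stuck" ceiling */

struct ref {
        atomic_uint count;
};

static void ref_get(struct ref *r)
{
        unsigned int old = atomic_load(&r->count);

        do {
                if (old == REF_SATURATED)
                        return;         /* stay saturated: leak, never wrap */
                if (old == 0) {
                        fprintf(stderr, "ref_get() on a zero count\n");
                        return;
                }
        } while (!atomic_compare_exchange_weak(&r->count, &old, old + 1));
}

static int ref_put(struct ref *r)
{
        unsigned int old = atomic_load(&r->count);

        do {
                if (old == REF_SATURATED)
                        return 0;       /* saturated objects are never freed */
                if (old == 0) {
                        fprintf(stderr, "ref_put() underflow\n");
                        return 0;
                }
        } while (!atomic_compare_exchange_weak(&r->count, &old, old - 1));

        return old == 1;                /* 1 -> 0: caller may free the object */
}

int main(void)
{
        struct ref r = { .count = 1 };

        ref_get(&r);                                    /* 1 -> 2 */
        printf("first put frees:  %d\n", ref_put(&r));  /* 2 -> 1 */
        printf("second put frees: %d\n", ref_put(&r));  /* 1 -> 0 */
        return 0;
}

Turning a would-be overflow into a bounded leak is exactly the "resource leak Denial-Of-Service" trade-off the help text mentions; the new option only adds the verbose WARNs on top.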
+ endmenu # lock debugging config TRACE_IRQFLAGS @@ -1450,6 +1461,7 @@ config RCU_CPU_STALL_TIMEOUT config RCU_TRACE bool "Enable tracing for RCU" depends on DEBUG_KERNEL + default y if TREE_RCU select TRACE_CLOCK help This option provides tracing in RCU which presents stats diff --git a/lib/Makefile b/lib/Makefile index bc4073a8cd08..19ea76149a37 100644 --- a/lib/Makefile +++ b/lib/Makefile @@ -31,7 +31,7 @@ lib-$(CONFIG_HAS_DMA) += dma-noop.o lib-y += kobject.o klist.o obj-y += lockref.o -obj-y += bcd.o div64.o sort.o parser.o halfmd4.o debug_locks.o random32.o \ +obj-y += bcd.o div64.o sort.o parser.o debug_locks.o random32.o \ bust_spinlocks.o kasprintf.o bitmap.o scatterlist.o \ gcd.o lcm.o list_sort.o uuid.o flex_array.o iov_iter.o clz_ctz.o \ bsearch.o find_bit.o llist.o memweight.o kfifo.o \ diff --git a/lib/debugobjects.c b/lib/debugobjects.c index 04c1ef717fe0..8c28cbd7e104 100644 --- a/lib/debugobjects.c +++ b/lib/debugobjects.c @@ -52,9 +52,18 @@ static int debug_objects_fixups __read_mostly; static int debug_objects_warnings __read_mostly; static int debug_objects_enabled __read_mostly = CONFIG_DEBUG_OBJECTS_ENABLE_DEFAULT; - +static int debug_objects_pool_size __read_mostly + = ODEBUG_POOL_SIZE; +static int debug_objects_pool_min_level __read_mostly + = ODEBUG_POOL_MIN_LEVEL; static struct debug_obj_descr *descr_test __read_mostly; +/* + * Track numbers of kmem_cache_alloc()/free() calls done. + */ +static int debug_objects_allocated; +static int debug_objects_freed; + static void free_obj_work(struct work_struct *work); static DECLARE_WORK(debug_obj_work, free_obj_work); @@ -88,13 +97,13 @@ static void fill_pool(void) struct debug_obj *new; unsigned long flags; - if (likely(obj_pool_free >= ODEBUG_POOL_MIN_LEVEL)) + if (likely(obj_pool_free >= debug_objects_pool_min_level)) return; if (unlikely(!obj_cache)) return; - while (obj_pool_free < ODEBUG_POOL_MIN_LEVEL) { + while (obj_pool_free < debug_objects_pool_min_level) { new = kmem_cache_zalloc(obj_cache, gfp); if (!new) @@ -102,6 +111,7 @@ static void fill_pool(void) raw_spin_lock_irqsave(&pool_lock, flags); hlist_add_head(&new->node, &obj_pool); + debug_objects_allocated++; obj_pool_free++; raw_spin_unlock_irqrestore(&pool_lock, flags); } @@ -162,24 +172,39 @@ alloc_object(void *addr, struct debug_bucket *b, struct debug_obj_descr *descr) /* * workqueue function to free objects. + * + * To reduce contention on the global pool_lock, the actual freeing of + * debug objects will be delayed if the pool_lock is busy. We also free + * the objects in a batch of 4 for each lock/unlock cycle. */ +#define ODEBUG_FREE_BATCH 4 + static void free_obj_work(struct work_struct *work) { - struct debug_obj *obj; + struct debug_obj *objs[ODEBUG_FREE_BATCH]; unsigned long flags; + int i; - raw_spin_lock_irqsave(&pool_lock, flags); - while (obj_pool_free > ODEBUG_POOL_SIZE) { - obj = hlist_entry(obj_pool.first, typeof(*obj), node); - hlist_del(&obj->node); - obj_pool_free--; + if (!raw_spin_trylock_irqsave(&pool_lock, flags)) + return; + while (obj_pool_free >= debug_objects_pool_size + ODEBUG_FREE_BATCH) { + for (i = 0; i < ODEBUG_FREE_BATCH; i++) { + objs[i] = hlist_entry(obj_pool.first, + typeof(*objs[0]), node); + hlist_del(&objs[i]->node); + } + + obj_pool_free -= ODEBUG_FREE_BATCH; + debug_objects_freed += ODEBUG_FREE_BATCH; /* * We release pool_lock across kmem_cache_free() to * avoid contention on pool_lock. 
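The comment above is the heart of the debugobjects change: free_obj_work() now gives up immediately if pool_lock is contended, and otherwise frees ODEBUG_FREE_BATCH objects per lock/unlock cycle, dropping the lock across the actual frees. The same shape is easy to see in a userspace sketch; pthread_mutex_trylock() stands in for raw_spin_trylock_irqsave(), a singly linked list for obj_pool, and free() for kmem_cache_free(), with placeholder pool sizes.

/*
 * Userspace sketch of the batched, lock-dropping free pattern used by
 * free_obj_work() above. The lock, list and numbers are stand-ins.
 */
#include <pthread.h>
#include <stdio.h>
#include <stdlib.h>

#define FREE_BATCH 4

struct obj {
        struct obj *next;
};

static pthread_mutex_t pool_lock = PTHREAD_MUTEX_INITIALIZER;
static struct obj *pool;                /* free pool, like obj_pool */
static int pool_free;                   /* like obj_pool_free */
static int pool_size = 64;              /* like debug_objects_pool_size */

static void drain_pool(void)
{
        struct obj *batch[FREE_BATCH];
        int i;

        /* If the lock is busy, try again later instead of contending. */
        if (pthread_mutex_trylock(&pool_lock))
                return;

        while (pool_free >= pool_size + FREE_BATCH) {
                /* Detach a whole batch while holding the lock... */
                for (i = 0; i < FREE_BATCH; i++) {
                        batch[i] = pool;
                        pool = pool->next;
                }
                pool_free -= FREE_BATCH;

                /* ...but drop it across the (possibly slow) frees. */
                pthread_mutex_unlock(&pool_lock);
                for (i = 0; i < FREE_BATCH; i++)
                        free(batch[i]);
                if (pthread_mutex_trylock(&pool_lock))
                        return;
        }
        pthread_mutex_unlock(&pool_lock);
}

int main(void)
{
        int i;

        for (i = 0; i < 80; i++) {      /* overfill the pool */
                struct obj *o = malloc(sizeof(*o));

                o->next = pool;
                pool = o;
                pool_free++;
        }
        drain_pool();
        printf("pool drained down to %d objects\n", pool_free);  /* 64 */
        return 0;
}

The batch array lives on the stack, so the lock hold time stays bounded no matter how far the pool has overgrown its target.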
*/ raw_spin_unlock_irqrestore(&pool_lock, flags); - kmem_cache_free(obj_cache, obj); - raw_spin_lock_irqsave(&pool_lock, flags); + for (i = 0; i < ODEBUG_FREE_BATCH; i++) + kmem_cache_free(obj_cache, objs[i]); + if (!raw_spin_trylock_irqsave(&pool_lock, flags)) + return; } raw_spin_unlock_irqrestore(&pool_lock, flags); } @@ -198,7 +223,7 @@ static void free_object(struct debug_obj *obj) * schedule work when the pool is filled and the cache is * initialized: */ - if (obj_pool_free > ODEBUG_POOL_SIZE && obj_cache) + if (obj_pool_free > debug_objects_pool_size && obj_cache) sched = 1; hlist_add_head(&obj->node, &obj_pool); obj_pool_free++; @@ -758,6 +783,8 @@ static int debug_stats_show(struct seq_file *m, void *v) seq_printf(m, "pool_min_free :%d\n", obj_pool_min_free); seq_printf(m, "pool_used :%d\n", obj_pool_used); seq_printf(m, "pool_max_used :%d\n", obj_pool_max_used); + seq_printf(m, "objs_allocated:%d\n", debug_objects_allocated); + seq_printf(m, "objs_freed :%d\n", debug_objects_freed); return 0; } @@ -1116,4 +1143,11 @@ void __init debug_objects_mem_init(void) pr_warn("out of memory.\n"); } else debug_objects_selftest(); + + /* + * Increase the thresholds for allocating and freeing objects + * according to the number of possible CPUs available in the system. + */ + debug_objects_pool_size += num_possible_cpus() * 32; + debug_objects_pool_min_level += num_possible_cpus() * 4; } diff --git a/lib/halfmd4.c b/lib/halfmd4.c deleted file mode 100644 index 137e861d9690..000000000000 --- a/lib/halfmd4.c +++ /dev/null @@ -1,67 +0,0 @@ -#include <linux/compiler.h> -#include <linux/export.h> -#include <linux/cryptohash.h> -#include <linux/bitops.h> - -/* F, G and H are basic MD4 functions: selection, majority, parity */ -#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z)))) -#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z))) -#define H(x, y, z) ((x) ^ (y) ^ (z)) - -/* - * The generic round function. The application is so specific that - * we don't bother protecting all the arguments with parens, as is generally - * good macro practice, in favor of extra legibility. - * Rotation is separate from addition to prevent recomputation - */ -#define ROUND(f, a, b, c, d, x, s) \ - (a += f(b, c, d) + x, a = rol32(a, s)) -#define K1 0 -#define K2 013240474631UL -#define K3 015666365641UL - -/* - * Basic cut-down MD4 transform. Returns only 32 bits of result. 
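lib/halfmd4.c is deleted outright, so the only in-tree copy of half_md4_transform() disappears with this diff. Since the whole file is quoted here, the routine is easy to rebuild in userspace if old hash values ever need to be reproduced; the sketch below keeps the macros exactly as shown, open-codes rol32(), and feeds an arbitrary test vector from main().

/*
 * Userspace copy of the half_md4_transform() removed above, kept only
 * as a reference. rol32() is open-coded; the input in main() is
 * arbitrary.
 */
#include <stdio.h>
#include <stdint.h>

#define F(x, y, z) ((z) ^ ((x) & ((y) ^ (z))))
#define G(x, y, z) (((x) & (y)) + (((x) ^ (y)) & (z)))
#define H(x, y, z) ((x) ^ (y) ^ (z))

#define ROUND(f, a, b, c, d, x, s) \
        (a += f(b, c, d) + x, a = rol32(a, s))
#define K1 0
#define K2 013240474631UL
#define K3 015666365641UL

static uint32_t rol32(uint32_t word, unsigned int shift)
{
        return (word << shift) | (word >> (32 - shift));
}

static uint32_t half_md4_transform(uint32_t buf[4], const uint32_t in[8])
{
        uint32_t a = buf[0], b = buf[1], c = buf[2], d = buf[3];

        /* Round 1 */
        ROUND(F, a, b, c, d, in[0] + K1, 3);
        ROUND(F, d, a, b, c, in[1] + K1, 7);
        ROUND(F, c, d, a, b, in[2] + K1, 11);
        ROUND(F, b, c, d, a, in[3] + K1, 19);
        ROUND(F, a, b, c, d, in[4] + K1, 3);
        ROUND(F, d, a, b, c, in[5] + K1, 7);
        ROUND(F, c, d, a, b, in[6] + K1, 11);
        ROUND(F, b, c, d, a, in[7] + K1, 19);

        /* Round 2 */
        ROUND(G, a, b, c, d, in[1] + K2, 3);
        ROUND(G, d, a, b, c, in[3] + K2, 5);
        ROUND(G, c, d, a, b, in[5] + K2, 9);
        ROUND(G, b, c, d, a, in[7] + K2, 13);
        ROUND(G, a, b, c, d, in[0] + K2, 3);
        ROUND(G, d, a, b, c, in[2] + K2, 5);
        ROUND(G, c, d, a, b, in[4] + K2, 9);
        ROUND(G, b, c, d, a, in[6] + K2, 13);

        /* Round 3 */
        ROUND(H, a, b, c, d, in[3] + K3, 3);
        ROUND(H, d, a, b, c, in[7] + K3, 9);
        ROUND(H, c, d, a, b, in[2] + K3, 11);
        ROUND(H, b, c, d, a, in[6] + K3, 15);
        ROUND(H, a, b, c, d, in[1] + K3, 3);
        ROUND(H, d, a, b, c, in[5] + K3, 9);
        ROUND(H, c, d, a, b, in[0] + K3, 11);
        ROUND(H, b, c, d, a, in[4] + K3, 15);

        buf[0] += a;
        buf[1] += b;
        buf[2] += c;
        buf[3] += d;

        return buf[1];  /* "most hashed" word */
}

int main(void)
{
        uint32_t buf[4] = { 0x67452301, 0xefcdab89, 0x98badcfe, 0x10325476 };
        uint32_t in[8] = { 1, 2, 3, 4, 5, 6, 7, 8 };

        printf("half_md4 = %08x\n", half_md4_transform(buf, in));
        return 0;
}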
- */ -__u32 half_md4_transform(__u32 buf[4], __u32 const in[8]) -{ - __u32 a = buf[0], b = buf[1], c = buf[2], d = buf[3]; - - /* Round 1 */ - ROUND(F, a, b, c, d, in[0] + K1, 3); - ROUND(F, d, a, b, c, in[1] + K1, 7); - ROUND(F, c, d, a, b, in[2] + K1, 11); - ROUND(F, b, c, d, a, in[3] + K1, 19); - ROUND(F, a, b, c, d, in[4] + K1, 3); - ROUND(F, d, a, b, c, in[5] + K1, 7); - ROUND(F, c, d, a, b, in[6] + K1, 11); - ROUND(F, b, c, d, a, in[7] + K1, 19); - - /* Round 2 */ - ROUND(G, a, b, c, d, in[1] + K2, 3); - ROUND(G, d, a, b, c, in[3] + K2, 5); - ROUND(G, c, d, a, b, in[5] + K2, 9); - ROUND(G, b, c, d, a, in[7] + K2, 13); - ROUND(G, a, b, c, d, in[0] + K2, 3); - ROUND(G, d, a, b, c, in[2] + K2, 5); - ROUND(G, c, d, a, b, in[4] + K2, 9); - ROUND(G, b, c, d, a, in[6] + K2, 13); - - /* Round 3 */ - ROUND(H, a, b, c, d, in[3] + K3, 3); - ROUND(H, d, a, b, c, in[7] + K3, 9); - ROUND(H, c, d, a, b, in[2] + K3, 11); - ROUND(H, b, c, d, a, in[6] + K3, 15); - ROUND(H, a, b, c, d, in[1] + K3, 3); - ROUND(H, d, a, b, c, in[5] + K3, 9); - ROUND(H, c, d, a, b, in[0] + K3, 11); - ROUND(H, b, c, d, a, in[4] + K3, 15); - - buf[0] += a; - buf[1] += b; - buf[2] += c; - buf[3] += d; - - return buf[1]; /* "most hashed" word */ -} -EXPORT_SYMBOL(half_md4_transform); diff --git a/lib/ioremap.c b/lib/ioremap.c index 86c8911b0e3a..a3e14ce92a56 100644 --- a/lib/ioremap.c +++ b/lib/ioremap.c @@ -144,4 +144,3 @@ int ioremap_page_range(unsigned long addr, return err; } -EXPORT_SYMBOL_GPL(ioremap_page_range); diff --git a/lib/iov_iter.c b/lib/iov_iter.c index 25f572303801..e68604ae3ced 100644 --- a/lib/iov_iter.c +++ b/lib/iov_iter.c @@ -730,43 +730,50 @@ size_t iov_iter_copy_from_user_atomic(struct page *page, } EXPORT_SYMBOL(iov_iter_copy_from_user_atomic); +static inline void pipe_truncate(struct iov_iter *i) +{ + struct pipe_inode_info *pipe = i->pipe; + if (pipe->nrbufs) { + size_t off = i->iov_offset; + int idx = i->idx; + int nrbufs = (idx - pipe->curbuf) & (pipe->buffers - 1); + if (off) { + pipe->bufs[idx].len = off - pipe->bufs[idx].offset; + idx = next_idx(idx, pipe); + nrbufs++; + } + while (pipe->nrbufs > nrbufs) { + pipe_buf_release(pipe, &pipe->bufs[idx]); + idx = next_idx(idx, pipe); + pipe->nrbufs--; + } + } +} + static void pipe_advance(struct iov_iter *i, size_t size) { struct pipe_inode_info *pipe = i->pipe; - struct pipe_buffer *buf; - int idx = i->idx; - size_t off = i->iov_offset, orig_sz; - if (unlikely(i->count < size)) size = i->count; - orig_sz = size; - if (size) { + struct pipe_buffer *buf; + size_t off = i->iov_offset, left = size; + int idx = i->idx; if (off) /* make it relative to the beginning of buffer */ - size += off - pipe->bufs[idx].offset; + left += off - pipe->bufs[idx].offset; while (1) { buf = &pipe->bufs[idx]; - if (size <= buf->len) + if (left <= buf->len) break; - size -= buf->len; + left -= buf->len; idx = next_idx(idx, pipe); } - buf->len = size; i->idx = idx; - off = i->iov_offset = buf->offset + size; - } - if (off) - idx = next_idx(idx, pipe); - if (pipe->nrbufs) { - int unused = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); - /* [curbuf,unused) is in use. Free [idx,unused) */ - while (idx != unused) { - pipe_buf_release(pipe, &pipe->bufs[idx]); - idx = next_idx(idx, pipe); - pipe->nrbufs--; - } + i->iov_offset = buf->offset + left; } - i->count -= orig_sz; + i->count -= size; + /* ... 
and discard everything past that point */ + pipe_truncate(i); } void iov_iter_advance(struct iov_iter *i, size_t size) @@ -826,6 +833,7 @@ void iov_iter_pipe(struct iov_iter *i, int direction, size_t count) { BUG_ON(direction != ITER_PIPE); + WARN_ON(pipe->nrbufs == pipe->buffers); i->type = direction; i->pipe = pipe; i->idx = (pipe->curbuf + pipe->nrbufs) & (pipe->buffers - 1); diff --git a/lib/radix-tree.c b/lib/radix-tree.c index 6f382e07de77..84812a9fb16f 100644 --- a/lib/radix-tree.c +++ b/lib/radix-tree.c @@ -640,6 +640,7 @@ static inline void radix_tree_shrink(struct radix_tree_root *root, update_node(node, private); } + WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); } } @@ -666,6 +667,7 @@ static void delete_node(struct radix_tree_root *root, root->rnode = NULL; } + WARN_ON_ONCE(!list_empty(&node->private_list)); radix_tree_node_free(node); node = parent; @@ -767,6 +769,7 @@ static void radix_tree_free_nodes(struct radix_tree_node *node) struct radix_tree_node *old = child; offset = child->offset + 1; child = child->parent; + WARN_ON_ONCE(!list_empty(&old->private_list)); radix_tree_node_free(old); if (old == entry_to_node(node)) return; @@ -1824,15 +1827,19 @@ EXPORT_SYMBOL(radix_tree_gang_lookup_tag_slot); * __radix_tree_delete_node - try to free node after clearing a slot * @root: radix tree root * @node: node containing @index + * @update_node: callback for changing leaf nodes + * @private: private data to pass to @update_node * * After clearing the slot at @index in @node from radix tree * rooted at @root, call this function to attempt freeing the * node and shrinking the tree. */ void __radix_tree_delete_node(struct radix_tree_root *root, - struct radix_tree_node *node) + struct radix_tree_node *node, + radix_tree_update_node_t update_node, + void *private) { - delete_node(root, node, NULL, NULL); + delete_node(root, node, update_node, private); } /** diff --git a/lib/sbitmap.c b/lib/sbitmap.c index 2cecf05c82fd..55e11c4b2f3b 100644 --- a/lib/sbitmap.c +++ b/lib/sbitmap.c @@ -17,6 +17,7 @@ #include <linux/random.h> #include <linux/sbitmap.h> +#include <linux/seq_file.h> int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, gfp_t flags, int node) @@ -180,6 +181,62 @@ unsigned int sbitmap_weight(const struct sbitmap *sb) } EXPORT_SYMBOL_GPL(sbitmap_weight); +void sbitmap_show(struct sbitmap *sb, struct seq_file *m) +{ + seq_printf(m, "depth=%u\n", sb->depth); + seq_printf(m, "busy=%u\n", sbitmap_weight(sb)); + seq_printf(m, "bits_per_word=%u\n", 1U << sb->shift); + seq_printf(m, "map_nr=%u\n", sb->map_nr); +} +EXPORT_SYMBOL_GPL(sbitmap_show); + +static inline void emit_byte(struct seq_file *m, unsigned int offset, u8 byte) +{ + if ((offset & 0xf) == 0) { + if (offset != 0) + seq_putc(m, '\n'); + seq_printf(m, "%08x:", offset); + } + if ((offset & 0x1) == 0) + seq_putc(m, ' '); + seq_printf(m, "%02x", byte); +} + +void sbitmap_bitmap_show(struct sbitmap *sb, struct seq_file *m) +{ + u8 byte = 0; + unsigned int byte_bits = 0; + unsigned int offset = 0; + int i; + + for (i = 0; i < sb->map_nr; i++) { + unsigned long word = READ_ONCE(sb->map[i].word); + unsigned int word_bits = READ_ONCE(sb->map[i].depth); + + while (word_bits > 0) { + unsigned int bits = min(8 - byte_bits, word_bits); + + byte |= (word & (BIT(bits) - 1)) << byte_bits; + byte_bits += bits; + if (byte_bits == 8) { + emit_byte(m, offset, byte); + byte = 0; + byte_bits = 0; + offset++; + } + word >>= bits; + word_bits -= bits; + } + } + if (byte_bits) { + emit_byte(m, 
offset, byte); + offset++; + } + if (offset) + seq_putc(m, '\n'); +} +EXPORT_SYMBOL_GPL(sbitmap_bitmap_show); + static unsigned int sbq_calc_wake_batch(unsigned int depth) { unsigned int wake_batch; @@ -239,7 +296,19 @@ EXPORT_SYMBOL_GPL(sbitmap_queue_init_node); void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth) { - sbq->wake_batch = sbq_calc_wake_batch(depth); + unsigned int wake_batch = sbq_calc_wake_batch(depth); + int i; + + if (sbq->wake_batch != wake_batch) { + WRITE_ONCE(sbq->wake_batch, wake_batch); + /* + * Pairs with the memory barrier in sbq_wake_up() to ensure that + * the batch size is updated before the wait counts. + */ + smp_mb__before_atomic(); + for (i = 0; i < SBQ_WAIT_QUEUES; i++) + atomic_set(&sbq->ws[i].wait_cnt, 1); + } sbitmap_resize(&sbq->sb, depth); } EXPORT_SYMBOL_GPL(sbitmap_queue_resize); @@ -297,20 +366,39 @@ static struct sbq_wait_state *sbq_wake_ptr(struct sbitmap_queue *sbq) static void sbq_wake_up(struct sbitmap_queue *sbq) { struct sbq_wait_state *ws; + unsigned int wake_batch; int wait_cnt; - /* Ensure that the wait list checks occur after clear_bit(). */ - smp_mb(); + /* + * Pairs with the memory barrier in set_current_state() to ensure the + * proper ordering of clear_bit()/waitqueue_active() in the waker and + * test_and_set_bit()/prepare_to_wait()/finish_wait() in the waiter. See + * the comment on waitqueue_active(). This is __after_atomic because we + * just did clear_bit() in the caller. + */ + smp_mb__after_atomic(); ws = sbq_wake_ptr(sbq); if (!ws) return; wait_cnt = atomic_dec_return(&ws->wait_cnt); - if (unlikely(wait_cnt < 0)) - wait_cnt = atomic_inc_return(&ws->wait_cnt); - if (wait_cnt == 0) { - atomic_add(sbq->wake_batch, &ws->wait_cnt); + if (wait_cnt <= 0) { + wake_batch = READ_ONCE(sbq->wake_batch); + /* + * Pairs with the memory barrier in sbitmap_queue_resize() to + * ensure that we see the batch size update before the wait + * count is reset. + */ + smp_mb__before_atomic(); + /* + * If there are concurrent callers to sbq_wake_up(), the last + * one to decrement the wait count below zero will bump it back + * up. If there is a concurrent resize, the count reset will + * either cause the cmpxchg to fail or overwrite after the + * cmpxchg. + */ + atomic_cmpxchg(&ws->wait_cnt, wait_cnt, wait_cnt + wake_batch); sbq_index_atomic_inc(&sbq->wake_index); wake_up(&ws->wait); } @@ -331,7 +419,8 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) int i, wake_index; /* - * Make sure all changes prior to this are visible from other CPUs. + * Pairs with the memory barrier in set_current_state() like in + * sbq_wake_up(). */ smp_mb(); wake_index = atomic_read(&sbq->wake_index); @@ -345,3 +434,37 @@ void sbitmap_queue_wake_all(struct sbitmap_queue *sbq) } } EXPORT_SYMBOL_GPL(sbitmap_queue_wake_all); + +void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m) +{ + bool first; + int i; + + sbitmap_show(&sbq->sb, m); + + seq_puts(m, "alloc_hint={"); + first = true; + for_each_possible_cpu(i) { + if (!first) + seq_puts(m, ", "); + first = false; + seq_printf(m, "%u", *per_cpu_ptr(sbq->alloc_hint, i)); + } + seq_puts(m, "}\n"); + + seq_printf(m, "wake_batch=%u\n", sbq->wake_batch); + seq_printf(m, "wake_index=%d\n", atomic_read(&sbq->wake_index)); + + seq_puts(m, "ws={\n"); + for (i = 0; i < SBQ_WAIT_QUEUES; i++) { + struct sbq_wait_state *ws = &sbq->ws[i]; + + seq_printf(m, "\t{.wait_cnt=%d, .wait=%s},\n", + atomic_read(&ws->wait_cnt), + waitqueue_active(&ws->wait) ? 
"active" : "inactive"); + } + seq_puts(m, "}\n"); + + seq_printf(m, "round_robin=%d\n", sbq->round_robin); +} +EXPORT_SYMBOL_GPL(sbitmap_queue_show); diff --git a/lib/swiotlb.c b/lib/swiotlb.c index cb1b54ee8527..a8d74a733a38 100644 --- a/lib/swiotlb.c +++ b/lib/swiotlb.c @@ -53,7 +53,7 @@ */ #define IO_TLB_MIN_SLABS ((1<<20) >> IO_TLB_SHIFT) -int swiotlb_force; +enum swiotlb_force swiotlb_force; /* * Used to do a quick range check in swiotlb_tbl_unmap_single and @@ -83,6 +83,12 @@ static unsigned int *io_tlb_list; static unsigned int io_tlb_index; /* + * Max segment that we can provide which (if pages are contingous) will + * not be bounced (unless SWIOTLB_FORCE is set). + */ +unsigned int max_segment; + +/* * We need to save away the original address corresponding to a mapped entry * for the sync operations. */ @@ -106,8 +112,12 @@ setup_io_tlb_npages(char *str) } if (*str == ',') ++str; - if (!strcmp(str, "force")) - swiotlb_force = 1; + if (!strcmp(str, "force")) { + swiotlb_force = SWIOTLB_FORCE; + } else if (!strcmp(str, "noforce")) { + swiotlb_force = SWIOTLB_NO_FORCE; + io_tlb_nslabs = 1; + } return 0; } @@ -120,6 +130,20 @@ unsigned long swiotlb_nr_tbl(void) } EXPORT_SYMBOL_GPL(swiotlb_nr_tbl); +unsigned int swiotlb_max_segment(void) +{ + return max_segment; +} +EXPORT_SYMBOL_GPL(swiotlb_max_segment); + +void swiotlb_set_max_segment(unsigned int val) +{ + if (swiotlb_force == SWIOTLB_FORCE) + max_segment = 1; + else + max_segment = rounddown(val, PAGE_SIZE); +} + /* default to 64MB */ #define IO_TLB_DEFAULT_SIZE (64UL<<20) unsigned long swiotlb_size_or_default(void) @@ -201,6 +225,7 @@ int __init swiotlb_init_with_tbl(char *tlb, unsigned long nslabs, int verbose) if (verbose) swiotlb_print_info(); + swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); return 0; } @@ -279,6 +304,7 @@ swiotlb_late_init_with_default_size(size_t default_size) rc = swiotlb_late_init_with_tbl(vstart, io_tlb_nslabs); if (rc) free_pages((unsigned long)vstart, order); + return rc; } @@ -333,6 +359,8 @@ swiotlb_late_init_with_tbl(char *tlb, unsigned long nslabs) late_alloc = 1; + swiotlb_set_max_segment(io_tlb_nslabs << IO_TLB_SHIFT); + return 0; cleanup4: @@ -347,6 +375,7 @@ cleanup2: io_tlb_end = 0; io_tlb_start = 0; io_tlb_nslabs = 0; + max_segment = 0; return -ENOMEM; } @@ -375,6 +404,7 @@ void __init swiotlb_free(void) PAGE_ALIGN(io_tlb_nslabs << IO_TLB_SHIFT)); } io_tlb_nslabs = 0; + max_segment = 0; } int is_swiotlb_buffer(phys_addr_t paddr) @@ -453,11 +483,11 @@ phys_addr_t swiotlb_tbl_map_single(struct device *hwdev, : 1UL << (BITS_PER_LONG - IO_TLB_SHIFT); /* - * For mappings greater than a page, we limit the stride (and - * hence alignment) to a page size. + * For mappings greater than or equal to a page, we limit the stride + * (and hence alignment) to a page size. 
*/ nslots = ALIGN(size, 1 << IO_TLB_SHIFT) >> IO_TLB_SHIFT; - if (size > PAGE_SIZE) + if (size >= PAGE_SIZE) stride = (1 << (PAGE_SHIFT - IO_TLB_SHIFT)); else stride = 1; @@ -543,8 +573,15 @@ static phys_addr_t map_single(struct device *hwdev, phys_addr_t phys, size_t size, enum dma_data_direction dir, unsigned long attrs) { - dma_addr_t start_dma_addr = phys_to_dma(hwdev, io_tlb_start); + dma_addr_t start_dma_addr; + if (swiotlb_force == SWIOTLB_NO_FORCE) { + dev_warn_ratelimited(hwdev, "Cannot do DMA to address %pa\n", + &phys); + return SWIOTLB_MAP_ERROR; + } + + start_dma_addr = phys_to_dma(hwdev, io_tlb_start); return swiotlb_tbl_map_single(hwdev, start_dma_addr, phys, size, dir, attrs); } @@ -721,6 +758,9 @@ static void swiotlb_full(struct device *dev, size_t size, enum dma_data_direction dir, int do_panic) { + if (swiotlb_force == SWIOTLB_NO_FORCE) + return; + /* * Ran out of IOMMU space for this operation. This is very bad. * Unfortunately the drivers cannot handle this operation properly. @@ -763,7 +803,7 @@ dma_addr_t swiotlb_map_page(struct device *dev, struct page *page, * we can safely return the device addr and not worry about bounce * buffering it. */ - if (dma_capable(dev, dev_addr, size) && !swiotlb_force) + if (dma_capable(dev, dev_addr, size) && swiotlb_force != SWIOTLB_FORCE) return dev_addr; trace_swiotlb_bounced(dev, dev_addr, size, swiotlb_force); @@ -904,7 +944,7 @@ swiotlb_map_sg_attrs(struct device *hwdev, struct scatterlist *sgl, int nelems, phys_addr_t paddr = sg_phys(sg); dma_addr_t dev_addr = phys_to_dma(hwdev, paddr); - if (swiotlb_force || + if (swiotlb_force == SWIOTLB_FORCE || !dma_capable(hwdev, dev_addr, sg->length)) { phys_addr_t map = map_single(hwdev, sg_phys(sg), sg->length, dir, attrs); diff --git a/lib/timerqueue.c b/lib/timerqueue.c index adc6ee0a5126..4a720ed4fdaf 100644 --- a/lib/timerqueue.c +++ b/lib/timerqueue.c @@ -80,8 +80,7 @@ bool timerqueue_del(struct timerqueue_head *head, struct timerqueue_node *node) if (head->next == node) { struct rb_node *rbn = rb_next(&node->node); - head->next = rbn ? - rb_entry(rbn, struct timerqueue_node, node) : NULL; + head->next = rb_entry_safe(rbn, struct timerqueue_node, node); } rb_erase(&node->node, &head->head); RB_CLEAR_NODE(&node->node); |
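The timerqueue change just above is a pure cleanup: rb_entry_safe() folds the "NULL stays NULL, otherwise container_of()" pattern into a single helper. The same idiom looks like this in plain userspace C, using illustrative struct names and the GCC/clang statement expressions that the kernel macro itself relies on.

/*
 * Userspace sketch of the entry_safe() idiom used by the timerqueue
 * cleanup above: NULL in, NULL out, otherwise container_of(). The
 * struct names are illustrative, not from the kernel.
 */
#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

/* Like the kernel's rb_entry_safe(): tolerate a NULL node pointer. */
#define entry_safe(ptr, type, member) ({                        \
        __typeof__(ptr) ____ptr = (ptr);                        \
        ____ptr ? container_of(____ptr, type, member) : NULL;   \
})

struct node {
        struct node *next;
};

struct timer {
        unsigned long expires;
        struct node link;
};

int main(void)
{
        struct timer t = { .expires = 42 };
        struct node *present = &t.link;
        struct node *absent = NULL;

        /* Old style: next = rbn ? rb_entry(rbn, ...) : NULL; */
        struct timer *a = present ? container_of(present, struct timer, link) : NULL;
        /* New style: next = rb_entry_safe(rbn, ...); */
        struct timer *b = entry_safe(absent, struct timer, link);

        printf("present -> expires %lu, absent -> %p\n", a->expires, (void *)b);
        return 0;
}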
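Back in the lib/swiotlb.c hunk, the stride test was tightened from "greater than a page" to "greater than or equal to a page", so page-sized mappings also get page-aligned bounce slots. The arithmetic is easier to see outside the kernel; the sketch below assumes the usual 2 KiB swiotlb slab (IO_TLB_SHIFT of 11) and 4 KiB pages, neither of which is spelled out in this diff.

/*
 * Userspace sketch of the swiotlb slot/stride arithmetic changed above.
 * IO_TLB_SHIFT = 11 and PAGE_SHIFT = 12 are assumed values.
 */
#include <stdio.h>

#define IO_TLB_SHIFT    11
#define PAGE_SHIFT      12
#define PAGE_SIZE       (1UL << PAGE_SHIFT)
#define ALIGN(x, a)     (((x) + (a) - 1) & ~((unsigned long)(a) - 1))

int main(void)
{
        unsigned long sizes[] = { 512, 2048, 4096, 8192, 65536 };
        unsigned int i;

        for (i = 0; i < sizeof(sizes) / sizeof(sizes[0]); i++) {
                unsigned long size = sizes[i];
                unsigned long nslots = ALIGN(size, 1UL << IO_TLB_SHIFT) >> IO_TLB_SHIFT;
                /* After the change, page-sized mappings are page aligned too. */
                unsigned long stride = (size >= PAGE_SIZE) ?
                                (1UL << (PAGE_SHIFT - IO_TLB_SHIFT)) : 1;

                printf("size %6lu -> %2lu slots, search stride %lu\n",
                       size, nslots, stride);
        }
        return 0;
}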
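Also worth unpacking is sbitmap_bitmap_show() from the lib/sbitmap.c hunk earlier in the diff: it repacks words of arbitrary depth into bytes so the hex dump looks the same regardless of word size. Below is a standalone userspace sketch of that packing loop, with printf() standing in for seq_printf() and a small hand-built bitmap in main() whose words and depths are made up.

/*
 * Userspace sketch of the byte-packing loop in sbitmap_bitmap_show()
 * above. The sample words and depths are arbitrary.
 */
#include <stdio.h>

struct map {
        unsigned long word;     /* bit i set => object i in this word is busy */
        unsigned int depth;     /* number of valid bits in this word */
};

static void emit_byte(unsigned int offset, unsigned char byte)
{
        if ((offset & 0xf) == 0) {
                if (offset != 0)
                        putchar('\n');
                printf("%08x:", offset);
        }
        if ((offset & 0x1) == 0)
                putchar(' ');
        printf("%02x", byte);
}

static void bitmap_show(const struct map *maps, unsigned int nr)
{
        unsigned char byte = 0;
        unsigned int byte_bits = 0, offset = 0, i;

        for (i = 0; i < nr; i++) {
                unsigned long word = maps[i].word;
                unsigned int word_bits = maps[i].depth;

                while (word_bits > 0) {
                        /* Take up to 8 bits at a time from the current word. */
                        unsigned int bits = 8 - byte_bits;

                        if (bits > word_bits)
                                bits = word_bits;
                        byte |= (word & ((1UL << bits) - 1)) << byte_bits;
                        byte_bits += bits;
                        if (byte_bits == 8) {
                                emit_byte(offset, byte);
                                byte = 0;
                                byte_bits = 0;
                                offset++;
                        }
                        word >>= bits;
                        word_bits -= bits;
                }
        }
        if (byte_bits)
                emit_byte(offset++, byte);
        if (offset)
                putchar('\n');
}

int main(void)
{
        /* Two partially used words, e.g. a 20-deep bitmap split 12 + 8. */
        struct map maps[] = { { 0x0a5UL, 12 }, { 0x3cUL, 8 } };

        bitmap_show(maps, 2);
        return 0;
}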