From 4d6923733f158e7f8f0695b43c30c22a59ec0a34 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Wed, 27 Aug 2014 11:19:26 -0400 Subject: ww-mutex: clarify help text for DEBUG_WW_MUTEX_SLOWPATH We really don't want distro's enabling this in their kernels. Try and make that more clear. Signed-off-by: Rob Clark Acked-by: Maarten Lankhorst Signed-off-by: Dave Airlie --- lib/Kconfig.debug | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 07c28323f88f..1b233fc67466 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -892,6 +892,10 @@ config DEBUG_WW_MUTEX_SLOWPATH the full mutex checks enabled with (CONFIG_PROVE_LOCKING) this will test all possible w/w mutex interface abuse with the exception of simply not acquiring all the required locks. + Note that this feature can introduce significant overhead, so + it really should not be enabled in a production or distro kernel, + even a debug kernel. If you are a driver writer, enable it. If + you are a distro, do not. config DEBUG_LOCK_ALLOC bool "Lock debugging: detect incorrect freeing of live locks" -- cgit v1.2.3 From 0c38e1fe0fced6aa06dbf444f7203dd7f325e369 Mon Sep 17 00:00:00 2001 From: Dave Jones Date: Fri, 29 Aug 2014 15:18:35 -0700 Subject: lib: turn CONFIG_STACKTRACE into an actual option. I was puzzled why /proc/$$/stack had disappeared, until I figured out I had disabled the last debug option that did a 'select STACKTRACE'. This patch makes the option show up at config time, so it can be enabled without enabling any of the more heavyweight debug options. Signed-off-by: Dave Jones Acked-by: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- lib/Kconfig.debug | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 1b233fc67466..a28590083622 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -1036,8 +1036,13 @@ config TRACE_IRQFLAGS either tracing or lock debugging. config STACKTRACE - bool + bool "Stack backtrace support" depends on STACKTRACE_SUPPORT + help + This option causes the kernel to create a /proc/pid/stack for + every process, showing its current stack trace. + It is also used by various kernel debugging features that require + stack trace generation. config DEBUG_KOBJECT bool "kobject debugging" -- cgit v1.2.3 From 27419604f51a97d497853f14142c1059d46eb597 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Sep 2014 13:52:20 +0100 Subject: KEYS: Fix use-after-free in assoc_array_gc() An edit script should be considered inaccessible by a function once it has called assoc_array_apply_edit() or assoc_array_cancel_edit(). However, assoc_array_gc() is accessing the edit script just after the gc_complete: label. Reported-by: Andreea-Cristina Bernat Signed-off-by: David Howells Reviewed-by: Andreea-Cristina Bernat cc: shemming@brocade.com cc: paulmck@linux.vnet.ibm.com Cc: stable@vger.kernel.org Signed-off-by: James Morris --- lib/assoc_array.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/assoc_array.c b/lib/assoc_array.c index c0b1007011e1..ae146f0734eb 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -1735,7 +1735,7 @@ ascend_old_tree: gc_complete: edit->set[0].to = new_root; assoc_array_apply_edit(edit); - edit->array->nr_leaves_on_tree = nr_leaves_on_tree; + array->nr_leaves_on_tree = nr_leaves_on_tree; return 0; enomem: -- cgit v1.2.3 From 95389b08d93d5c06ec63ab49bd732b0069b7c35e Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 10 Sep 2014 22:22:00 +0100 Subject: KEYS: Fix termination condition in assoc array garbage collection This fixes CVE-2014-3631. It is possible for an associative array to end up with a shortcut node at the root of the tree if there are more than fan-out leaves in the tree, but they all crowd into the same slot in the lowest level (ie. they all have the same first nibble of their index keys). When assoc_array_gc() returns back up the tree after scanning some leaves, it can fall off of the root and crash because it assumes that the back pointer from a shortcut (after label ascend_old_tree) must point to a normal node - which isn't true of a shortcut node at the root. Should we find we're ascending rootwards over a shortcut, we should check to see if the backpointer is zero - and if it is, we have completed the scan. This particular bug cannot occur if the root node is not a shortcut - ie. if you have fewer than 17 keys in a keyring or if you have at least two keys that sit into separate slots (eg. a keyring and a non keyring). This can be reproduced by: ring=`keyctl newring bar @s` for ((i=1; i<=18; i++)); do last_key=`keyctl newring foo$i $ring`; done keyctl timeout $last_key 2 Doing this: echo 3 >/proc/sys/kernel/keys/gc_delay first will speed things up. If we do fall off of the top of the tree, we get the following oops: BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: [] assoc_array_gc+0x2f7/0x540 PGD dae15067 PUD cfc24067 PMD 0 Oops: 0000 [#1] SMP Modules linked in: xt_nat xt_mark nf_conntrack_netbios_ns nf_conntrack_broadcast ip6t_rpfilter ip6t_REJECT xt_conntrack ebtable_nat ebtable_broute bridge stp llc ebtable_filter ebtables ip6table_ni CPU: 0 PID: 26011 Comm: kworker/0:1 Not tainted 3.14.9-200.fc20.x86_64 #1 Hardware name: Bochs Bochs, BIOS Bochs 01/01/2011 Workqueue: events key_garbage_collector task: ffff8800918bd580 ti: ffff8800aac14000 task.ti: ffff8800aac14000 RIP: 0010:[] [] assoc_array_gc+0x2f7/0x540 RSP: 0018:ffff8800aac15d40 EFLAGS: 00010206 RAX: 0000000000000000 RBX: 0000000000000000 RCX: ffff8800aaecacc0 RDX: ffff8800daecf440 RSI: 0000000000000001 RDI: ffff8800aadc2bc0 RBP: ffff8800aac15da8 R08: 0000000000000001 R09: 0000000000000003 R10: ffffffff8136ccc7 R11: 0000000000000000 R12: 0000000000000000 R13: 0000000000000000 R14: 0000000000000070 R15: 0000000000000001 FS: 0000000000000000(0000) GS:ffff88011fc00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 000000008005003b CR2: 0000000000000018 CR3: 00000000db10d000 CR4: 00000000000006f0 Stack: ffff8800aac15d50 0000000000000011 ffff8800aac15db8 ffffffff812e2a70 ffff880091a00600 0000000000000000 ffff8800aadc2bc3 00000000cd42c987 ffff88003702df20 ffff88003702dfa0 0000000053b65c09 ffff8800aac15fd8 Call Trace: [] ? keyring_detect_cycle_iterator+0x30/0x30 [] keyring_gc+0x75/0x80 [] key_garbage_collector+0x154/0x3c0 [] process_one_work+0x176/0x430 [] worker_thread+0x11b/0x3a0 [] ? rescuer_thread+0x3b0/0x3b0 [] kthread+0xd8/0xf0 [] ? insert_kthread_work+0x40/0x40 [] ret_from_fork+0x7c/0xb0 [] ? insert_kthread_work+0x40/0x40 Code: 08 4c 8b 22 0f 84 bf 00 00 00 41 83 c7 01 49 83 e4 fc 41 83 ff 0f 4c 89 65 c0 0f 8f 5a fe ff ff 48 8b 45 c0 4d 63 cf 49 83 c1 02 <4e> 8b 34 c8 4d 85 f6 0f 84 be 00 00 00 41 f6 c6 01 0f 84 92 RIP [] assoc_array_gc+0x2f7/0x540 RSP CR2: 0000000000000018 ---[ end trace 1129028a088c0cbd ]--- Signed-off-by: David Howells Acked-by: Don Zickus Signed-off-by: James Morris --- lib/assoc_array.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/lib/assoc_array.c b/lib/assoc_array.c index ae146f0734eb..2404d03e251a 100644 --- a/lib/assoc_array.c +++ b/lib/assoc_array.c @@ -1723,11 +1723,13 @@ ascend_old_tree: shortcut = assoc_array_ptr_to_shortcut(ptr); slot = shortcut->parent_slot; cursor = shortcut->back_pointer; + if (!cursor) + goto gc_complete; } else { slot = node->parent_slot; cursor = ptr; } - BUG_ON(!ptr); + BUG_ON(!cursor); node = assoc_array_ptr_to_node(cursor); slot++; goto continue_node; -- cgit v1.2.3 From 72d931046030beb2d13dad6d205be0e228618432 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 13 Sep 2014 11:14:53 -0700 Subject: Make ARCH_HAS_FAST_MULTIPLIER a real config variable It used to be an ad-hoc hack defined by the x86 version of that enabled a couple of library routines to know whether an integer multiply is faster than repeated shifts and additions. This just makes it use the real Kconfig system instead, and makes x86 (which was the only architecture that did this) select the option. NOTE! Even for x86, this really is kind of wrong. If we cared, we would probably not enable this for builds optimized for netburst (P4), where shifts-and-adds are generally faster than multiplies. This patch does *not* change that kind of logic, though, it is purely a syntactic change with no code changes. This was triggered by the fact that we have other places that really want to know "do I want to expand multiples by constants by hand or not", particularly the hash generation code. Signed-off-by: Linus Torvalds --- arch/x86/Kconfig | 1 + arch/x86/include/asm/bitops.h | 2 -- lib/Kconfig | 3 +++ lib/hweight.c | 4 ++-- lib/string.c | 4 ++-- 5 files changed, 8 insertions(+), 6 deletions(-) (limited to 'lib') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 778178f4c7d1..36327438caf0 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -23,6 +23,7 @@ config X86 def_bool y select ARCH_MIGHT_HAVE_ACPI_PDC if ACPI select ARCH_HAS_DEBUG_STRICT_USER_COPY_CHECKS + select ARCH_HAS_FAST_MULTIPLIER select ARCH_MIGHT_HAVE_PC_PARPORT select ARCH_MIGHT_HAVE_PC_SERIO select HAVE_AOUT if X86_32 diff --git a/arch/x86/include/asm/bitops.h b/arch/x86/include/asm/bitops.h index afcd35d331de..cfe3b954d5e4 100644 --- a/arch/x86/include/asm/bitops.h +++ b/arch/x86/include/asm/bitops.h @@ -497,8 +497,6 @@ static __always_inline int fls64(__u64 x) #include -#define ARCH_HAS_FAST_MULTIPLIER 1 - #include #include diff --git a/lib/Kconfig b/lib/Kconfig index a5ce0c7f6c30..54cf309a92a5 100644 --- a/lib/Kconfig +++ b/lib/Kconfig @@ -51,6 +51,9 @@ config PERCPU_RWSEM config ARCH_USE_CMPXCHG_LOCKREF bool +config ARCH_HAS_FAST_MULTIPLIER + bool + config CRC_CCITT tristate "CRC-CCITT functions" help diff --git a/lib/hweight.c b/lib/hweight.c index b7d81ba143d1..9a5c1f221558 100644 --- a/lib/hweight.c +++ b/lib/hweight.c @@ -11,7 +11,7 @@ unsigned int __sw_hweight32(unsigned int w) { -#ifdef ARCH_HAS_FAST_MULTIPLIER +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x55555555; w = (w & 0x33333333) + ((w >> 2) & 0x33333333); w = (w + (w >> 4)) & 0x0f0f0f0f; @@ -49,7 +49,7 @@ unsigned long __sw_hweight64(__u64 w) return __sw_hweight32((unsigned int)(w >> 32)) + __sw_hweight32((unsigned int)w); #elif BITS_PER_LONG == 64 -#ifdef ARCH_HAS_FAST_MULTIPLIER +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER w -= (w >> 1) & 0x5555555555555555ul; w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul); w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful; diff --git a/lib/string.c b/lib/string.c index 992bf30af759..f3c6ff596414 100644 --- a/lib/string.c +++ b/lib/string.c @@ -807,9 +807,9 @@ void *memchr_inv(const void *start, int c, size_t bytes) return check_bytes8(start, value, bytes); value64 = value; -#if defined(ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64 +#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64 value64 *= 0x0101010101010101; -#elif defined(ARCH_HAS_FAST_MULTIPLIER) +#elif defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) value64 *= 0x01010101; value64 |= value64 << 32; #else -- cgit v1.2.3 From b3f2512ecdb3561ffa44737f370fdb78e1febf6b Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 17 Sep 2014 21:07:19 +0200 Subject: lib: rhashtable: remove second linux/log2.h inclusion linux/log2.h was included twice. Signed-off-by: Fabian Frederick Signed-off-by: David S. Miller --- lib/rhashtable.c | 1 - 1 file changed, 1 deletion(-) (limited to 'lib') diff --git a/lib/rhashtable.c b/lib/rhashtable.c index a2c78810ebc1..7b36e4d40ed7 100644 --- a/lib/rhashtable.c +++ b/lib/rhashtable.c @@ -23,7 +23,6 @@ #include #include #include -#include #define HASH_DEFAULT_SIZE 64UL #define HASH_MIN_SIZE 4UL -- cgit v1.2.3 From 0a30288da1aec914e158c2d7a3482a85f632750f Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 23 Sep 2014 15:24:32 -0400 Subject: blk-mq, percpu_ref: implement a kludge for SCSI blk-mq stall during probe blk-mq uses percpu_ref for its usage counter which tracks the number of in-flight commands and used to synchronously drain the queue on freeze. percpu_ref shutdown takes measureable wallclock time as it involves a sched RCU grace period. This means that draining a blk-mq takes measureable wallclock time. One would think that this shouldn't matter as queue shutdown should be a rare event which takes place asynchronously w.r.t. userland. Unfortunately, SCSI probing involves synchronously setting up and then tearing down a lot of request_queues back-to-back for non-existent LUNs. This means that SCSI probing may take more than ten seconds when scsi-mq is used. This will be properly fixed by implementing a mechanism to keep q->mq_usage_counter in atomic mode till genhd registration; however, that involves rather big updates to percpu_ref which is difficult to apply late in the devel cycle (v3.17-rc6 at the moment). As a stop-gap measure till the proper fix can be implemented in the next cycle, this patch introduces __percpu_ref_kill_expedited() and makes blk_mq_freeze_queue() use it. This is heavy-handed but should work for testing the experimental SCSI blk-mq implementation. Signed-off-by: Tejun Heo Reported-by: Christoph Hellwig Link: http://lkml.kernel.org/g/20140919113815.GA10791@lst.de Fixes: add703fda981 ("blk-mq: use percpu_ref for mq usage count") Cc: Kent Overstreet Cc: Jens Axboe Tested-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-mq.c | 11 ++++++++++- include/linux/percpu-refcount.h | 1 + lib/percpu-refcount.c | 16 ++++++++++++++++ 3 files changed, 27 insertions(+), 1 deletion(-) (limited to 'lib') diff --git a/block/blk-mq.c b/block/blk-mq.c index c88e6089746d..df8e1e09dd17 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -119,7 +119,16 @@ void blk_mq_freeze_queue(struct request_queue *q) spin_unlock_irq(q->queue_lock); if (freeze) { - percpu_ref_kill(&q->mq_usage_counter); + /* + * XXX: Temporary kludge to work around SCSI blk-mq stall. + * SCSI synchronously creates and destroys many queues + * back-to-back during probe leading to lengthy stalls. + * This will be fixed by keeping ->mq_usage_counter in + * atomic mode until genhd registration, but, for now, + * let's work around using expedited synchronization. + */ + __percpu_ref_kill_expedited(&q->mq_usage_counter); + blk_mq_run_queues(q, false); } wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3dfbf237cd8f..ef5894ca8e50 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -71,6 +71,7 @@ void percpu_ref_reinit(struct percpu_ref *ref); void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); +void __percpu_ref_kill_expedited(struct percpu_ref *ref); /** * percpu_ref_kill - drop the initial ref diff --git a/lib/percpu-refcount.c b/lib/percpu-refcount.c index fe5a3342e960..a89cf09a8268 100644 --- a/lib/percpu-refcount.c +++ b/lib/percpu-refcount.c @@ -184,3 +184,19 @@ void percpu_ref_kill_and_confirm(struct percpu_ref *ref, call_rcu_sched(&ref->rcu, percpu_ref_kill_rcu); } EXPORT_SYMBOL_GPL(percpu_ref_kill_and_confirm); + +/* + * XXX: Temporary kludge to work around SCSI blk-mq stall. Used only by + * block/blk-mq.c::blk_mq_freeze_queue(). Will be removed during v3.18 + * devel cycle. Do not use anywhere else. + */ +void __percpu_ref_kill_expedited(struct percpu_ref *ref) +{ + WARN_ONCE(ref->pcpu_count_ptr & PCPU_REF_DEAD, + "percpu_ref_kill() called more than once on %pf!", + ref->release); + + ref->pcpu_count_ptr |= PCPU_REF_DEAD; + synchronize_sched_expedited(); + percpu_ref_kill_rcu(&ref->rcu); +} -- cgit v1.2.3