From a227960fe0cafcc229a8d6bb8b454a3a0b33719d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 10 Apr 2014 16:15:59 +0200 Subject: locking/mutex: Fix debug_mutexes debug_mutex_unlock() would bail when !debug_locks and forgets to actually unlock. Reported-by: "Michael L. Semon" Reported-by: "Kirill A. Shutemov" Reported-by: Valdis Kletnieks Fixes: 6f008e72cd11 ("locking/mutex: Fix debug checks") Tested-by: Dave Jones Cc: Jason Low Signed-off-by: Peter Zijlstra Link: http://lkml.kernel.org/r/20140410141559.GE13658@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- kernel/locking/mutex-debug.c | 19 +++++++++---------- 1 file changed, 9 insertions(+), 10 deletions(-) (limited to 'kernel') diff --git a/kernel/locking/mutex-debug.c b/kernel/locking/mutex-debug.c index e1191c996c59..5cf6731b98e9 100644 --- a/kernel/locking/mutex-debug.c +++ b/kernel/locking/mutex-debug.c @@ -71,18 +71,17 @@ void mutex_remove_waiter(struct mutex *lock, struct mutex_waiter *waiter, void debug_mutex_unlock(struct mutex *lock) { - if (unlikely(!debug_locks)) - return; + if (likely(debug_locks)) { + DEBUG_LOCKS_WARN_ON(lock->magic != lock); - DEBUG_LOCKS_WARN_ON(lock->magic != lock); + if (!lock->owner) + DEBUG_LOCKS_WARN_ON(!lock->owner); + else + DEBUG_LOCKS_WARN_ON(lock->owner != current); - if (!lock->owner) - DEBUG_LOCKS_WARN_ON(!lock->owner); - else - DEBUG_LOCKS_WARN_ON(lock->owner != current); - - DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); - mutex_clear_owner(lock); + DEBUG_LOCKS_WARN_ON(!lock->wait_list.prev && !lock->wait_list.next); + mutex_clear_owner(lock); + } /* * __mutex_slowpath_needs_to_unlock() is explicitly 0 for debug -- cgit v1.2.3 From 2eac7648321f4a08aa4078504d7727af0af7173b Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 14 Apr 2014 21:02:59 +0200 Subject: seccomp: fix populating a0-a5 syscall args in 32-bit x86 BPF Linus reports that on 32-bit x86 Chromium throws the following seccomp resp. audit log messages: audit: type=1326 audit(1397359304.356:28108): auid=500 uid=500 gid=500 ses=2 subj=unconfined_u:unconfined_r:chrome_sandbox_t:s0-s0:c0.c1023 pid=3677 comm="chrome" exe="/opt/google/chrome/chrome" sig=0 syscall=172 compat=0 ip=0xb2dd9852 code=0x30000 audit: type=1326 audit(1397359304.356:28109): auid=500 uid=500 gid=500 ses=2 subj=unconfined_u:unconfined_r:chrome_sandbox_t:s0-s0:c0.c1023 pid=3677 comm="chrome" exe="/opt/google/chrome/chrome" sig=0 syscall=5 compat=0 ip=0xb2dd9852 code=0x50000 These audit messages are being triggered via audit_seccomp() through __secure_computing() in seccomp mode (BPF) filter with seccomp return codes 0x30000 (== SECCOMP_RET_TRAP) and 0x50000 (== SECCOMP_RET_ERRNO) during filter runtime. Moreover, Linus reports that x86_64 Chromium seems fine. The underlying issue that explains this is that the implementation of populate_seccomp_data() is wrong. Our seccomp data structure sd that is being shared with user ABI is: struct seccomp_data { int nr; __u32 arch; __u64 instruction_pointer; __u64 args[6]; }; Therefore, a simple cast to 'unsigned long *' for storing the value of the syscall argument via syscall_get_arguments() is just wrong as on 32-bit x86 (or any other 32bit arch), it would result in storing a0-a5 at wrong offsets in args[] member, and thus i) could leak stack memory to user space and ii) tampers with the logic of seccomp BPF programs that read out and check for syscall arguments: syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]); Tested on 32-bit x86 with Google Chrome, unfortunately only via remote test machine through slow ssh X forwarding, but it fixes the issue on my side. So fix it up by storing args in type correct variables, gcc is clever and optimizes the copy away in other cases, e.g. x86_64. Fixes: bd4cf0ed331a ("net: filter: rework/optimize internal BPF interpreter's instruction set") Reported-and-bisected-by: Linus Torvalds Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Cc: Linus Torvalds Cc: Eric Paris Cc: James Morris Cc: Kees Cook Signed-off-by: David S. Miller --- kernel/seccomp.c | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'kernel') diff --git a/kernel/seccomp.c b/kernel/seccomp.c index d8d046c0726a..590c37925084 100644 --- a/kernel/seccomp.c +++ b/kernel/seccomp.c @@ -69,18 +69,17 @@ static void populate_seccomp_data(struct seccomp_data *sd) { struct task_struct *task = current; struct pt_regs *regs = task_pt_regs(task); + unsigned long args[6]; sd->nr = syscall_get_nr(task, regs); sd->arch = syscall_get_arch(); - - /* Unroll syscall_get_args to help gcc on arm. */ - syscall_get_arguments(task, regs, 0, 1, (unsigned long *) &sd->args[0]); - syscall_get_arguments(task, regs, 1, 1, (unsigned long *) &sd->args[1]); - syscall_get_arguments(task, regs, 2, 1, (unsigned long *) &sd->args[2]); - syscall_get_arguments(task, regs, 3, 1, (unsigned long *) &sd->args[3]); - syscall_get_arguments(task, regs, 4, 1, (unsigned long *) &sd->args[4]); - syscall_get_arguments(task, regs, 5, 1, (unsigned long *) &sd->args[5]); - + syscall_get_arguments(task, regs, 0, 6, args); + sd->args[0] = args[0]; + sd->args[1] = args[1]; + sd->args[2] = args[2]; + sd->args[3] = args[3]; + sd->args[4] = args[4]; + sd->args[5] = args[5]; sd->instruction_pointer = KSTK_EIP(task); } -- cgit v1.2.3 From e79323bd87808fdfbc68ce6c5371bd224d9672ee Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Mon, 14 Apr 2014 16:58:55 -0400 Subject: user namespace: fix incorrect memory barriers smp_read_barrier_depends() can be used if there is data dependency between the readers - i.e. if the read operation after the barrier uses address that was obtained from the read operation before the barrier. In this file, there is only control dependency, no data dependecy, so the use of smp_read_barrier_depends() is incorrect. The code could fail in the following way: * the cpu predicts that idx < entries is true and starts executing the body of the for loop * the cpu fetches map->extent[0].first and map->extent[0].count * the cpu fetches map->nr_extents * the cpu verifies that idx < extents is true, so it commits the instructions in the body of the for loop The problem is that in this scenario, the cpu read map->extent[0].first and map->nr_extents in the wrong order. We need a full read memory barrier to prevent it. Signed-off-by: Mikulas Patocka Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- kernel/user_namespace.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'kernel') diff --git a/kernel/user_namespace.c b/kernel/user_namespace.c index 0d8f6023fd8d..bf71b4b2d632 100644 --- a/kernel/user_namespace.c +++ b/kernel/user_namespace.c @@ -152,7 +152,7 @@ static u32 map_id_range_down(struct uid_gid_map *map, u32 id, u32 count) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -176,7 +176,7 @@ static u32 map_id_down(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].first; last = first + map->extent[idx].count - 1; @@ -199,7 +199,7 @@ static u32 map_id_up(struct uid_gid_map *map, u32 id) /* Find the matching extent */ extents = map->nr_extents; - smp_read_barrier_depends(); + smp_rmb(); for (idx = 0; idx < extents; idx++) { first = map->extent[idx].lower_first; last = first + map->extent[idx].count - 1; @@ -615,9 +615,8 @@ static ssize_t map_write(struct file *file, const char __user *buf, * were written before the count of the extents. * * To achieve this smp_wmb() is used on guarantee the write - * order and smp_read_barrier_depends() is guaranteed that we - * don't have crazy architectures returning stale data. - * + * order and smp_rmb() is guaranteed that we don't have crazy + * architectures returning stale data. */ mutex_lock(&id_map_mutex); -- cgit v1.2.3