summaryrefslogtreecommitdiff
path: root/kernel/futex/syscalls.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/futex/syscalls.c')
-rw-r--r--kernel/futex/syscalls.c279
1 files changed, 279 insertions, 0 deletions
diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c
new file mode 100644
index 000000000000..6e7e36c640a1
--- /dev/null
+++ b/kernel/futex/syscalls.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/compat.h>
+#include <linux/syscalls.h>
+#include <linux/time_namespace.h>
+
+#include "futex.h"
+
+/*
+ * Support for robust futexes: the kernel cleans up held futexes at
+ * thread exit time.
+ *
+ * Implementation: user-space maintains a per-thread list of locks it
+ * is holding. Upon do_exit(), the kernel carefully walks this list,
+ * and marks all locks that are owned by this thread with the
+ * FUTEX_OWNER_DIED bit, and wakes up a waiter (if any). The list is
+ * always manipulated with the lock held, so the list is private and
+ * per-thread. Userspace also maintains a per-thread 'list_op_pending'
+ * field, to allow the kernel to clean up if the thread dies after
+ * acquiring the lock, but just before it could have added itself to
+ * the list. There can only be one such pending lock.
+ */
+
+/**
+ * sys_set_robust_list() - Set the robust-futex list head of a task
+ * @head: pointer to the list-head
+ * @len: length of the list-head, as userspace expects
+ */
+SYSCALL_DEFINE2(set_robust_list, struct robust_list_head __user *, head,
+ size_t, len)
+{
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+ /*
+ * The kernel knows only one size for now:
+ */
+ if (unlikely(len != sizeof(*head)))
+ return -EINVAL;
+
+ current->robust_list = head;
+
+ return 0;
+}
+
+/**
+ * sys_get_robust_list() - Get the robust-futex list head of a task
+ * @pid: pid of the process [zero for current task]
+ * @head_ptr: pointer to a list-head pointer, the kernel fills it in
+ * @len_ptr: pointer to a length field, the kernel fills in the header size
+ */
+SYSCALL_DEFINE3(get_robust_list, int, pid,
+ struct robust_list_head __user * __user *, head_ptr,
+ size_t __user *, len_ptr)
+{
+ struct robust_list_head __user *head;
+ unsigned long ret;
+ struct task_struct *p;
+
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
+ rcu_read_lock();
+
+ ret = -ESRCH;
+ if (!pid)
+ p = current;
+ else {
+ p = find_task_by_vpid(pid);
+ if (!p)
+ goto err_unlock;
+ }
+
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+ goto err_unlock;
+
+ head = p->robust_list;
+ rcu_read_unlock();
+
+ if (put_user(sizeof(*head), len_ptr))
+ return -EFAULT;
+ return put_user(head, head_ptr);
+
+err_unlock:
+ rcu_read_unlock();
+
+ return ret;
+}
+
+long do_futex(u32 __user *uaddr, int op, u32 val, ktime_t *timeout,
+ u32 __user *uaddr2, u32 val2, u32 val3)
+{
+ int cmd = op & FUTEX_CMD_MASK;
+ unsigned int flags = 0;
+
+ if (!(op & FUTEX_PRIVATE_FLAG))
+ flags |= FLAGS_SHARED;
+
+ if (op & FUTEX_CLOCK_REALTIME) {
+ flags |= FLAGS_CLOCKRT;
+ if (cmd != FUTEX_WAIT_BITSET && cmd != FUTEX_WAIT_REQUEUE_PI &&
+ cmd != FUTEX_LOCK_PI2)
+ return -ENOSYS;
+ }
+
+ switch (cmd) {
+ case FUTEX_LOCK_PI:
+ case FUTEX_LOCK_PI2:
+ case FUTEX_UNLOCK_PI:
+ case FUTEX_TRYLOCK_PI:
+ case FUTEX_WAIT_REQUEUE_PI:
+ case FUTEX_CMP_REQUEUE_PI:
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+ }
+
+ switch (cmd) {
+ case FUTEX_WAIT:
+ val3 = FUTEX_BITSET_MATCH_ANY;
+ fallthrough;
+ case FUTEX_WAIT_BITSET:
+ return futex_wait(uaddr, flags, val, timeout, val3);
+ case FUTEX_WAKE:
+ val3 = FUTEX_BITSET_MATCH_ANY;
+ fallthrough;
+ case FUTEX_WAKE_BITSET:
+ return futex_wake(uaddr, flags, val, val3);
+ case FUTEX_REQUEUE:
+ return futex_requeue(uaddr, flags, uaddr2, val, val2, NULL, 0);
+ case FUTEX_CMP_REQUEUE:
+ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 0);
+ case FUTEX_WAKE_OP:
+ return futex_wake_op(uaddr, flags, uaddr2, val, val2, val3);
+ case FUTEX_LOCK_PI:
+ flags |= FLAGS_CLOCKRT;
+ fallthrough;
+ case FUTEX_LOCK_PI2:
+ return futex_lock_pi(uaddr, flags, timeout, 0);
+ case FUTEX_UNLOCK_PI:
+ return futex_unlock_pi(uaddr, flags);
+ case FUTEX_TRYLOCK_PI:
+ return futex_lock_pi(uaddr, flags, NULL, 1);
+ case FUTEX_WAIT_REQUEUE_PI:
+ val3 = FUTEX_BITSET_MATCH_ANY;
+ return futex_wait_requeue_pi(uaddr, flags, val, timeout, val3,
+ uaddr2);
+ case FUTEX_CMP_REQUEUE_PI:
+ return futex_requeue(uaddr, flags, uaddr2, val, val2, &val3, 1);
+ }
+ return -ENOSYS;
+}
+
+static __always_inline bool futex_cmd_has_timeout(u32 cmd)
+{
+ switch (cmd) {
+ case FUTEX_WAIT:
+ case FUTEX_LOCK_PI:
+ case FUTEX_LOCK_PI2:
+ case FUTEX_WAIT_BITSET:
+ case FUTEX_WAIT_REQUEUE_PI:
+ return true;
+ }
+ return false;
+}
+
+static __always_inline int
+futex_init_timeout(u32 cmd, u32 op, struct timespec64 *ts, ktime_t *t)
+{
+ if (!timespec64_valid(ts))
+ return -EINVAL;
+
+ *t = timespec64_to_ktime(*ts);
+ if (cmd == FUTEX_WAIT)
+ *t = ktime_add_safe(ktime_get(), *t);
+ else if (cmd != FUTEX_LOCK_PI && !(op & FUTEX_CLOCK_REALTIME))
+ *t = timens_ktime_to_host(CLOCK_MONOTONIC, *t);
+ return 0;
+}
+
+SYSCALL_DEFINE6(futex, u32 __user *, uaddr, int, op, u32, val,
+ const struct __kernel_timespec __user *, utime,
+ u32 __user *, uaddr2, u32, val3)
+{
+ int ret, cmd = op & FUTEX_CMD_MASK;
+ ktime_t t, *tp = NULL;
+ struct timespec64 ts;
+
+ if (utime && futex_cmd_has_timeout(cmd)) {
+ if (unlikely(should_fail_futex(!(op & FUTEX_PRIVATE_FLAG))))
+ return -EFAULT;
+ if (get_timespec64(&ts, utime))
+ return -EFAULT;
+ ret = futex_init_timeout(cmd, op, &ts, &t);
+ if (ret)
+ return ret;
+ tp = &t;
+ }
+
+ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
+}
+
+#ifdef CONFIG_COMPAT
+COMPAT_SYSCALL_DEFINE2(set_robust_list,
+ struct compat_robust_list_head __user *, head,
+ compat_size_t, len)
+{
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
+ if (unlikely(len != sizeof(*head)))
+ return -EINVAL;
+
+ current->compat_robust_list = head;
+
+ return 0;
+}
+
+COMPAT_SYSCALL_DEFINE3(get_robust_list, int, pid,
+ compat_uptr_t __user *, head_ptr,
+ compat_size_t __user *, len_ptr)
+{
+ struct compat_robust_list_head __user *head;
+ unsigned long ret;
+ struct task_struct *p;
+
+ if (!futex_cmpxchg_enabled)
+ return -ENOSYS;
+
+ rcu_read_lock();
+
+ ret = -ESRCH;
+ if (!pid)
+ p = current;
+ else {
+ p = find_task_by_vpid(pid);
+ if (!p)
+ goto err_unlock;
+ }
+
+ ret = -EPERM;
+ if (!ptrace_may_access(p, PTRACE_MODE_READ_REALCREDS))
+ goto err_unlock;
+
+ head = p->compat_robust_list;
+ rcu_read_unlock();
+
+ if (put_user(sizeof(*head), len_ptr))
+ return -EFAULT;
+ return put_user(ptr_to_compat(head), head_ptr);
+
+err_unlock:
+ rcu_read_unlock();
+
+ return ret;
+}
+#endif /* CONFIG_COMPAT */
+
+#ifdef CONFIG_COMPAT_32BIT_TIME
+SYSCALL_DEFINE6(futex_time32, u32 __user *, uaddr, int, op, u32, val,
+ const struct old_timespec32 __user *, utime, u32 __user *, uaddr2,
+ u32, val3)
+{
+ int ret, cmd = op & FUTEX_CMD_MASK;
+ ktime_t t, *tp = NULL;
+ struct timespec64 ts;
+
+ if (utime && futex_cmd_has_timeout(cmd)) {
+ if (get_old_timespec32(&ts, utime))
+ return -EFAULT;
+ ret = futex_init_timeout(cmd, op, &ts, &t);
+ if (ret)
+ return ret;
+ tp = &t;
+ }
+
+ return do_futex(uaddr, op, val, tp, uaddr2, (unsigned long)utime, val3);
+}
+#endif /* CONFIG_COMPAT_32BIT_TIME */
+