From 85be6d842447067ce76047a14d4258c96fd33b7b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 15 Aug 2023 12:52:04 +0200 Subject: cleanup: Make no_free_ptr() __must_check recent discussion brought about the realization that it makes sense for no_free_ptr() to have __must_check semantics in order to avoid leaking the resource. Additionally, add a few comments to clarify why/how things work. All credit to Linus on how to combine __must_check and the stmt-expression. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Link: https://lkml.kernel.org/r/20230816103102.GF980931@hirez.programming.kicks-ass.net --- include/linux/cleanup.h | 39 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 36 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cleanup.h b/include/linux/cleanup.h index 53f1a7a932b0..9f1a9c455b68 100644 --- a/include/linux/cleanup.h +++ b/include/linux/cleanup.h @@ -7,8 +7,9 @@ /* * DEFINE_FREE(name, type, free): * simple helper macro that defines the required wrapper for a __free() - * based cleanup function. @free is an expression using '_T' to access - * the variable. + * based cleanup function. @free is an expression using '_T' to access the + * variable. @free should typically include a NULL test before calling a + * function, see the example below. * * __free(name): * variable attribute to add a scoped based cleanup to the variable. @@ -17,6 +18,9 @@ * like a non-atomic xchg(var, NULL), such that the cleanup function will * be inhibited -- provided it sanely deals with a NULL value. * + * NOTE: this has __must_check semantics so that it is harder to accidentally + * leak the resource. + * * return_ptr(p): * returns p while inhibiting the __free(). * @@ -24,6 +28,8 @@ * * DEFINE_FREE(kfree, void *, if (_T) kfree(_T)) * + * void *alloc_obj(...) + * { * struct obj *p __free(kfree) = kmalloc(...); * if (!p) * return NULL; @@ -32,6 +38,24 @@ * return NULL; * * return_ptr(p); + * } + * + * NOTE: the DEFINE_FREE()'s @free expression includes a NULL test even though + * kfree() is fine to be called with a NULL value. This is on purpose. This way + * the compiler sees the end of our alloc_obj() function as: + * + * tmp = p; + * p = NULL; + * if (p) + * kfree(p); + * return tmp; + * + * And through the magic of value-propagation and dead-code-elimination, it + * eliminates the actual cleanup call and compiles into: + * + * return p; + * + * Without the NULL test it turns into a mess and the compiler can't help us. */ #define DEFINE_FREE(_name, _type, _free) \ @@ -39,8 +63,17 @@ #define __free(_name) __cleanup(__free_##_name) +#define __get_and_null_ptr(p) \ + ({ __auto_type __ptr = &(p); \ + __auto_type __val = *__ptr; \ + *__ptr = NULL; __val; }) + +static inline __must_check +const volatile void * __must_check_fn(const volatile void *val) +{ return val; } + #define no_free_ptr(p) \ - ({ __auto_type __ptr = (p); (p) = NULL; __ptr; }) + ((typeof(p)) __must_check_fn(__get_and_null_ptr(p))) #define return_ptr(p) return no_free_ptr(p) -- cgit v1.2.3 From 6b596e62ed9f90c4a97e68ae1f7b1af5beeb3c05 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 8 Sep 2023 18:22:51 +0200 Subject: sched: Provide rt_mutex specific scheduler helpers With PREEMPT_RT there is a rt_mutex recursion problem where sched_submit_work() can use an rtlock (aka spinlock_t). More specifically what happens is: mutex_lock() /* really rt_mutex */ ... __rt_mutex_slowlock_locked() task_blocks_on_rt_mutex() // enqueue current task as waiter // do PI chain walk rt_mutex_slowlock_block() schedule() sched_submit_work() ... spin_lock() /* really rtlock */ ... __rt_mutex_slowlock_locked() task_blocks_on_rt_mutex() // enqueue current task as waiter *AGAIN* // *CONFUSION* Fix this by making rt_mutex do the sched_submit_work() early, before it enqueues itself as a waiter -- before it even knows *if* it will wait. [[ basically Thomas' patch but with different naming and a few asserts added ]] Originally-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20230908162254.999499-5-bigeasy@linutronix.de --- include/linux/sched.h | 3 +++ include/linux/sched/rt.h | 4 ++++ kernel/sched/core.c | 36 ++++++++++++++++++++++++++++++++---- 3 files changed, 39 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 77f01ac385f7..67623ffd4a8e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -911,6 +911,9 @@ struct task_struct { * ->sched_remote_wakeup gets used, so it can be in this word. */ unsigned sched_remote_wakeup:1; +#ifdef CONFIG_RT_MUTEXES + unsigned sched_rt_mutex:1; +#endif /* Bit to tell LSMs we're in execve(): */ unsigned in_execve:1; diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 994c25640e15..b2b9e6eb9683 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -30,6 +30,10 @@ static inline bool task_is_realtime(struct task_struct *tsk) } #ifdef CONFIG_RT_MUTEXES +extern void rt_mutex_pre_schedule(void); +extern void rt_mutex_schedule(void); +extern void rt_mutex_post_schedule(void); + /* * Must hold either p->pi_lock or task_rq(p)->lock. */ diff --git a/kernel/sched/core.c b/kernel/sched/core.c index 1ea7ba53aad2..58d0346d1bb3 100644 --- a/kernel/sched/core.c +++ b/kernel/sched/core.c @@ -6723,9 +6723,6 @@ static inline void sched_submit_work(struct task_struct *tsk) static DEFINE_WAIT_OVERRIDE_MAP(sched_map, LD_WAIT_CONFIG); unsigned int task_flags; - if (task_is_running(tsk)) - return; - /* * Establish LD_WAIT_CONFIG context to ensure none of the code called * will use a blocking primitive -- which would lead to recursion. @@ -6783,7 +6780,12 @@ asmlinkage __visible void __sched schedule(void) { struct task_struct *tsk = current; - sched_submit_work(tsk); +#ifdef CONFIG_RT_MUTEXES + lockdep_assert(!tsk->sched_rt_mutex); +#endif + + if (!task_is_running(tsk)) + sched_submit_work(tsk); __schedule_loop(SM_NONE); sched_update_worker(tsk); } @@ -7044,6 +7046,32 @@ static void __setscheduler_prio(struct task_struct *p, int prio) #ifdef CONFIG_RT_MUTEXES +/* + * Would be more useful with typeof()/auto_type but they don't mix with + * bit-fields. Since it's a local thing, use int. Keep the generic sounding + * name such that if someone were to implement this function we get to compare + * notes. + */ +#define fetch_and_set(x, v) ({ int _x = (x); (x) = (v); _x; }) + +void rt_mutex_pre_schedule(void) +{ + lockdep_assert(!fetch_and_set(current->sched_rt_mutex, 1)); + sched_submit_work(current); +} + +void rt_mutex_schedule(void) +{ + lockdep_assert(current->sched_rt_mutex); + __schedule_loop(SM_NONE); +} + +void rt_mutex_post_schedule(void) +{ + sched_update_worker(current); + lockdep_assert(fetch_and_set(current->sched_rt_mutex, 0)); +} + static inline int __rt_effective_prio(struct task_struct *pi_task, int prio) { if (pi_task) -- cgit v1.2.3 From 9f6c532f59b20580acf8ede9409c9b8dce6e74e1 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:10 +0200 Subject: futex: Add sys_futex_wake() To complement sys_futex_waitv() add sys_futex_wake(). This syscall implements what was previously known as FUTEX_WAKE_BITSET except it uses 'unsigned long' for the bitmask and takes FUTEX2 flags. The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105247.936205525@noisy.programming.kicks-ass.net --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/syscalls.h | 3 +++ include/uapi/asm-generic/unistd.h | 4 +++- kernel/futex/syscalls.c | 30 +++++++++++++++++++++++++++++ kernel/sys_ni.c | 1 + 22 files changed, 56 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index ad37569d0507..3b86519d68e4 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -492,3 +492,4 @@ 560 common set_mempolicy_home_node sys_ni_syscall 561 common cachestat sys_cachestat 562 common fchmodat2 sys_fchmodat2 +563 common futex_wake sys_futex_wake diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index c572d6c3dee0..714abeb1e6fa 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -466,3 +466,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index bd77253b62e0..63a8a9c4abc1 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 453 +#define __NR_compat_syscalls 455 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 78b68311ec81..68974683737b 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -911,6 +911,8 @@ __SYSCALL(__NR_set_mempolicy_home_node, sys_set_mempolicy_home_node) __SYSCALL(__NR_cachestat, sys_cachestat) #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 83d8609aec03..cd50247508e6 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -373,3 +373,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 259ceb125367..21eb35c693e1 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -452,3 +452,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index a3798c2637fd..3a4e8513a8e1 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -458,3 +458,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 152034b8e0a0..6883ea3b830d 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -391,3 +391,4 @@ 450 n32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 n32 cachestat sys_cachestat 452 n32 fchmodat2 sys_fchmodat2 +454 n32 futex_wake sys_futex_wake diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index cb5e757f6621..48bc0fb4e3dc 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -367,3 +367,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 +454 n64 futex_wake sys_futex_wake diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 1a646813afdc..a92625f5bad8 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -440,3 +440,4 @@ 450 o32 set_mempolicy_home_node sys_set_mempolicy_home_node 451 o32 cachestat sys_cachestat 452 o32 fchmodat2 sys_fchmodat2 +454 o32 futex_wake sys_futex_wake diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index e97c175b56f9..57faa9786ffe 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -451,3 +451,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index 20e50586e8a2..e6c6ed6b30ee 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -539,3 +539,4 @@ 450 nospu set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 0122cc156952..754720154dc1 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -455,3 +455,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake sys_futex_wake diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index e90d585c4d3e..902a997e7ec6 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -455,3 +455,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 4ed06c71c43f..8a1f887c8be6 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -498,3 +498,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 2d0b1bd866ea..9e81323979b0 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -457,3 +457,4 @@ 450 i386 set_mempolicy_home_node sys_set_mempolicy_home_node 451 i386 cachestat sys_cachestat 452 i386 fchmodat2 sys_fchmodat2 +454 i386 futex_wake sys_futex_wake diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index 1d6eee30eceb..d10a6003a7c9 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -375,6 +375,7 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack sys_map_shadow_stack +454 common futex_wake sys_futex_wake # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index fc1a4f3c81d9..4e511bfd4b8f 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -423,3 +423,4 @@ 450 common set_mempolicy_home_node sys_set_mempolicy_home_node 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 +454 common futex_wake sys_futex_wake diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 22bc6bc147f8..e174ed86da1d 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -549,6 +549,9 @@ asmlinkage long sys_set_robust_list(struct robust_list_head __user *head, asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, unsigned int nr_futexes, unsigned int flags, struct __kernel_timespec __user *timeout, clockid_t clockid); + +asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index abe087c53b4b..f5454e6f4c6f 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -822,9 +822,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat) #define __NR_fchmodat2 452 __SYSCALL(__NR_fchmodat2, sys_fchmodat2) +#define __NR_futex_wake 454 +__SYSCALL(__NR_futex_wake, sys_futex_wake) #undef __NR_syscalls -#define __NR_syscalls 453 +#define __NR_syscalls 455 /* * 32 bit systems traditionally used different diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 2339f9ccee7f..7049a52ef68e 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -306,6 +306,36 @@ destroy_timer: return ret; } +/* + * sys_futex_wake - Wake a number of futexes + * @uaddr: Address of the futex(es) to wake + * @mask: bitmask + * @nr: Number of the futexes to wake + * @flags: FUTEX2 flags + * + * Identical to the traditional FUTEX_WAKE_BITSET op, except it is part of the + * futex2 family of calls. + */ + +SYSCALL_DEFINE4(futex_wake, + void __user *, uaddr, + unsigned long, mask, + int, nr, + unsigned int, flags) +{ + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + flags = futex2_to_flags(flags); + if (!futex_flags_valid(flags)) + return -EINVAL; + + if (!futex_validate_input(flags, mask)) + return -EINVAL; + + return futex_wake(uaddr, flags, nr, mask); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index e137c1385c56..983c0583c627 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -87,6 +87,7 @@ COND_SYSCALL_COMPAT(set_robust_list); COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); +COND_SYSCALL(futex_wake); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module); -- cgit v1.2.3 From cb8c4312afca1b2dc64107e7e7cea81911055612 Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:12 +0200 Subject: futex: Add sys_futex_wait() To complement sys_futex_waitv()/wake(), add sys_futex_wait(). This syscall implements what was previously known as FUTEX_WAIT_BITSET except it uses 'unsigned long' for the value and bitmask arguments, takes timespec and clockid_t arguments for the absolute timeout and uses FUTEX2 flags. The 'unsigned long' allows FUTEX2_SIZE_U64 on 64bit platforms. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105248.164324363@noisy.programming.kicks-ass.net --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 + arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/syscalls.h | 4 + include/uapi/asm-generic/unistd.h | 4 +- kernel/futex/futex.h | 3 + kernel/futex/syscalls.c | 120 +++++++++++++++++++++------- kernel/futex/waitwake.c | 61 ++++++++------ kernel/sys_ni.c | 1 + 24 files changed, 156 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index 3b86519d68e4..c49f12fd264e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -493,3 +493,4 @@ 561 common cachestat sys_cachestat 562 common fchmodat2 sys_fchmodat2 563 common futex_wake sys_futex_wake +564 common futex_wait sys_futex_wait diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index 714abeb1e6fa..a6cf56277327 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -467,3 +467,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index 63a8a9c4abc1..f33190f17ebb 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 455 +#define __NR_compat_syscalls 456 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 68974683737b..6e7d37282ba1 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -913,6 +913,8 @@ __SYSCALL(__NR_cachestat, sys_cachestat) __SYSCALL(__NR_fchmodat2, sys_fchmodat2) #define __NR_futex_wake 454 __SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index cd50247508e6..4043f0c55170 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -374,3 +374,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 21eb35c693e1..24841674acc5 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index 3a4e8513a8e1..f03927ab0220 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -459,3 +459,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index 6883ea3b830d..dbb5edfb667b 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -392,3 +392,4 @@ 451 n32 cachestat sys_cachestat 452 n32 fchmodat2 sys_fchmodat2 454 n32 futex_wake sys_futex_wake +455 n32 futex_wait sys_futex_wait diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index 48bc0fb4e3dc..faff8dfd2983 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -368,3 +368,4 @@ 451 n64 cachestat sys_cachestat 452 n64 fchmodat2 sys_fchmodat2 454 n64 futex_wake sys_futex_wake +455 n64 futex_wait sys_futex_wait diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index a92625f5bad8..542f75605b3e 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -441,3 +441,4 @@ 451 o32 cachestat sys_cachestat 452 o32 fchmodat2 sys_fchmodat2 454 o32 futex_wake sys_futex_wake +455 o32 futex_wait sys_futex_wait diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 57faa9786ffe..8e50e89551f7 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -452,3 +452,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index e6c6ed6b30ee..ad33a9993a6a 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -540,3 +540,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 754720154dc1..418853fd2a6b 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 451 common cachestat sys_cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait sys_futex_wait diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 902a997e7ec6..8ef9557d2779 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -456,3 +456,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index 8a1f887c8be6..df59a9d5f109 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -499,3 +499,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 9e81323979b0..0f6616822bd5 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -458,3 +458,4 @@ 451 i386 cachestat sys_cachestat 452 i386 fchmodat2 sys_fchmodat2 454 i386 futex_wake sys_futex_wake +455 i386 futex_wait sys_futex_wait diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index d10a6003a7c9..ddf6288823ad 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -376,6 +376,7 @@ 452 common fchmodat2 sys_fchmodat2 453 64 map_shadow_stack sys_map_shadow_stack 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index 4e511bfd4b8f..ac278dbce2ee 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -424,3 +424,4 @@ 451 common cachestat sys_cachestat 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake +455 common futex_wait sys_futex_wait diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index e174ed86da1d..11f3fdd1ee03 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -552,6 +552,10 @@ asmlinkage long sys_futex_waitv(struct futex_waitv *waiters, asmlinkage long sys_futex_wake(void __user *uaddr, unsigned long mask, int nr, unsigned int flags); +asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned long mask, + unsigned int flags, struct __kernel_timespec __user *timespec, + clockid_t clockid); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index f5454e6f4c6f..f6553bd5d213 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -824,9 +824,11 @@ __SYSCALL(__NR_cachestat, sys_cachestat) __SYSCALL(__NR_fchmodat2, sys_fchmodat2) #define __NR_futex_wake 454 __SYSCALL(__NR_futex_wake, sys_futex_wake) +#define __NR_futex_wait 455 +__SYSCALL(__NR_futex_wait, sys_futex_wait) #undef __NR_syscalls -#define __NR_syscalls 455 +#define __NR_syscalls 456 /* * 32 bit systems traditionally used different diff --git a/kernel/futex/futex.h b/kernel/futex/futex.h index 0e7821a944a2..e74888a7d71d 100644 --- a/kernel/futex/futex.h +++ b/kernel/futex/futex.h @@ -332,6 +332,9 @@ extern int futex_requeue(u32 __user *uaddr1, unsigned int flags, u32 __user *uaddr2, int nr_wake, int nr_requeue, u32 *cmpval, int requeue_pi); +extern int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + struct hrtimer_sleeper *to, u32 bitset); + extern int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset); diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index 47398926765e..e4c8ec713787 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -221,6 +221,46 @@ static int futex_parse_waitv(struct futex_vector *futexv, return 0; } +static int futex2_setup_timeout(struct __kernel_timespec __user *timeout, + clockid_t clockid, struct hrtimer_sleeper *to) +{ + int flag_clkid = 0, flag_init = 0; + struct timespec64 ts; + ktime_t time; + int ret; + + if (!timeout) + return 0; + + if (clockid == CLOCK_REALTIME) { + flag_clkid = FLAGS_CLOCKRT; + flag_init = FUTEX_CLOCK_REALTIME; + } + + if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) + return -EINVAL; + + if (get_timespec64(&ts, timeout)) + return -EFAULT; + + /* + * Since there's no opcode for futex_waitv, use + * FUTEX_WAIT_BITSET that uses absolute timeout as well + */ + ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); + if (ret) + return ret; + + futex_setup_timer(&time, to, flag_clkid, 0); + return 0; +} + +static inline void futex2_destroy_timeout(struct hrtimer_sleeper *to) +{ + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); +} + /** * sys_futex_waitv - Wait on a list of futexes * @waiters: List of futexes to wait on @@ -250,8 +290,6 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, { struct hrtimer_sleeper to; struct futex_vector *futexv; - struct timespec64 ts; - ktime_t time; int ret; /* This syscall supports no flags for now */ @@ -261,30 +299,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, if (!nr_futexes || nr_futexes > FUTEX_WAITV_MAX || !waiters) return -EINVAL; - if (timeout) { - int flag_clkid = 0, flag_init = 0; - - if (clockid == CLOCK_REALTIME) { - flag_clkid = FLAGS_CLOCKRT; - flag_init = FUTEX_CLOCK_REALTIME; - } - - if (clockid != CLOCK_REALTIME && clockid != CLOCK_MONOTONIC) - return -EINVAL; - - if (get_timespec64(&ts, timeout)) - return -EFAULT; - - /* - * Since there's no opcode for futex_waitv, use - * FUTEX_WAIT_BITSET that uses absolute timeout as well - */ - ret = futex_init_timeout(FUTEX_WAIT_BITSET, flag_init, &ts, &time); - if (ret) - return ret; - - futex_setup_timer(&time, &to, flag_clkid, 0); - } + if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) + return ret; futexv = kcalloc(nr_futexes, sizeof(*futexv), GFP_KERNEL); if (!futexv) { @@ -299,10 +315,8 @@ SYSCALL_DEFINE5(futex_waitv, struct futex_waitv __user *, waiters, kfree(futexv); destroy_timer: - if (timeout) { - hrtimer_cancel(&to.timer); - destroy_hrtimer_on_stack(&to.timer); - } + if (timeout) + futex2_destroy_timeout(&to); return ret; } @@ -336,6 +350,52 @@ SYSCALL_DEFINE4(futex_wake, return futex_wake(uaddr, FLAGS_STRICT | flags, nr, mask); } +/* + * sys_futex_wait - Wait on a futex + * @uaddr: Address of the futex to wait on + * @val: Value of @uaddr + * @mask: bitmask + * @flags: FUTEX2 flags + * @timeout: Optional absolute timeout + * @clockid: Clock to be used for the timeout, realtime or monotonic + * + * Identical to the traditional FUTEX_WAIT_BITSET op, except it is part of the + * futex2 familiy of calls. + */ + +SYSCALL_DEFINE6(futex_wait, + void __user *, uaddr, + unsigned long, val, + unsigned long, mask, + unsigned int, flags, + struct __kernel_timespec __user *, timeout, + clockid_t, clockid) +{ + struct hrtimer_sleeper to; + int ret; + + if (flags & ~FUTEX2_VALID_MASK) + return -EINVAL; + + flags = futex2_to_flags(flags); + if (!futex_flags_valid(flags)) + return -EINVAL; + + if (!futex_validate_input(flags, val) || + !futex_validate_input(flags, mask)) + return -EINVAL; + + if (timeout && (ret = futex2_setup_timeout(timeout, clockid, &to))) + return ret; + + ret = __futex_wait(uaddr, flags, val, timeout ? &to : NULL, mask); + + if (timeout) + futex2_destroy_timeout(&to); + + return ret; +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/futex/waitwake.c b/kernel/futex/waitwake.c index ceb05b876597..b109a0810a2c 100644 --- a/kernel/futex/waitwake.c +++ b/kernel/futex/waitwake.c @@ -632,20 +632,18 @@ retry_private: return ret; } -int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) +int __futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, + struct hrtimer_sleeper *to, u32 bitset) { - struct hrtimer_sleeper timeout, *to; - struct restart_block *restart; - struct futex_hash_bucket *hb; struct futex_q q = futex_q_init; + struct futex_hash_bucket *hb; int ret; if (!bitset) return -EINVAL; + q.bitset = bitset; - to = futex_setup_timer(abs_time, &timeout, flags, - current->timer_slack_ns); retry: /* * Prepare to wait on uaddr. On success, it holds hb->lock and q @@ -653,18 +651,17 @@ retry: */ ret = futex_wait_setup(uaddr, val, flags, &q, &hb); if (ret) - goto out; + return ret; /* futex_queue and wait for wakeup, timeout, or a signal. */ futex_wait_queue(hb, &q, to); /* If we were woken (and unqueued), we succeeded, whatever. */ - ret = 0; if (!futex_unqueue(&q)) - goto out; - ret = -ETIMEDOUT; + return 0; + if (to && !to->task) - goto out; + return -ETIMEDOUT; /* * We expect signal_pending(current), but we might be the @@ -673,24 +670,38 @@ retry: if (!signal_pending(current)) goto retry; - ret = -ERESTARTSYS; - if (!abs_time) - goto out; + return -ERESTARTSYS; +} - restart = ¤t->restart_block; - restart->futex.uaddr = uaddr; - restart->futex.val = val; - restart->futex.time = *abs_time; - restart->futex.bitset = bitset; - restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; +int futex_wait(u32 __user *uaddr, unsigned int flags, u32 val, ktime_t *abs_time, u32 bitset) +{ + struct hrtimer_sleeper timeout, *to; + struct restart_block *restart; + int ret; + + to = futex_setup_timer(abs_time, &timeout, flags, + current->timer_slack_ns); + + ret = __futex_wait(uaddr, flags, val, to, bitset); + + /* No timeout, nothing to clean up. */ + if (!to) + return ret; + + hrtimer_cancel(&to->timer); + destroy_hrtimer_on_stack(&to->timer); - ret = set_restart_fn(restart, futex_wait_restart); + if (ret == -ERESTARTSYS) { + restart = ¤t->restart_block; + restart->futex.uaddr = uaddr; + restart->futex.val = val; + restart->futex.time = *abs_time; + restart->futex.bitset = bitset; + restart->futex.flags = flags | FLAGS_HAS_TIMEOUT; -out: - if (to) { - hrtimer_cancel(&to->timer); - destroy_hrtimer_on_stack(&to->timer); + return set_restart_fn(restart, futex_wait_restart); } + return ret; } diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 983c0583c627..13df391194e2 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -88,6 +88,7 @@ COND_SYSCALL(get_robust_list); COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); COND_SYSCALL(futex_wake); +COND_SYSCALL(futex_wait); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module); -- cgit v1.2.3 From 0f4b5f972216782a4acb1ae00dcb55173847c2ff Mon Sep 17 00:00:00 2001 From: "peterz@infradead.org" Date: Thu, 21 Sep 2023 12:45:15 +0200 Subject: futex: Add sys_futex_requeue() Finish off the 'simple' futex2 syscall group by adding sys_futex_requeue(). Unlike sys_futex_{wait,wake}() its arguments are too numerous to fit into a regular syscall. As such, use struct futex_waitv to pass the 'source' and 'destination' futexes to the syscall. This syscall implements what was previously known as FUTEX_CMP_REQUEUE and uses {val, uaddr, flags} for source and {uaddr, flags} for destination. This design explicitly allows requeueing between different types of futex by having a different flags word per uaddr. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Acked-by: Geert Uytterhoeven Link: https://lore.kernel.org/r/20230921105248.511860556@noisy.programming.kicks-ass.net --- arch/alpha/kernel/syscalls/syscall.tbl | 1 + arch/arm/tools/syscall.tbl | 1 + arch/arm64/include/asm/unistd.h | 2 +- arch/arm64/include/asm/unistd32.h | 2 ++ arch/ia64/kernel/syscalls/syscall.tbl | 1 + arch/m68k/kernel/syscalls/syscall.tbl | 1 + arch/microblaze/kernel/syscalls/syscall.tbl | 1 + arch/mips/kernel/syscalls/syscall_n32.tbl | 1 + arch/mips/kernel/syscalls/syscall_n64.tbl | 1 + arch/mips/kernel/syscalls/syscall_o32.tbl | 1 + arch/parisc/kernel/syscalls/syscall.tbl | 1 + arch/powerpc/kernel/syscalls/syscall.tbl | 1 + arch/s390/kernel/syscalls/syscall.tbl | 1 + arch/sh/kernel/syscalls/syscall.tbl | 1 + arch/sparc/kernel/syscalls/syscall.tbl | 1 + arch/x86/entry/syscalls/syscall_32.tbl | 1 + arch/x86/entry/syscalls/syscall_64.tbl | 1 + arch/xtensa/kernel/syscalls/syscall.tbl | 1 + include/linux/syscalls.h | 3 +++ include/uapi/asm-generic/unistd.h | 4 ++- kernel/futex/syscalls.c | 38 +++++++++++++++++++++++++++++ kernel/sys_ni.c | 1 + 22 files changed, 64 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/arch/alpha/kernel/syscalls/syscall.tbl b/arch/alpha/kernel/syscalls/syscall.tbl index c49f12fd264e..b1865f9bb31e 100644 --- a/arch/alpha/kernel/syscalls/syscall.tbl +++ b/arch/alpha/kernel/syscalls/syscall.tbl @@ -494,3 +494,4 @@ 562 common fchmodat2 sys_fchmodat2 563 common futex_wake sys_futex_wake 564 common futex_wait sys_futex_wait +565 common futex_requeue sys_futex_requeue diff --git a/arch/arm/tools/syscall.tbl b/arch/arm/tools/syscall.tbl index a6cf56277327..93d0d46cbb15 100644 --- a/arch/arm/tools/syscall.tbl +++ b/arch/arm/tools/syscall.tbl @@ -468,3 +468,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/arm64/include/asm/unistd.h b/arch/arm64/include/asm/unistd.h index f33190f17ebb..531effca5f1f 100644 --- a/arch/arm64/include/asm/unistd.h +++ b/arch/arm64/include/asm/unistd.h @@ -39,7 +39,7 @@ #define __ARM_NR_compat_set_tls (__ARM_NR_COMPAT_BASE + 5) #define __ARM_NR_COMPAT_END (__ARM_NR_COMPAT_BASE + 0x800) -#define __NR_compat_syscalls 456 +#define __NR_compat_syscalls 457 #endif #define __ARCH_WANT_SYS_CLONE diff --git a/arch/arm64/include/asm/unistd32.h b/arch/arm64/include/asm/unistd32.h index 6e7d37282ba1..c453291154fd 100644 --- a/arch/arm64/include/asm/unistd32.h +++ b/arch/arm64/include/asm/unistd32.h @@ -915,6 +915,8 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2) __SYSCALL(__NR_futex_wake, sys_futex_wake) #define __NR_futex_wait 455 __SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) /* * Please add new compat syscalls above this comment and update diff --git a/arch/ia64/kernel/syscalls/syscall.tbl b/arch/ia64/kernel/syscalls/syscall.tbl index 4043f0c55170..81375ea78288 100644 --- a/arch/ia64/kernel/syscalls/syscall.tbl +++ b/arch/ia64/kernel/syscalls/syscall.tbl @@ -375,3 +375,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/m68k/kernel/syscalls/syscall.tbl b/arch/m68k/kernel/syscalls/syscall.tbl index 24841674acc5..f7f997a88bab 100644 --- a/arch/m68k/kernel/syscalls/syscall.tbl +++ b/arch/m68k/kernel/syscalls/syscall.tbl @@ -454,3 +454,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/microblaze/kernel/syscalls/syscall.tbl b/arch/microblaze/kernel/syscalls/syscall.tbl index f03927ab0220..2967ec26b978 100644 --- a/arch/microblaze/kernel/syscalls/syscall.tbl +++ b/arch/microblaze/kernel/syscalls/syscall.tbl @@ -460,3 +460,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_n32.tbl b/arch/mips/kernel/syscalls/syscall_n32.tbl index dbb5edfb667b..383abb1713f4 100644 --- a/arch/mips/kernel/syscalls/syscall_n32.tbl +++ b/arch/mips/kernel/syscalls/syscall_n32.tbl @@ -393,3 +393,4 @@ 452 n32 fchmodat2 sys_fchmodat2 454 n32 futex_wake sys_futex_wake 455 n32 futex_wait sys_futex_wait +456 n32 futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_n64.tbl b/arch/mips/kernel/syscalls/syscall_n64.tbl index faff8dfd2983..c9bd09ba905f 100644 --- a/arch/mips/kernel/syscalls/syscall_n64.tbl +++ b/arch/mips/kernel/syscalls/syscall_n64.tbl @@ -369,3 +369,4 @@ 452 n64 fchmodat2 sys_fchmodat2 454 n64 futex_wake sys_futex_wake 455 n64 futex_wait sys_futex_wait +456 n64 futex_requeue sys_futex_requeue diff --git a/arch/mips/kernel/syscalls/syscall_o32.tbl b/arch/mips/kernel/syscalls/syscall_o32.tbl index 542f75605b3e..ba5ef6cea97a 100644 --- a/arch/mips/kernel/syscalls/syscall_o32.tbl +++ b/arch/mips/kernel/syscalls/syscall_o32.tbl @@ -442,3 +442,4 @@ 452 o32 fchmodat2 sys_fchmodat2 454 o32 futex_wake sys_futex_wake 455 o32 futex_wait sys_futex_wait +456 o32 futex_requeue sys_futex_requeue diff --git a/arch/parisc/kernel/syscalls/syscall.tbl b/arch/parisc/kernel/syscalls/syscall.tbl index 8e50e89551f7..9f0f6df55361 100644 --- a/arch/parisc/kernel/syscalls/syscall.tbl +++ b/arch/parisc/kernel/syscalls/syscall.tbl @@ -453,3 +453,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/powerpc/kernel/syscalls/syscall.tbl b/arch/powerpc/kernel/syscalls/syscall.tbl index ad33a9993a6a..26fc41904266 100644 --- a/arch/powerpc/kernel/syscalls/syscall.tbl +++ b/arch/powerpc/kernel/syscalls/syscall.tbl @@ -541,3 +541,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/s390/kernel/syscalls/syscall.tbl b/arch/s390/kernel/syscalls/syscall.tbl index 418853fd2a6b..31be90b241f7 100644 --- a/arch/s390/kernel/syscalls/syscall.tbl +++ b/arch/s390/kernel/syscalls/syscall.tbl @@ -457,3 +457,4 @@ 452 common fchmodat2 sys_fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue sys_futex_requeue diff --git a/arch/sh/kernel/syscalls/syscall.tbl b/arch/sh/kernel/syscalls/syscall.tbl index 8ef9557d2779..4bc5d488ab17 100644 --- a/arch/sh/kernel/syscalls/syscall.tbl +++ b/arch/sh/kernel/syscalls/syscall.tbl @@ -457,3 +457,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/sparc/kernel/syscalls/syscall.tbl b/arch/sparc/kernel/syscalls/syscall.tbl index df59a9d5f109..8404c8e50394 100644 --- a/arch/sparc/kernel/syscalls/syscall.tbl +++ b/arch/sparc/kernel/syscalls/syscall.tbl @@ -500,3 +500,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/arch/x86/entry/syscalls/syscall_32.tbl b/arch/x86/entry/syscalls/syscall_32.tbl index 0f6616822bd5..31c48bc2c3d8 100644 --- a/arch/x86/entry/syscalls/syscall_32.tbl +++ b/arch/x86/entry/syscalls/syscall_32.tbl @@ -459,3 +459,4 @@ 452 i386 fchmodat2 sys_fchmodat2 454 i386 futex_wake sys_futex_wake 455 i386 futex_wait sys_futex_wait +456 i386 futex_requeue sys_futex_requeue diff --git a/arch/x86/entry/syscalls/syscall_64.tbl b/arch/x86/entry/syscalls/syscall_64.tbl index ddf6288823ad..a577bb27c16d 100644 --- a/arch/x86/entry/syscalls/syscall_64.tbl +++ b/arch/x86/entry/syscalls/syscall_64.tbl @@ -377,6 +377,7 @@ 453 64 map_shadow_stack sys_map_shadow_stack 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue # # Due to a historical design error, certain syscalls are numbered differently diff --git a/arch/xtensa/kernel/syscalls/syscall.tbl b/arch/xtensa/kernel/syscalls/syscall.tbl index ac278dbce2ee..dd71ecce8b86 100644 --- a/arch/xtensa/kernel/syscalls/syscall.tbl +++ b/arch/xtensa/kernel/syscalls/syscall.tbl @@ -425,3 +425,4 @@ 452 common fchmodat2 sys_fchmodat2 454 common futex_wake sys_futex_wake 455 common futex_wait sys_futex_wait +456 common futex_requeue sys_futex_requeue diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 11f3fdd1ee03..0901af60d971 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -556,6 +556,9 @@ asmlinkage long sys_futex_wait(void __user *uaddr, unsigned long val, unsigned l unsigned int flags, struct __kernel_timespec __user *timespec, clockid_t clockid); +asmlinkage long sys_futex_requeue(struct futex_waitv __user *waiters, + unsigned int flags, int nr_wake, int nr_requeue); + asmlinkage long sys_nanosleep(struct __kernel_timespec __user *rqtp, struct __kernel_timespec __user *rmtp); asmlinkage long sys_nanosleep_time32(struct old_timespec32 __user *rqtp, diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index f6553bd5d213..d9e9cd13e577 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -826,9 +826,11 @@ __SYSCALL(__NR_fchmodat2, sys_fchmodat2) __SYSCALL(__NR_futex_wake, sys_futex_wake) #define __NR_futex_wait 455 __SYSCALL(__NR_futex_wait, sys_futex_wait) +#define __NR_futex_requeue 456 +__SYSCALL(__NR_futex_requeue, sys_futex_requeue) #undef __NR_syscalls -#define __NR_syscalls 456 +#define __NR_syscalls 457 /* * 32 bit systems traditionally used different diff --git a/kernel/futex/syscalls.c b/kernel/futex/syscalls.c index dde9b74db9af..8200d86d30e1 100644 --- a/kernel/futex/syscalls.c +++ b/kernel/futex/syscalls.c @@ -396,6 +396,44 @@ SYSCALL_DEFINE6(futex_wait, return ret; } +/* + * sys_futex_requeue - Requeue a waiter from one futex to another + * @waiters: array describing the source and destination futex + * @flags: unused + * @nr_wake: number of futexes to wake + * @nr_requeue: number of futexes to requeue + * + * Identical to the traditional FUTEX_CMP_REQUEUE op, except it is part of the + * futex2 family of calls. + */ + +SYSCALL_DEFINE4(futex_requeue, + struct futex_waitv __user *, waiters, + unsigned int, flags, + int, nr_wake, + int, nr_requeue) +{ + struct futex_vector futexes[2]; + u32 cmpval; + int ret; + + if (flags) + return -EINVAL; + + if (!waiters) + return -EINVAL; + + ret = futex_parse_waitv(futexes, waiters, 2); + if (ret) + return ret; + + cmpval = futexes[0].w.val; + + return futex_requeue(u64_to_user_ptr(futexes[0].w.uaddr), futexes[0].w.flags, + u64_to_user_ptr(futexes[1].w.uaddr), futexes[1].w.flags, + nr_wake, nr_requeue, &cmpval, 0); +} + #ifdef CONFIG_COMPAT COMPAT_SYSCALL_DEFINE2(set_robust_list, struct compat_robust_list_head __user *, head, diff --git a/kernel/sys_ni.c b/kernel/sys_ni.c index 13df391194e2..9db51ea373b0 100644 --- a/kernel/sys_ni.c +++ b/kernel/sys_ni.c @@ -89,6 +89,7 @@ COND_SYSCALL_COMPAT(get_robust_list); COND_SYSCALL(futex_waitv); COND_SYSCALL(futex_wake); COND_SYSCALL(futex_wait); +COND_SYSCALL(futex_requeue); COND_SYSCALL(kexec_load); COND_SYSCALL_COMPAT(kexec_load); COND_SYSCALL(init_module); -- cgit v1.2.3 From 0cff993e08a7578e2c1df93a95fc5059f447e7ae Mon Sep 17 00:00:00 2001 From: "pangzizhen001@208suo.com" Date: Thu, 20 Jul 2023 23:45:39 +0800 Subject: locking/seqlock: Fix typo in comment s/the the /the [ mingo: Cleaned up the changelog. ] Signed-off-by: Zizhen Pang Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/70293ecd5bb7a1cd370fd4d95c35f936@208suo.com --- include/linux/seqlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 987a59d977c5..ea7a58258af6 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -864,7 +864,7 @@ static inline unsigned read_seqretry(const seqlock_t *sl, unsigned start) } /* - * For all seqlock_t write side functions, use the the internal + * For all seqlock_t write side functions, use the internal * do_write_seqcount_begin() instead of generic write_seqcount_begin(). * This way, no redundant lockdep_assert_held() checks are added. */ -- cgit v1.2.3 From e01cc1e8c2ad73cebb980878ede5584e0f2688f7 Mon Sep 17 00:00:00 2001 From: Uros Bizjak Date: Mon, 25 Sep 2023 16:50:23 +0200 Subject: locking/atomic: Add generic support for sync_try_cmpxchg() and its fallback Provide the generic sync_try_cmpxchg() function from the raw_ prefixed version, also adding explicit instrumentation. The patch amends existing scripts to generate sync_try_cmpxchg() locking primitive and its raw_sync_try_cmpxchg() fallback, while leaving existing macros from the try_cmpxchg() family unchanged. The target can define its own arch_sync_try_cmpxchg() to override the generic version of raw_sync_try_cmpxchg(). This allows the target to generate more optimal assembly than the generic version. Additionally, the patch renames two scripts to better reflect whet they really do. Signed-off-by: Uros Bizjak Signed-off-by: Ingo Molnar Cc: Will Deacon Cc: Peter Zijlstra Cc: Boqun Feng Cc: Mark Rutland Cc: Linus Torvalds Cc: linux-kernel@vger.kernel.org --- include/linux/atomic/atomic-arch-fallback.h | 15 ++++++++++++- include/linux/atomic/atomic-instrumented.h | 10 ++++++++- scripts/atomic/gen-atomic-fallback.sh | 33 ++++++++++++++++------------- scripts/atomic/gen-atomic-instrumented.sh | 3 ++- 4 files changed, 43 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic/atomic-arch-fallback.h b/include/linux/atomic/atomic-arch-fallback.h index b83ef19da13d..5e95faa959c4 100644 --- a/include/linux/atomic/atomic-arch-fallback.h +++ b/include/linux/atomic/atomic-arch-fallback.h @@ -428,6 +428,19 @@ extern void raw_cmpxchg128_relaxed_not_implemented(void); #define raw_sync_cmpxchg arch_sync_cmpxchg +#ifdef arch_sync_try_cmpxchg +#define raw_sync_try_cmpxchg arch_sync_try_cmpxchg +#else +#define raw_sync_try_cmpxchg(_ptr, _oldp, _new) \ +({ \ + typeof(*(_ptr)) *___op = (_oldp), ___o = *___op, ___r; \ + ___r = raw_sync_cmpxchg((_ptr), ___o, (_new)); \ + if (unlikely(___r != ___o)) \ + *___op = ___r; \ + likely(___r == ___o); \ +}) +#endif + /** * raw_atomic_read() - atomic load with relaxed ordering * @v: pointer to atomic_t @@ -4649,4 +4662,4 @@ raw_atomic64_dec_if_positive(atomic64_t *v) } #endif /* _LINUX_ATOMIC_FALLBACK_H */ -// 2fdd6702823fa842f9cea57a002e6e4476ae780c +// eec048affea735b8464f58e6d96992101f8f85f1 diff --git a/include/linux/atomic/atomic-instrumented.h b/include/linux/atomic/atomic-instrumented.h index d401b406ef7c..54d7bbe0aeaa 100644 --- a/include/linux/atomic/atomic-instrumented.h +++ b/include/linux/atomic/atomic-instrumented.h @@ -4998,6 +4998,14 @@ atomic_long_dec_if_positive(atomic_long_t *v) raw_try_cmpxchg128_local(__ai_ptr, __ai_oldp, __VA_ARGS__); \ }) +#define sync_try_cmpxchg(ptr, ...) \ +({ \ + typeof(ptr) __ai_ptr = (ptr); \ + kcsan_mb(); \ + instrument_atomic_read_write(__ai_ptr, sizeof(*__ai_ptr)); \ + raw_sync_try_cmpxchg(__ai_ptr, __VA_ARGS__); \ +}) + #endif /* _LINUX_ATOMIC_INSTRUMENTED_H */ -// 1568f875fef72097413caab8339120c065a39aa4 +// 2cc4bc990fef44d3836ec108f11b610f3f438184 diff --git a/scripts/atomic/gen-atomic-fallback.sh b/scripts/atomic/gen-atomic-fallback.sh index a45154cefa48..f80d69cfeb1f 100755 --- a/scripts/atomic/gen-atomic-fallback.sh +++ b/scripts/atomic/gen-atomic-fallback.sh @@ -223,14 +223,15 @@ gen_xchg_fallbacks() gen_try_cmpxchg_fallback() { + local prefix="$1"; shift local cmpxchg="$1"; shift; - local order="$1"; shift; + local suffix="$1"; shift; cat < Date: Thu, 12 Oct 2023 16:31:58 +0200 Subject: locking/seqlock: Simplify SEQCOUNT_LOCKNAME() 1. Kill the "lockmember" argument. It is always s->lock plus __seqprop_##lockname##_sequence() already uses s->lock and ignores "lockmember". 2. Kill the "lock_acquire" argument. __seqprop_##lockname##_sequence() can use the same "lockbase" prefix for _lock and _unlock. Apart from line numbers, gcc -E outputs the same code. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Waiman Long Cc: Will Deacon Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Paul E. McKenney Link: https://lore.kernel.org/r/20231012143158.GA16133@redhat.com --- include/linux/seqlock.h | 18 ++++++++---------- 1 file changed, 8 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index af518e4d0c6b..7e7109dbc3f5 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -191,11 +191,9 @@ static inline void seqcount_lockdep_reader_access(const seqcount_t *s) * @lockname: "LOCKNAME" part of seqcount_LOCKNAME_t * @locktype: LOCKNAME canonical C data type * @preemptible: preemptibility of above locktype - * @lockmember: argument for lockdep_assert_held() - * @lockbase: associated lock release function (prefix only) - * @lock_acquire: associated lock acquisition function (full call) + * @lockbase: prefix for associated lock/unlock */ -#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockmember, lockbase, lock_acquire) \ +#define SEQCOUNT_LOCKNAME(lockname, locktype, preemptible, lockbase) \ typedef struct seqcount_##lockname { \ seqcount_t seqcount; \ __SEQ_LOCK(locktype *lock); \ @@ -216,7 +214,7 @@ __seqprop_##lockname##_sequence(const seqcount_##lockname##_t *s) \ return seq; \ \ if (preemptible && unlikely(seq & 1)) { \ - __SEQ_LOCK(lock_acquire); \ + __SEQ_LOCK(lockbase##_lock(s->lock)); \ __SEQ_LOCK(lockbase##_unlock(s->lock)); \ \ /* \ @@ -242,7 +240,7 @@ __seqprop_##lockname##_preemptible(const seqcount_##lockname##_t *s) \ static __always_inline void \ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ { \ - __SEQ_LOCK(lockdep_assert_held(lockmember)); \ + __SEQ_LOCK(lockdep_assert_held(s->lock)); \ } /* @@ -271,10 +269,10 @@ static inline void __seqprop_assert(const seqcount_t *s) #define __SEQ_RT IS_ENABLED(CONFIG_PREEMPT_RT) -SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, s->lock, raw_spin, raw_spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, s->lock, spin, spin_lock(s->lock)) -SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, s->lock, read, read_lock(s->lock)) -SEQCOUNT_LOCKNAME(mutex, struct mutex, true, s->lock, mutex, mutex_lock(s->lock)) +SEQCOUNT_LOCKNAME(raw_spinlock, raw_spinlock_t, false, raw_spin) +SEQCOUNT_LOCKNAME(spinlock, spinlock_t, __SEQ_RT, spin) +SEQCOUNT_LOCKNAME(rwlock, rwlock_t, __SEQ_RT, read) +SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) /* * SEQCNT_LOCKNAME_ZERO - static initializer for seqcount_LOCKNAME_t -- cgit v1.2.3 From e6115c6f7a0ce3388cc60b69a284facf78b5dbfd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 12 Oct 2023 16:32:27 +0200 Subject: locking/seqlock: Change __seqprop() to return the function pointer This simplifies the macro and makes it easy to add the new seqprop's with 2 or more args. Plus this way we do not lose the type info, the (void*) type cast is no longer needed. And the latter reveals the problem: a lot of seqcount_t helpers pass the "const seqcount_t *s" argument to __seqprop_ptr(seqcount_t *s) but (before this patch) "(void *)(s)" masked the problem. So this patch changes __seqprop_ptr() and __seqprop_##lockname##_ptr() to accept the "const LOCKNAME *s" argument. This is not nice either, they need to drop the constness on return because these helpers are used by both the readers and writers, but at least it is clear what's going on. Signed-off-by: Oleg Nesterov Signed-off-by: Ingo Molnar Cc: Peter Zijlstra Cc: Waiman Long Cc: Will Deacon Cc: Thomas Gleixner Cc: Linus Torvalds Cc: Paul E. McKenney Link: https://lore.kernel.org/r/20231012143227.GA16143@redhat.com --- include/linux/seqlock.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 7e7109dbc3f5..4b8dcd3a0d93 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -200,9 +200,9 @@ typedef struct seqcount_##lockname { \ } seqcount_##lockname##_t; \ \ static __always_inline seqcount_t * \ -__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ +__seqprop_##lockname##_ptr(const seqcount_##lockname##_t *s) \ { \ - return &s->seqcount; \ + return (void *)&s->seqcount; /* drop const */ \ } \ \ static __always_inline unsigned \ @@ -247,9 +247,9 @@ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ * __seqprop() for seqcount_t */ -static inline seqcount_t *__seqprop_ptr(seqcount_t *s) +static inline seqcount_t *__seqprop_ptr(const seqcount_t *s) { - return s; + return (void *)s; /* drop const */ } static inline unsigned __seqprop_sequence(const seqcount_t *s) @@ -292,19 +292,19 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) #define SEQCNT_WW_MUTEX_ZERO(name, lock) SEQCOUNT_LOCKNAME_ZERO(name, lock) #define __seqprop_case(s, lockname, prop) \ - seqcount_##lockname##_t: __seqprop_##lockname##_##prop((void *)(s)) + seqcount_##lockname##_t: __seqprop_##lockname##_##prop #define __seqprop(s, prop) _Generic(*(s), \ - seqcount_t: __seqprop_##prop((void *)(s)), \ + seqcount_t: __seqprop_##prop, \ __seqprop_case((s), raw_spinlock, prop), \ __seqprop_case((s), spinlock, prop), \ __seqprop_case((s), rwlock, prop), \ __seqprop_case((s), mutex, prop)) -#define seqprop_ptr(s) __seqprop(s, ptr) -#define seqprop_sequence(s) __seqprop(s, sequence) -#define seqprop_preemptible(s) __seqprop(s, preemptible) -#define seqprop_assert(s) __seqprop(s, assert) +#define seqprop_ptr(s) __seqprop(s, ptr)(s) +#define seqprop_sequence(s) __seqprop(s, sequence)(s) +#define seqprop_preemptible(s) __seqprop(s, preemptible)(s) +#define seqprop_assert(s) __seqprop(s, assert)(s) /** * __read_seqcount_begin() - begin a seqcount_t read section w/o barrier -- cgit v1.2.3 From 886ee55eabac0d46faf8bc0b22207ca2740847ba Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Fri, 13 Oct 2023 10:15:46 +0200 Subject: locking/seqlock: Propagate 'const' pointers within read-only methods, remove forced type casts Currently __seqprop_ptr() is an inline function that must chose to either use 'const' or non-const seqcount related pointers - but this results in the undesirable loss of 'const' propagation, via a forced type cast. The easiest solution would be to turn the pointer wrappers into macros that pass through whatever type is passed to them - but the clever maze of seqlock API instantiation macros relies on the GCC CPP '##' macro extension, which isn't recursive, so inline functions must be used here. So create two wrapper variants instead: 'ptr' and 'const_ptr', and pick the right one for the codepaths that are const: read_seqcount_begin() and read_seqcount_retry(). This cleans up type handling and allows the removal of all type forcing. No change in functionality. Signed-off-by: Ingo Molnar Reviewed-by: Oleg Nesterov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Waiman Long Cc: Will Deacon Cc: Thomas Gleixner Cc: Paul E. McKenney --- include/linux/seqlock.h | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 4b8dcd3a0d93..80f21d2ca2aa 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -200,9 +200,15 @@ typedef struct seqcount_##lockname { \ } seqcount_##lockname##_t; \ \ static __always_inline seqcount_t * \ -__seqprop_##lockname##_ptr(const seqcount_##lockname##_t *s) \ +__seqprop_##lockname##_ptr(seqcount_##lockname##_t *s) \ { \ - return (void *)&s->seqcount; /* drop const */ \ + return &s->seqcount; \ +} \ + \ +static __always_inline const seqcount_t * \ +__seqprop_##lockname##_const_ptr(const seqcount_##lockname##_t *s) \ +{ \ + return &s->seqcount; \ } \ \ static __always_inline unsigned \ @@ -247,9 +253,14 @@ __seqprop_##lockname##_assert(const seqcount_##lockname##_t *s) \ * __seqprop() for seqcount_t */ -static inline seqcount_t *__seqprop_ptr(const seqcount_t *s) +static inline seqcount_t *__seqprop_ptr(seqcount_t *s) +{ + return s; +} + +static inline const seqcount_t *__seqprop_const_ptr(const seqcount_t *s) { - return (void *)s; /* drop const */ + return s; } static inline unsigned __seqprop_sequence(const seqcount_t *s) @@ -302,6 +313,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) __seqprop_case((s), mutex, prop)) #define seqprop_ptr(s) __seqprop(s, ptr)(s) +#define seqprop_const_ptr(s) __seqprop(s, const_ptr)(s) #define seqprop_sequence(s) __seqprop(s, sequence)(s) #define seqprop_preemptible(s) __seqprop(s, preemptible)(s) #define seqprop_assert(s) __seqprop(s, assert)(s) @@ -353,7 +365,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) */ #define read_seqcount_begin(s) \ ({ \ - seqcount_lockdep_reader_access(seqprop_ptr(s)); \ + seqcount_lockdep_reader_access(seqprop_const_ptr(s)); \ raw_read_seqcount_begin(s); \ }) @@ -419,7 +431,7 @@ SEQCOUNT_LOCKNAME(mutex, struct mutex, true, mutex) * Return: true if a read section retry is required, else false */ #define __read_seqcount_retry(s, start) \ - do___read_seqcount_retry(seqprop_ptr(s), start) + do___read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) { @@ -439,7 +451,7 @@ static inline int do___read_seqcount_retry(const seqcount_t *s, unsigned start) * Return: true if a read section retry is required, else false */ #define read_seqcount_retry(s, start) \ - do_read_seqcount_retry(seqprop_ptr(s), start) + do_read_seqcount_retry(seqprop_const_ptr(s), start) static inline int do_read_seqcount_retry(const seqcount_t *s, unsigned start) { -- cgit v1.2.3 From 184fdf9fc7ae6ae7155768faa48fc609d1a24b7e Mon Sep 17 00:00:00 2001 From: Cuda-Chen Date: Tue, 17 Oct 2023 13:37:03 +0800 Subject: locking/seqlock: Fix grammar in comment The "neither writes before and after ..." for the description of do_write_seqcount_end() should be "neither writes before nor after". Signed-off-by: Cuda-Chen Signed-off-by: Ingo Molnar Link: https://lore.kernel.org/r/20231017053703.11312-1-clh960524@gmail.com --- include/linux/seqlock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/seqlock.h b/include/linux/seqlock.h index 80f21d2ca2aa..e92f9d5577ba 100644 --- a/include/linux/seqlock.h +++ b/include/linux/seqlock.h @@ -584,7 +584,7 @@ static inline void do_write_seqcount_end(seqcount_t *s) * via WRITE_ONCE): a) to ensure the writes become visible to other threads * atomically, avoiding compiler optimizations; b) to document which writes are * meant to propagate to the reader critical section. This is necessary because - * neither writes before and after the barrier are enclosed in a seq-writer + * neither writes before nor after the barrier are enclosed in a seq-writer * critical section that would ensure readers are aware of ongoing writes:: * * seqcount_t seq; -- cgit v1.2.3