From 3eb6660f26d13acdbcb9241ac3e95d44419f2284 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 29 Oct 2025 10:40:52 +0100 Subject: uaccess: Provide ASM GOTO safe wrappers for unsafe_*_user() ASM GOTO is miscompiled by GCC when it is used inside a auto cleanup scope: bool foo(u32 __user *p, u32 val) { scoped_guard(pagefault) unsafe_put_user(val, p, efault); return true; efault: return false; } e80: e8 00 00 00 00 call e85 e85: 65 48 8b 05 00 00 00 00 mov %gs:0x0(%rip),%rax e8d: 83 80 04 14 00 00 01 addl $0x1,0x1404(%rax) // pf_disable++ e94: 89 37 mov %esi,(%rdi) e96: 83 a8 04 14 00 00 01 subl $0x1,0x1404(%rax) // pf_disable-- e9d: b8 01 00 00 00 mov $0x1,%eax // success ea2: e9 00 00 00 00 jmp ea7 // ret ea7: 31 c0 xor %eax,%eax // fail ea9: e9 00 00 00 00 jmp eae // ret which is broken as it leaks the pagefault disable counter on failure. Clang at least fails the build. Linus suggested to add a local label into the macro scope and let that jump to the actual caller supplied error label. __label__ local_label; \ arch_unsafe_get_user(x, ptr, local_label); \ if (0) { \ local_label: \ goto label; \ That works for both GCC and clang. clang: c80: 0f 1f 44 00 00 nopl 0x0(%rax,%rax,1) c85: 65 48 8b 0c 25 00 00 00 00 mov %gs:0x0,%rcx c8e: ff 81 04 14 00 00 incl 0x1404(%rcx) // pf_disable++ c94: 31 c0 xor %eax,%eax // set retval to false c96: 89 37 mov %esi,(%rdi) // write c98: b0 01 mov $0x1,%al // set retval to true c9a: ff 89 04 14 00 00 decl 0x1404(%rcx) // pf_disable-- ca0: 2e e9 00 00 00 00 cs jmp ca6 // ret The exception table entry points correctly to c9a GCC: f70: e8 00 00 00 00 call f75 f75: 65 48 8b 05 00 00 00 00 mov %gs:0x0(%rip),%rax f7d: 83 80 04 14 00 00 01 addl $0x1,0x1404(%rax) // pf_disable++ f84: 8b 17 mov (%rdi),%edx f86: 89 16 mov %edx,(%rsi) f88: 83 a8 04 14 00 00 01 subl $0x1,0x1404(%rax) // pf_disable-- f8f: b8 01 00 00 00 mov $0x1,%eax // success f94: e9 00 00 00 00 jmp f99 // ret f99: 83 a8 04 14 00 00 01 subl $0x1,0x1404(%rax) // pf_disable-- fa0: 31 c0 xor %eax,%eax // fail fa2: e9 00 00 00 00 jmp fa7 // ret The exception table entry points correctly to f99 So both compilers optimize out the extra goto and emit correct and efficient code. Provide a generic wrapper to do that to avoid modifying all the affected architecture specific implementation with that workaround. The only change required for architectures is to rename unsafe_*_user() to arch_unsafe_*_user(). That's done in subsequent changes. Suggested-by: Linus Torvalds Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Mathieu Desnoyers Link: https://patch.msgid.link/877bweujtn.ffs@tglx --- include/linux/uaccess.h | 72 ++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 1beb5b395d81..8aa82b1d6013 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -518,7 +518,34 @@ long strncpy_from_user_nofault(char *dst, const void __user *unsafe_addr, long count); long strnlen_user_nofault(const void __user *unsafe_addr, long count); -#ifndef __get_kernel_nofault +#ifdef arch_get_kernel_nofault +/* + * Wrap the architecture implementation so that @label can be outside of a + * cleanup() scope. A regular C goto works correctly, but ASM goto does + * not. Clang rejects such an attempt, but GCC silently emits buggy code. + */ +#define __get_kernel_nofault(dst, src, type, label) \ +do { \ + __label__ local_label; \ + arch_get_kernel_nofault(dst, src, type, local_label); \ + if (0) { \ + local_label: \ + goto label; \ + } \ +} while (0) + +#define __put_kernel_nofault(dst, src, type, label) \ +do { \ + __label__ local_label; \ + arch_put_kernel_nofault(dst, src, type, local_label); \ + if (0) { \ + local_label: \ + goto label; \ + } \ +} while (0) + +#elif !defined(__get_kernel_nofault) /* arch_get_kernel_nofault */ + #define __get_kernel_nofault(dst, src, type, label) \ do { \ type __user *p = (type __force __user *)(src); \ @@ -535,7 +562,8 @@ do { \ if (__put_user(data, p)) \ goto label; \ } while (0) -#endif + +#endif /* !__get_kernel_nofault */ /** * get_kernel_nofault(): safely attempt to read from a location @@ -549,7 +577,42 @@ do { \ copy_from_kernel_nofault(&(val), __gk_ptr, sizeof(val));\ }) -#ifndef user_access_begin +#ifdef user_access_begin + +#ifdef arch_unsafe_get_user +/* + * Wrap the architecture implementation so that @label can be outside of a + * cleanup() scope. A regular C goto works correctly, but ASM goto does + * not. Clang rejects such an attempt, but GCC silently emits buggy code. + * + * Some architectures use internal local labels already, but this extra + * indirection here is harmless because the compiler optimizes it out + * completely in any case. This construct just ensures that the ASM GOTO + * target is always in the local scope. The C goto 'label' works correctly + * when leaving a cleanup() scope. + */ +#define unsafe_get_user(x, ptr, label) \ +do { \ + __label__ local_label; \ + arch_unsafe_get_user(x, ptr, local_label); \ + if (0) { \ + local_label: \ + goto label; \ + } \ +} while (0) + +#define unsafe_put_user(x, ptr, label) \ +do { \ + __label__ local_label; \ + arch_unsafe_put_user(x, ptr, local_label); \ + if (0) { \ + local_label: \ + goto label; \ + } \ +} while (0) +#endif /* arch_unsafe_get_user */ + +#else /* user_access_begin */ #define user_access_begin(ptr,len) access_ok(ptr, len) #define user_access_end() do { } while (0) #define unsafe_op_wrap(op, err) do { if (unlikely(op)) goto err; } while (0) @@ -559,7 +622,8 @@ do { \ #define unsafe_copy_from_user(d,s,l,e) unsafe_op_wrap(__copy_from_user(d,s,l),e) static inline unsigned long user_access_save(void) { return 0UL; } static inline void user_access_restore(unsigned long flags) { } -#endif +#endif /* !user_access_begin */ + #ifndef user_write_access_begin #define user_write_access_begin user_access_begin #define user_write_access_end user_access_end -- cgit v1.2.3 From e497310b4ffb559e1149ee89470d5c518d234ddf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Oct 2025 09:43:55 +0100 Subject: uaccess: Provide scoped user access regions User space access regions are tedious and require similar code patterns all over the place: if (!user_read_access_begin(from, sizeof(*from))) return -EFAULT; unsafe_get_user(val, from, Efault); user_read_access_end(); return 0; Efault: user_read_access_end(); return -EFAULT; This got worse with the recent addition of masked user access, which optimizes the speculation prevention: if (can_do_masked_user_access()) from = masked_user_read_access_begin((from)); else if (!user_read_access_begin(from, sizeof(*from))) return -EFAULT; unsafe_get_user(val, from, Efault); user_read_access_end(); return 0; Efault: user_read_access_end(); return -EFAULT; There have been issues with using the wrong user_*_access_end() variant in the error path and other typical Copy&Pasta problems, e.g. using the wrong fault label in the user accessor which ends up using the wrong accesss end variant. These patterns beg for scopes with automatic cleanup. The resulting outcome is: scoped_user_read_access(from, Efault) unsafe_get_user(val, from, Efault); return 0; Efault: return -EFAULT; The scope guarantees the proper cleanup for the access mode is invoked both in the success and the failure (fault) path. The scoped_user_$MODE_access() macros are implemented as self terminating nested for() loops. Thanks to Andrew Cooper for pointing me at them. The scope can therefore be left with 'break', 'goto' and 'return'. Even 'continue' "works" due to the self termination mechanism. Both GCC and clang optimize all the convoluted macro maze out and the above results with clang in: b80: f3 0f 1e fa endbr64 b84: 48 b8 ef cd ab 89 67 45 23 01 movabs $0x123456789abcdef,%rax b8e: 48 39 c7 cmp %rax,%rdi b91: 48 0f 47 f8 cmova %rax,%rdi b95: 90 nop b96: 90 nop b97: 90 nop b98: 31 c9 xor %ecx,%ecx b9a: 8b 07 mov (%rdi),%eax b9c: 89 06 mov %eax,(%rsi) b9e: 85 c9 test %ecx,%ecx ba0: 0f 94 c0 sete %al ba3: 90 nop ba4: 90 nop ba5: 90 nop ba6: c3 ret Which looks as compact as it gets. The NOPs are placeholder for STAC/CLAC. GCC emits the fault path seperately: bf0: f3 0f 1e fa endbr64 bf4: 48 b8 ef cd ab 89 67 45 23 01 movabs $0x123456789abcdef,%rax bfe: 48 39 c7 cmp %rax,%rdi c01: 48 0f 47 f8 cmova %rax,%rdi c05: 90 nop c06: 90 nop c07: 90 nop c08: 31 d2 xor %edx,%edx c0a: 8b 07 mov (%rdi),%eax c0c: 89 06 mov %eax,(%rsi) c0e: 85 d2 test %edx,%edx c10: 75 09 jne c1b c12: 90 nop c13: 90 nop c14: 90 nop c15: b8 01 00 00 00 mov $0x1,%eax c1a: c3 ret c1b: 90 nop c1c: 90 nop c1d: 90 nop c1e: 31 c0 xor %eax,%eax c20: c3 ret The fault labels for the scoped*() macros and the fault labels for the actual user space accessors can be shared and must be placed outside of the scope. If masked user access is enabled on an architecture, then the pointer handed in to scoped_user_$MODE_access() can be modified to point to a guaranteed faulting user address. This modification is only scope local as the pointer is aliased inside the scope. When the scope is left the alias is not longer in effect. IOW the original pointer value is preserved so it can be used e.g. for fixup or diagnostic purposes in the fault path. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Mathieu Desnoyers Link: https://patch.msgid.link/20251027083745.546420421@linutronix.de --- include/linux/uaccess.h | 192 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 8aa82b1d6013..5f142c05b0dc 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -2,6 +2,7 @@ #ifndef __LINUX_UACCESS_H__ #define __LINUX_UACCESS_H__ +#include #include #include #include @@ -35,9 +36,17 @@ #ifdef masked_user_access_begin #define can_do_masked_user_access() 1 +# ifndef masked_user_write_access_begin +# define masked_user_write_access_begin masked_user_access_begin +# endif +# ifndef masked_user_read_access_begin +# define masked_user_read_access_begin masked_user_access_begin +#endif #else #define can_do_masked_user_access() 0 #define masked_user_access_begin(src) NULL + #define masked_user_read_access_begin(src) NULL + #define masked_user_write_access_begin(src) NULL #define mask_user_address(src) (src) #endif @@ -633,6 +642,189 @@ static inline void user_access_restore(unsigned long flags) { } #define user_read_access_end user_access_end #endif +/* Define RW variant so the below _mode macro expansion works */ +#define masked_user_rw_access_begin(u) masked_user_access_begin(u) +#define user_rw_access_begin(u, s) user_access_begin(u, s) +#define user_rw_access_end() user_access_end() + +/* Scoped user access */ +#define USER_ACCESS_GUARD(_mode) \ +static __always_inline void __user * \ +class_user_##_mode##_begin(void __user *ptr) \ +{ \ + return ptr; \ +} \ + \ +static __always_inline void \ +class_user_##_mode##_end(void __user *ptr) \ +{ \ + user_##_mode##_access_end(); \ +} \ + \ +DEFINE_CLASS(user_ ##_mode## _access, void __user *, \ + class_user_##_mode##_end(_T), \ + class_user_##_mode##_begin(ptr), void __user *ptr) \ + \ +static __always_inline class_user_##_mode##_access_t \ +class_user_##_mode##_access_ptr(void __user *scope) \ +{ \ + return scope; \ +} + +USER_ACCESS_GUARD(read) +USER_ACCESS_GUARD(write) +USER_ACCESS_GUARD(rw) +#undef USER_ACCESS_GUARD + +/** + * __scoped_user_access_begin - Start a scoped user access + * @mode: The mode of the access class (read, write, rw) + * @uptr: The pointer to access user space memory + * @size: Size of the access + * @elbl: Error label to goto when the access region is rejected + * + * Internal helper for __scoped_user_access(). Don't use directly. + */ +#define __scoped_user_access_begin(mode, uptr, size, elbl) \ +({ \ + typeof(uptr) __retptr; \ + \ + if (can_do_masked_user_access()) { \ + __retptr = masked_user_##mode##_access_begin(uptr); \ + } else { \ + __retptr = uptr; \ + if (!user_##mode##_access_begin(uptr, size)) \ + goto elbl; \ + } \ + __retptr; \ +}) + +/** + * __scoped_user_access - Open a scope for user access + * @mode: The mode of the access class (read, write, rw) + * @uptr: The pointer to access user space memory + * @size: Size of the access + * @elbl: Error label to goto when the access region is rejected. It + * must be placed outside the scope + * + * If the user access function inside the scope requires a fault label, it + * can use @elbl or a different label outside the scope, which requires + * that user access which is implemented with ASM GOTO has been properly + * wrapped. See unsafe_get_user() for reference. + * + * scoped_user_rw_access(ptr, efault) { + * unsafe_get_user(rval, &ptr->rval, efault); + * unsafe_put_user(wval, &ptr->wval, efault); + * } + * return 0; + * efault: + * return -EFAULT; + * + * The scope is internally implemented as a autoterminating nested for() + * loop, which can be left with 'return', 'break' and 'goto' at any + * point. + * + * When the scope is left user_##@_mode##_access_end() is automatically + * invoked. + * + * When the architecture supports masked user access and the access region + * which is determined by @uptr and @size is not a valid user space + * address, i.e. < TASK_SIZE, the scope sets the pointer to a faulting user + * space address and does not terminate early. This optimizes for the good + * case and lets the performance uncritical bad case go through the fault. + * + * The eventual modification of the pointer is limited to the scope. + * Outside of the scope the original pointer value is unmodified, so that + * the original pointer value is available for diagnostic purposes in an + * out of scope fault path. + * + * Nesting scoped user access into a user access scope is invalid and fails + * the build. Nesting into other guards, e.g. pagefault is safe. + * + * The masked variant does not check the size of the access and relies on a + * mapping hole (e.g. guard page) to catch an out of range pointer, the + * first access to user memory inside the scope has to be within + * @uptr ... @uptr + PAGE_SIZE - 1 + * + * Don't use directly. Use scoped_masked_user_$MODE_access() instead. + */ +#define __scoped_user_access(mode, uptr, size, elbl) \ +for (bool done = false; !done; done = true) \ + for (void __user *_tmpptr = __scoped_user_access_begin(mode, uptr, size, elbl); \ + !done; done = true) \ + for (CLASS(user_##mode##_access, scope)(_tmpptr); !done; done = true) \ + /* Force modified pointer usage within the scope */ \ + for (const typeof(uptr) uptr = _tmpptr; !done; done = true) + +/** + * scoped_user_read_access_size - Start a scoped user read access with given size + * @usrc: Pointer to the user space address to read from + * @size: Size of the access starting from @usrc + * @elbl: Error label to goto when the access region is rejected + * + * For further information see __scoped_user_access() above. + */ +#define scoped_user_read_access_size(usrc, size, elbl) \ + __scoped_user_access(read, usrc, size, elbl) + +/** + * scoped_user_read_access - Start a scoped user read access + * @usrc: Pointer to the user space address to read from + * @elbl: Error label to goto when the access region is rejected + * + * The size of the access starting from @usrc is determined via sizeof(*@usrc)). + * + * For further information see __scoped_user_access() above. + */ +#define scoped_user_read_access(usrc, elbl) \ + scoped_user_read_access_size(usrc, sizeof(*(usrc)), elbl) + +/** + * scoped_user_write_access_size - Start a scoped user write access with given size + * @udst: Pointer to the user space address to write to + * @size: Size of the access starting from @udst + * @elbl: Error label to goto when the access region is rejected + * + * For further information see __scoped_user_access() above. + */ +#define scoped_user_write_access_size(udst, size, elbl) \ + __scoped_user_access(write, udst, size, elbl) + +/** + * scoped_user_write_access - Start a scoped user write access + * @udst: Pointer to the user space address to write to + * @elbl: Error label to goto when the access region is rejected + * + * The size of the access starting from @udst is determined via sizeof(*@udst)). + * + * For further information see __scoped_user_access() above. + */ +#define scoped_user_write_access(udst, elbl) \ + scoped_user_write_access_size(udst, sizeof(*(udst)), elbl) + +/** + * scoped_user_rw_access_size - Start a scoped user read/write access with given size + * @uptr Pointer to the user space address to read from and write to + * @size: Size of the access starting from @uptr + * @elbl: Error label to goto when the access region is rejected + * + * For further information see __scoped_user_access() above. + */ +#define scoped_user_rw_access_size(uptr, size, elbl) \ + __scoped_user_access(rw, uptr, size, elbl) + +/** + * scoped_user_rw_access - Start a scoped user read/write access + * @uptr Pointer to the user space address to read from and write to + * @elbl: Error label to goto when the access region is rejected + * + * The size of the access starting from @uptr is determined via sizeof(*@uptr)). + * + * For further information see __scoped_user_access() above. + */ +#define scoped_user_rw_access(uptr, elbl) \ + scoped_user_rw_access_size(uptr, sizeof(*(uptr)), elbl) + #ifdef CONFIG_HARDENED_USERCOPY void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, -- cgit v1.2.3 From b2cfc0cd68b830dde80fce2406580e258a1e976d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Oct 2025 09:43:56 +0100 Subject: uaccess: Provide put/get_user_inline() Provide convenience wrappers around scoped user access similar to put/get_user(), which reduce the usage sites to: if (!get_user_inline(val, ptr)) return -EFAULT; Should only be used if there is a demonstrable performance benefit. Signed-off-by: Thomas Gleixner Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Ingo Molnar Reviewed-by: Mathieu Desnoyers Link: https://patch.msgid.link/20251027083745.609031602@linutronix.de --- include/linux/uaccess.h | 50 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 50 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5f142c05b0dc..be395f5f7ee3 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -825,6 +825,56 @@ for (bool done = false; !done; done = true) \ #define scoped_user_rw_access(uptr, elbl) \ scoped_user_rw_access_size(uptr, sizeof(*(uptr)), elbl) +/** + * get_user_inline - Read user data inlined + * @val: The variable to store the value read from user memory + * @usrc: Pointer to the user space memory to read from + * + * Return: 0 if successful, -EFAULT when faulted + * + * Inlined variant of get_user(). Only use when there is a demonstrable + * performance reason. + */ +#define get_user_inline(val, usrc) \ +({ \ + __label__ efault; \ + typeof(usrc) _tmpsrc = usrc; \ + int _ret = 0; \ + \ + scoped_user_read_access(_tmpsrc, efault) \ + unsafe_get_user(val, _tmpsrc, efault); \ + if (0) { \ + efault: \ + _ret = -EFAULT; \ + } \ + _ret; \ +}) + +/** + * put_user_inline - Write to user memory inlined + * @val: The value to write + * @udst: Pointer to the user space memory to write to + * + * Return: 0 if successful, -EFAULT when faulted + * + * Inlined variant of put_user(). Only use when there is a demonstrable + * performance reason. + */ +#define put_user_inline(val, udst) \ +({ \ + __label__ efault; \ + typeof(udst) _tmpdst = udst; \ + int _ret = 0; \ + \ + scoped_user_write_access(_tmpdst, efault) \ + unsafe_put_user(val, _tmpdst, efault); \ + if (0) { \ + efault: \ + _ret = -EFAULT; \ + } \ + _ret; \ +}) + #ifdef CONFIG_HARDENED_USERCOPY void __noreturn usercopy_abort(const char *name, const char *detail, bool to_user, unsigned long offset, -- cgit v1.2.3