From b5709f6d26d65f6bb9711f4b5f98469fd507cb5b Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 3 Dec 2025 15:37:44 -0800 Subject: bpf: Support associating BPF program with struct_ops Add a new BPF command BPF_PROG_ASSOC_STRUCT_OPS to allow associating a BPF program with a struct_ops map. This command takes a file descriptor of a struct_ops map and a BPF program and set prog->aux->st_ops_assoc to the kdata of the struct_ops map. The command does not accept a struct_ops program nor a non-struct_ops map. Programs of a struct_ops map is automatically associated with the map during map update. If a program is shared between two struct_ops maps, prog->aux->st_ops_assoc will be poisoned to indicate that the associated struct_ops is ambiguous. The pointer, once poisoned, cannot be reset since we have lost track of associated struct_ops. For other program types, the associated struct_ops map, once set, cannot be changed later. This restriction may be lifted in the future if there is a use case. A kernel helper bpf_prog_get_assoc_struct_ops() can be used to retrieve the associated struct_ops pointer. The returned pointer, if not NULL, is guaranteed to be valid and point to a fully updated struct_ops struct. For struct_ops program reused in multiple struct_ops map, the return will be NULL. prog->aux->st_ops_assoc is protected by bumping the refcount for non-struct_ops programs and RCU for struct_ops programs. Since it would be inefficient to track programs associated with a struct_ops map, every non-struct_ops program will bump the refcount of the map to make sure st_ops_assoc stays valid. For a struct_ops program, it is protected by RCU as map_free will wait for an RCU grace period before disassociating the program with the map. The helper must be called in BPF program context or RCU read-side critical section. struct_ops implementers should note that the struct_ops returned may not be initialized nor attached yet. The struct_ops implementer will be responsible for tracking and checking the state of the associated struct_ops map if the use case expects an initialized or attached struct_ops. Signed-off-by: Amery Hung Signed-off-by: Andrii Nakryiko Acked-by: Andrii Nakryiko Acked-by: Martin KaFai Lau Link: https://lore.kernel.org/bpf/20251203233748.668365-3-ameryhung@gmail.com --- tools/include/uapi/linux/bpf.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'tools/include') diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index be7d8e060e10..6b92b0847ec2 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -918,6 +918,16 @@ union bpf_iter_link_info { * Number of bytes read from the stream on success, or -1 if an * error occurred (in which case, *errno* is set appropriately). * + * BPF_PROG_ASSOC_STRUCT_OPS + * Description + * Associate a BPF program with a struct_ops map. The struct_ops + * map is identified by *map_fd* and the BPF program is + * identified by *prog_fd*. + * + * Return + * 0 on success or -1 if an error occurred (in which case, + * *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -974,6 +984,7 @@ enum bpf_cmd { BPF_PROG_BIND_MAP, BPF_TOKEN_CREATE, BPF_PROG_STREAM_READ_BY_FD, + BPF_PROG_ASSOC_STRUCT_OPS, __MAX_BPF_CMD, }; @@ -1894,6 +1905,12 @@ union bpf_attr { __u32 prog_fd; } prog_stream_read; + struct { + __u32 map_fd; + __u32 prog_fd; + __u32 flags; + } prog_assoc_struct_ops; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF -- cgit v1.2.3 From ec4bb8e8dfa060c699b548f62e4d56133aafbbec Mon Sep 17 00:00:00 2001 From: Benjamin Berg Date: Wed, 24 Sep 2025 16:20:58 +0200 Subject: tools/nolibc: add ptrace support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add ptrace support, as it will be useful in UML. Signed-off-by: Benjamin Berg [Thomas: drop va_args usage and linux/uio.h inclusion] Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 1 + tools/include/nolibc/nolibc.h | 1 + tools/include/nolibc/sys/ptrace.h | 33 ++++++++++++++++++++++++++++ tools/testing/selftests/nolibc/nolibc-test.c | 2 ++ 4 files changed, 37 insertions(+) create mode 100644 tools/include/nolibc/sys/ptrace.h (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 8118e22844f1..8b883a6fe580 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -54,6 +54,7 @@ all_files := \ sys/mman.h \ sys/mount.h \ sys/prctl.h \ + sys/ptrace.h \ sys/random.h \ sys/reboot.h \ sys/resource.h \ diff --git a/tools/include/nolibc/nolibc.h b/tools/include/nolibc/nolibc.h index 272dfc961158..9c7f43b9218b 100644 --- a/tools/include/nolibc/nolibc.h +++ b/tools/include/nolibc/nolibc.h @@ -101,6 +101,7 @@ #include "sys/mman.h" #include "sys/mount.h" #include "sys/prctl.h" +#include "sys/ptrace.h" #include "sys/random.h" #include "sys/reboot.h" #include "sys/resource.h" diff --git a/tools/include/nolibc/sys/ptrace.h b/tools/include/nolibc/sys/ptrace.h new file mode 100644 index 000000000000..72ca28541633 --- /dev/null +++ b/tools/include/nolibc/sys/ptrace.h @@ -0,0 +1,33 @@ +/* SPDX-License-Identifier: LGPL-2.1 OR MIT */ +/* + * ptrace for NOLIBC + * Copyright (C) 2017-2021 Willy Tarreau + * Copyright (C) 2025 Intel Corporation + */ + +/* make sure to include all global symbols */ +#include "../nolibc.h" + +#ifndef _NOLIBC_SYS_PTRACE_H +#define _NOLIBC_SYS_PTRACE_H + +#include "../sys.h" + +#include + +/* + * long ptrace(int op, pid_t pid, void *addr, void *data); + */ +static __attribute__((unused)) +long sys_ptrace(int op, pid_t pid, void *addr, void *data) +{ + return my_syscall4(__NR_ptrace, op, pid, addr, data); +} + +static __attribute__((unused)) +ssize_t ptrace(int op, pid_t pid, void *addr, void *data) +{ + return __sysret(sys_ptrace(op, pid, addr, data)); +} + +#endif /* _NOLIBC_SYS_PTRACE_H */ diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index 3c5a226dad3a..6888b20af259 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -1406,6 +1407,7 @@ int run_syscall(int min, int max) CASE_TEST(readv_zero); EXPECT_SYSZR(1, readv(1, NULL, 0)); break; CASE_TEST(writev_badf); EXPECT_SYSER(1, writev(-1, &iov_one, 1), -1, EBADF); break; CASE_TEST(writev_zero); EXPECT_SYSZR(1, writev(1, NULL, 0)); break; + CASE_TEST(ptrace); EXPECT_SYSER(1, ptrace(PTRACE_CONT, getpid(), NULL, NULL), -1, ESRCH); break; CASE_TEST(syscall_noargs); EXPECT_SYSEQ(1, syscall(__NR_getpid), getpid()); break; CASE_TEST(syscall_args); EXPECT_SYSER(1, syscall(__NR_statx, 0, NULL, 0, 0, NULL), -1, EFAULT); break; CASE_TEST(namespace); EXPECT_SYSZR(euid0 && proc, test_namespace()); break; -- cgit v1.2.3 From cc6809f6728456c03db6750fcc94ed8b581a2cf8 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 3 Dec 2025 20:08:00 +0100 Subject: tools/nolibc: always use 64-bit mode for s390 header checks MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 32-bit s390 support was recently removed from nolibc. If the compiler defaults to 32-bit during the header checks, they fail. Make sure to always use 64-bit mode for s390 heafer checks. Fixes: 169ebcbb9082 ("tools: Remove s390 compat support") Acked-by: Willy Tarreau Acked-by: Heiko Carstens Link: https://patch.msgid.link/20251203-nolibc-headers-check-s390-v1-1-5d35e52a83ba@weissschuh.net Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/Makefile | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/Makefile b/tools/include/nolibc/Makefile index 8b883a6fe580..1958dda98895 100644 --- a/tools/include/nolibc/Makefile +++ b/tools/include/nolibc/Makefile @@ -104,9 +104,12 @@ headers_standalone: headers $(Q)$(MAKE) -C $(srctree) headers $(Q)$(MAKE) -C $(srctree) headers_install INSTALL_HDR_PATH=$(OUTPUT)sysroot +CFLAGS_s390 := -m64 +CFLAGS := $(CFLAGS_$(ARCH)) + headers_check: headers_standalone $(Q)for header in $(filter-out crt.h std.h,$(all_files)); do \ - $(CC) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \ + $(CC) $(CFLAGS) $(CLANG_CROSS_FLAGS) -Wall -Werror -nostdinc -fsyntax-only -x c /dev/null \ -I$(or $(objtree),$(srctree))/usr/include -include $$header -include $$header || exit 1; \ done -- cgit v1.2.3 From f675e35dd28f1ac326b1d6520fee3605019b381b Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:45 +0100 Subject: tools/nolibc/poll: use kernel types for system call invocations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The system calls expect 'struct __kernel_old_timespec'. While currently 'struct __kernel_old_timespec' and 'struct timespec' are compatible, this is confusing. Especially as future patches will change the definition of 'struct timespec'. Use the correct kernel type instead. Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/lkml/fbca1d3e-12e4-4c4e-8091-87464035fe39@app.fastmail.com/ Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-1-c662992f75d7@weissschuh.net --- tools/include/nolibc/poll.h | 2 +- tools/include/nolibc/sys/select.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h index 0d053f93ea99..df952bcf0905 100644 --- a/tools/include/nolibc/poll.h +++ b/tools/include/nolibc/poll.h @@ -24,7 +24,7 @@ static __attribute__((unused)) int sys_poll(struct pollfd *fds, int nfds, int timeout) { #if defined(__NR_ppoll) - struct timespec t; + struct __kernel_old_timespec t; if (timeout >= 0) { t.tv_sec = timeout / 1000; diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h index 2a5619c01277..9a29e5b98a3c 100644 --- a/tools/include/nolibc/sys/select.h +++ b/tools/include/nolibc/sys/select.h @@ -75,7 +75,7 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva #elif defined(__NR_select) return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout); #elif defined(__NR_pselect6) - struct timespec t; + struct __kernel_old_timespec t; if (timeout) { t.tv_sec = timeout->tv_sec; -- cgit v1.2.3 From 548d682649f02f4240e8ea4e99f1899978e1cfe4 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:46 +0100 Subject: tools/nolibc/poll: drop __NR_poll fallback MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This fallback is never used, remove it. Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/lkml/fbca1d3e-12e4-4c4e-8091-87464035fe39@app.fastmail.com/ Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-2-c662992f75d7@weissschuh.net --- tools/include/nolibc/poll.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h index df952bcf0905..5b4fa339fbb5 100644 --- a/tools/include/nolibc/poll.h +++ b/tools/include/nolibc/poll.h @@ -31,7 +31,7 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout) t.tv_nsec = (timeout % 1000) * 1000000; } return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); -#elif defined(__NR_ppoll_time64) +#else struct __kernel_timespec t; if (timeout >= 0) { @@ -39,8 +39,6 @@ int sys_poll(struct pollfd *fds, int nfds, int timeout) t.tv_nsec = (timeout % 1000) * 1000000; } return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); -#else - return my_syscall3(__NR_poll, fds, nfds, timeout); #endif } -- cgit v1.2.3 From 668e43737279284318064bdd4eab689a3aaed652 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:47 +0100 Subject: tools/nolibc/select: drop non-pselect based implementations MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These implementations use the libc 'struct timeval' with system calls which can lead to type mismatches. Currently this is fine, but will break with upcoming changes to 'struct timeval'. If the structure needs to be converted anyways, the implementations based on pselect can be used for all architectures. This simplifies the logic. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-3-c662992f75d7@weissschuh.net --- tools/include/nolibc/sys/select.h | 13 +------------ 1 file changed, 1 insertion(+), 12 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h index 9a29e5b98a3c..50b77dace7ef 100644 --- a/tools/include/nolibc/sys/select.h +++ b/tools/include/nolibc/sys/select.h @@ -63,18 +63,7 @@ typedef struct { static __attribute__((unused)) int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) { -#if defined(__ARCH_WANT_SYS_OLD_SELECT) && !defined(__NR__newselect) - struct sel_arg_struct { - unsigned long n; - fd_set *r, *w, *e; - struct timeval *t; - } arg = { .n = nfds, .r = rfds, .w = wfds, .e = efds, .t = timeout }; - return my_syscall1(__NR_select, &arg); -#elif defined(__NR__newselect) - return my_syscall5(__NR__newselect, nfds, rfds, wfds, efds, timeout); -#elif defined(__NR_select) - return my_syscall5(__NR_select, nfds, rfds, wfds, efds, timeout); -#elif defined(__NR_pselect6) +#if defined(__NR_pselect6) struct __kernel_old_timespec t; if (timeout) { -- cgit v1.2.3 From b8f4f5d1b99e2ae73fd448e9bbd16dc244e6586c Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:48 +0100 Subject: tools/nolibc/time: drop invocation of gettimeofday system call MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This invocation uses libc types with a system call. While this works now, upcoming changes to 'struct timeval' would require type conversions. If types are converted anyways, the clock_gettime() based fallback can be used everywhere, simplifying the code. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-4-c662992f75d7@weissschuh.net --- tools/include/nolibc/sys/time.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h index 33782a19aae9..171187836e6d 100644 --- a/tools/include/nolibc/sys/time.h +++ b/tools/include/nolibc/sys/time.h @@ -22,9 +22,6 @@ static int sys_clock_gettime(clockid_t clockid, struct timespec *tp); static __attribute__((unused)) int sys_gettimeofday(struct timeval *tv, struct timezone *tz) { -#ifdef __NR_gettimeofday - return my_syscall2(__NR_gettimeofday, tv, tz); -#else (void) tz; /* Non-NULL tz is undefined behaviour */ struct timespec tp; @@ -37,7 +34,6 @@ int sys_gettimeofday(struct timeval *tv, struct timezone *tz) } return ret; -#endif } static __attribute__((unused)) -- cgit v1.2.3 From ba7fd0384530e3dd20ea873aac21c473e3e461ae Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:49 +0100 Subject: tools/nolibc: prefer explicit 64-bit time-related system calls MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Make sure to always use the 64-bit safe system calls in preparation for 64-bit time_t on 32-bit architectures. Also prevent issues on kernels which disable CONFIG_COMPAT_32BIT_TIME and therefore don't provide the 32-bit system calls anymore. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-5-c662992f75d7@weissschuh.net --- tools/include/nolibc/poll.h | 10 +++++----- tools/include/nolibc/sys/select.h | 10 +++++----- tools/include/nolibc/sys/timerfd.h | 12 ++++++------ tools/include/nolibc/time.h | 36 ++++++++++++++++++------------------ 4 files changed, 34 insertions(+), 34 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/poll.h b/tools/include/nolibc/poll.h index 5b4fa339fbb5..e854c94647b1 100644 --- a/tools/include/nolibc/poll.h +++ b/tools/include/nolibc/poll.h @@ -23,22 +23,22 @@ static __attribute__((unused)) int sys_poll(struct pollfd *fds, int nfds, int timeout) { -#if defined(__NR_ppoll) - struct __kernel_old_timespec t; +#if defined(__NR_ppoll_time64) + struct __kernel_timespec t; if (timeout >= 0) { t.tv_sec = timeout / 1000; t.tv_nsec = (timeout % 1000) * 1000000; } - return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); + return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); #else - struct __kernel_timespec t; + struct __kernel_old_timespec t; if (timeout >= 0) { t.tv_sec = timeout / 1000; t.tv_nsec = (timeout % 1000) * 1000000; } - return my_syscall5(__NR_ppoll_time64, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); + return my_syscall5(__NR_ppoll, fds, nfds, (timeout >= 0) ? &t : NULL, NULL, 0); #endif } diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h index 50b77dace7ef..f8870ad49687 100644 --- a/tools/include/nolibc/sys/select.h +++ b/tools/include/nolibc/sys/select.h @@ -63,22 +63,22 @@ typedef struct { static __attribute__((unused)) int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeval *timeout) { -#if defined(__NR_pselect6) - struct __kernel_old_timespec t; +#if defined(__NR_pselect6_time64) + struct __kernel_timespec t; if (timeout) { t.tv_sec = timeout->tv_sec; t.tv_nsec = timeout->tv_usec * 1000; } - return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); + return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); #else - struct __kernel_timespec t; + struct __kernel_old_timespec t; if (timeout) { t.tv_sec = timeout->tv_sec; t.tv_nsec = timeout->tv_usec * 1000; } - return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); + return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); #endif } diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h index 5dd61030c991..66f779553d31 100644 --- a/tools/include/nolibc/sys/timerfd.h +++ b/tools/include/nolibc/sys/timerfd.h @@ -32,9 +32,7 @@ int timerfd_create(int clockid, int flags) static __attribute__((unused)) int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) { -#if defined(__NR_timerfd_gettime) - return my_syscall2(__NR_timerfd_gettime, fd, curr_value); -#else +#if defined(__NR_timerfd_gettime64) struct __kernel_itimerspec kcurr_value; int ret; @@ -42,6 +40,8 @@ int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); return ret; +#else + return my_syscall2(__NR_timerfd_gettime, fd, curr_value); #endif } @@ -56,9 +56,7 @@ static __attribute__((unused)) int sys_timerfd_settime(int fd, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) { -#if defined(__NR_timerfd_settime) - return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); -#else +#if defined(__NR_timerfd_settime64) struct __kernel_itimerspec knew_value, kold_value; int ret; @@ -70,6 +68,8 @@ int sys_timerfd_settime(int fd, int flags, __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); } return ret; +#else + return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); #endif } diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 48e78f8becf9..45df9b09d7b6 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -43,9 +43,7 @@ void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struc static __attribute__((unused)) int sys_clock_getres(clockid_t clockid, struct timespec *res) { -#if defined(__NR_clock_getres) - return my_syscall2(__NR_clock_getres, clockid, res); -#else +#if defined(__NR_clock_getres_time64) struct __kernel_timespec kres; int ret; @@ -53,6 +51,8 @@ int sys_clock_getres(clockid_t clockid, struct timespec *res) if (res) __nolibc_timespec_kernel_to_user(&kres, res); return ret; +#else + return my_syscall2(__NR_clock_getres, clockid, res); #endif } @@ -65,9 +65,7 @@ int clock_getres(clockid_t clockid, struct timespec *res) static __attribute__((unused)) int sys_clock_gettime(clockid_t clockid, struct timespec *tp) { -#if defined(__NR_clock_gettime) - return my_syscall2(__NR_clock_gettime, clockid, tp); -#else +#if defined(__NR_clock_gettime64) struct __kernel_timespec ktp; int ret; @@ -75,6 +73,8 @@ int sys_clock_gettime(clockid_t clockid, struct timespec *tp) if (tp) __nolibc_timespec_kernel_to_user(&ktp, tp); return ret; +#else + return my_syscall2(__NR_clock_gettime, clockid, tp); #endif } @@ -87,13 +87,13 @@ int clock_gettime(clockid_t clockid, struct timespec *tp) static __attribute__((unused)) int sys_clock_settime(clockid_t clockid, struct timespec *tp) { -#if defined(__NR_clock_settime) - return my_syscall2(__NR_clock_settime, clockid, tp); -#else +#if defined(__NR_clock_settime64) struct __kernel_timespec ktp; __nolibc_timespec_user_to_kernel(tp, &ktp); return my_syscall2(__NR_clock_settime64, clockid, &ktp); +#else + return my_syscall2(__NR_clock_settime, clockid, tp); #endif } @@ -107,9 +107,7 @@ static __attribute__((unused)) int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, struct timespec *rmtp) { -#if defined(__NR_clock_nanosleep) - return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); -#else +#if defined(__NR_clock_nanosleep_time64) struct __kernel_timespec krqtp, krmtp; int ret; @@ -118,6 +116,8 @@ int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqt if (rmtp) __nolibc_timespec_kernel_to_user(&krmtp, rmtp); return ret; +#else + return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); #endif } @@ -189,9 +189,7 @@ int timer_delete(timer_t timerid) static __attribute__((unused)) int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value) { -#if defined(__NR_timer_gettime) - return my_syscall2(__NR_timer_gettime, timerid, curr_value); -#else +#if defined(__NR_timer_gettime64) struct __kernel_itimerspec kcurr_value; int ret; @@ -199,6 +197,8 @@ int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value) __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); return ret; +#else + return my_syscall2(__NR_timer_gettime, timerid, curr_value); #endif } @@ -212,9 +212,7 @@ static __attribute__((unused)) int sys_timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) { -#if defined(__NR_timer_settime) - return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value); -#else +#if defined(__NR_timer_settime64) struct __kernel_itimerspec knew_value, kold_value; int ret; @@ -226,6 +224,8 @@ int sys_timer_settime(timer_t timerid, int flags, __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); } return ret; +#else + return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value); #endif } -- cgit v1.2.3 From 7efd15d22a9b7e62d0659471bde25c35ef50c9e5 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:50 +0100 Subject: tools/nolibc/gettimeofday: avoid libgcc 64-bit divisions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit timespec::tv_nsec is going to be 64-bit wide even on 32-bit architectures. As not all architectures support 64-bit division instructions, calls to libgcc (__divdi3()) may be emitted by the compiler which are not provided by nolibc. As tv_nsec is guaranteed to always fit into an uint32_t, perform a 32-bit division instead. Signed-off-by: Thomas Weißschuh Reviewed-by: Arnd Bergmann Acked-by: Willy Tarreau Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-6-c662992f75d7@weissschuh.net --- tools/include/nolibc/sys/time.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/time.h b/tools/include/nolibc/sys/time.h index 171187836e6d..afdb7e326df1 100644 --- a/tools/include/nolibc/sys/time.h +++ b/tools/include/nolibc/sys/time.h @@ -30,7 +30,7 @@ int sys_gettimeofday(struct timeval *tv, struct timezone *tz) ret = sys_clock_gettime(CLOCK_REALTIME, &tp); if (!ret && tv) { tv->tv_sec = tp.tv_sec; - tv->tv_usec = tp.tv_nsec / 1000; + tv->tv_usec = (uint32_t)tp.tv_nsec / 1000; } return ret; -- cgit v1.2.3 From 47c17d97681d9c5d080acfdb273fa0856c930e74 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:51 +0100 Subject: tools/nolibc/select: avoid libgcc 64-bit multiplications MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit timeval::tv_usec is going to be 64-bit wide even on 32-bit architectures. As not all architectures support 64-bit multiplications instructions, calls to libgcc (__multi3()) may be emitted by the compiler which are not provided by nolibc. As tv_usec and tv_nsec are guaranteed to always fit into an uint32_t, perform a 32-bit multiplication instead. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-7-c662992f75d7@weissschuh.net --- tools/include/nolibc/sys/select.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/select.h b/tools/include/nolibc/sys/select.h index f8870ad49687..80cb3755ba18 100644 --- a/tools/include/nolibc/sys/select.h +++ b/tools/include/nolibc/sys/select.h @@ -68,7 +68,7 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva if (timeout) { t.tv_sec = timeout->tv_sec; - t.tv_nsec = timeout->tv_usec * 1000; + t.tv_nsec = (uint32_t)timeout->tv_usec * 1000; } return my_syscall6(__NR_pselect6_time64, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); #else @@ -76,7 +76,7 @@ int sys_select(int nfds, fd_set *rfds, fd_set *wfds, fd_set *efds, struct timeva if (timeout) { t.tv_sec = timeout->tv_sec; - t.tv_nsec = timeout->tv_usec * 1000; + t.tv_nsec = (uint32_t)timeout->tv_usec * 1000; } return my_syscall6(__NR_pselect6, nfds, rfds, wfds, efds, timeout ? &t : NULL, NULL); #endif -- cgit v1.2.3 From f5aa863aea6c1ec20b85cc0b0a22e99597f0cb50 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:52 +0100 Subject: tools/nolibc: use custom structs timespec and timeval MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A custom 'struct timespec' and 'struct timeval' will be necessary for 64-bit time types on 32-bit architectures. will define other time-related types in terms of the custom 'struct timespec'. Add custom struct definitions which for now mirror exactly the ones from the UAPI headers, but provide the foundation for further changes. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-8-c662992f75d7@weissschuh.net --- tools/include/nolibc/arch-s390.h | 3 +++ tools/include/nolibc/types.h | 16 +++++++++++++++- 2 files changed, 18 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h index 74125a254ce3..5bee6ecbde0a 100644 --- a/tools/include/nolibc/arch-s390.h +++ b/tools/include/nolibc/arch-s390.h @@ -5,6 +5,9 @@ #ifndef _NOLIBC_ARCH_S390_H #define _NOLIBC_ARCH_S390_H + +#include "types.h" + #include #include diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index 470a5f77bc0f..c8ed4d9cae8a 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -13,9 +13,23 @@ #include "std.h" #include #include -#include +#include #include +struct timespec { + __kernel_time_t tv_sec; + long tv_nsec; +}; +#define _STRUCT_TIMESPEC + +struct timeval { + __kernel_time_t tv_sec; + __kernel_suseconds_t tv_usec; +}; + +#define timeval __nolibc_kernel_timeval +#include +#undef timeval /* Only the generic macros and types may be defined here. The arch-specific * ones such as the O_RDONLY and related macros used by fcntl() and open() -- cgit v1.2.3 From bdcfc417f26ffd1a7e214d1cce78500dc4dbc2d5 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:53 +0100 Subject: tools/nolibc: always use 64-bit time types MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit 32-bit time types will stop working in 2038. Switch to 64-bit time types everywhere. Suggested-by: Arnd Bergmann Link: https://lore.kernel.org/lkml/cec27d94-c99d-4c57-9a12-275ea663dda8@app.fastmail.com/ Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-9-c662992f75d7@weissschuh.net --- tools/include/nolibc/std.h | 2 +- tools/include/nolibc/types.h | 9 +++++---- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/std.h b/tools/include/nolibc/std.h index 392f4dd94158..b9a116123902 100644 --- a/tools/include/nolibc/std.h +++ b/tools/include/nolibc/std.h @@ -29,6 +29,6 @@ typedef unsigned long nlink_t; typedef int64_t off_t; typedef signed long blksize_t; typedef signed long blkcnt_t; -typedef __kernel_time_t time_t; +typedef __kernel_time64_t time_t; #endif /* _NOLIBC_STD_H */ diff --git a/tools/include/nolibc/types.h b/tools/include/nolibc/types.h index c8ed4d9cae8a..8f3cb18df7f1 100644 --- a/tools/include/nolibc/types.h +++ b/tools/include/nolibc/types.h @@ -17,14 +17,15 @@ #include struct timespec { - __kernel_time_t tv_sec; - long tv_nsec; + time_t tv_sec; + int64_t tv_nsec; }; #define _STRUCT_TIMESPEC +/* Never use with system calls */ struct timeval { - __kernel_time_t tv_sec; - __kernel_suseconds_t tv_usec; + time_t tv_sec; + int64_t tv_usec; }; #define timeval __nolibc_kernel_timeval -- cgit v1.2.3 From be05f571464404432a0f8fe1c81a86a0862da283 Mon Sep 17 00:00:00 2001 From: "Mike Rapoport (Microsoft)" Date: Sun, 28 Dec 2025 20:39:42 +0200 Subject: memblock test: include from tools mm.h stub MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit memblock test compilation fails: memblock.c: In function ‘memblock_validate_numa_coverage’: memblock.c:784:58: error: ‘SZ_1M’ undeclared (first use in this function) 784 | mem_size_mb = memblock_phys_mem_size() / SZ_1M; | ^~~~~ The SZ_1M is defined in sizes.h, but it is not included by stub version of mm.h in tools/include/linux. Add include of sizes.h to tools/include/linux/mm.h to fix the compilation of memblock tests. Link: https://patch.msgid.link/20251228183942.3628918-1-rppt@kernel.org Signed-off-by: Mike Rapoport (Microsoft) Reviewed-by: Anshuman Khandual --- tools/include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include') diff --git a/tools/include/linux/mm.h b/tools/include/linux/mm.h index 677c37e4a18c..028f3faf46e7 100644 --- a/tools/include/linux/mm.h +++ b/tools/include/linux/mm.h @@ -4,6 +4,7 @@ #include #include +#include #define PAGE_SHIFT 12 #define PAGE_SIZE (_AC(1, UL) << PAGE_SHIFT) -- cgit v1.2.3 From e4588c25c9d122b5847b88e18b184404b6959160 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Fri, 19 Dec 2025 16:40:13 +0100 Subject: compiler-context-analysis: Remove __cond_lock() function-like helper As discussed in [1], removing __cond_lock() will improve the readability of trylock code. Now that Sparse context tracking support has been removed, we can also remove __cond_lock(). Change existing APIs to either drop __cond_lock() completely, or make use of the __cond_acquires() function attribute instead. In particular, spinlock and rwlock implementations required switching over to inline helpers rather than statement-expressions for their trylock_* variants. Suggested-by: Peter Zijlstra Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lore.kernel.org/all/20250207082832.GU7145@noisy.programming.kicks-ass.net/ [1] Link: https://patch.msgid.link/20251219154418.3592607-25-elver@google.com --- Documentation/dev-tools/context-analysis.rst | 2 - Documentation/mm/process_addrs.rst | 6 +-- drivers/net/wireless/intel/iwlwifi/iwl-trans.c | 4 +- drivers/net/wireless/intel/iwlwifi/iwl-trans.h | 6 +-- .../wireless/intel/iwlwifi/pcie/gen1_2/internal.h | 5 +- .../net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c | 4 +- include/linux/compiler-context-analysis.h | 31 ----------- include/linux/lockref.h | 4 +- include/linux/mm.h | 33 ++---------- include/linux/rwlock.h | 11 ++-- include/linux/rwlock_api_smp.h | 14 ++++- include/linux/rwlock_rt.h | 21 ++++---- include/linux/sched/signal.h | 14 +---- include/linux/spinlock.h | 45 ++++++---------- include/linux/spinlock_api_smp.h | 20 +++++++ include/linux/spinlock_api_up.h | 61 ++++++++++++++++++---- include/linux/spinlock_rt.h | 26 +++++---- kernel/signal.c | 4 +- kernel/time/posix-timers.c | 13 ++--- lib/dec_and_lock.c | 8 +-- lib/lockref.c | 1 - mm/memory.c | 4 +- mm/pgtable-generic.c | 19 ++++--- tools/include/linux/compiler_types.h | 2 - 24 files changed, 163 insertions(+), 195 deletions(-) (limited to 'tools/include') diff --git a/Documentation/dev-tools/context-analysis.rst b/Documentation/dev-tools/context-analysis.rst index 8dd6c0d695aa..e69896e597b6 100644 --- a/Documentation/dev-tools/context-analysis.rst +++ b/Documentation/dev-tools/context-analysis.rst @@ -112,10 +112,8 @@ Keywords __releases_shared __acquire __release - __cond_lock __acquire_shared __release_shared - __cond_lock_shared __acquire_ret __acquire_shared_ret context_unsafe diff --git a/Documentation/mm/process_addrs.rst b/Documentation/mm/process_addrs.rst index 7f2f3e87071d..851680ead45f 100644 --- a/Documentation/mm/process_addrs.rst +++ b/Documentation/mm/process_addrs.rst @@ -583,7 +583,7 @@ To access PTE-level page tables, a helper like :c:func:`!pte_offset_map_lock` or :c:func:`!pte_offset_map` can be used depending on stability requirements. These map the page table into kernel memory if required, take the RCU lock, and depending on variant, may also look up or acquire the PTE lock. -See the comment on :c:func:`!__pte_offset_map_lock`. +See the comment on :c:func:`!pte_offset_map_lock`. Atomicity ^^^^^^^^^ @@ -667,7 +667,7 @@ must be released via :c:func:`!pte_unmap_unlock`. .. note:: There are some variants on this, such as :c:func:`!pte_offset_map_rw_nolock` when we know we hold the PTE stable but for brevity we do not explore this. See the comment for - :c:func:`!__pte_offset_map_lock` for more details. + :c:func:`!pte_offset_map_lock` for more details. When modifying data in ranges we typically only wish to allocate higher page tables as necessary, using these locks to avoid races or overwriting anything, @@ -686,7 +686,7 @@ At the leaf page table, that is the PTE, we can't entirely rely on this pattern as we have separate PMD and PTE locks and a THP collapse for instance might have eliminated the PMD entry as well as the PTE from under us. -This is why :c:func:`!__pte_offset_map_lock` locklessly retrieves the PMD entry +This is why :c:func:`!pte_offset_map_lock` locklessly retrieves the PMD entry for the PTE, carefully checking it is as expected, before acquiring the PTE-specific lock, and then *again* checking that the PMD entry is as expected. diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c index cc8a84018f70..fa1442246662 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.c +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.c @@ -548,11 +548,11 @@ int iwl_trans_read_config32(struct iwl_trans *trans, u32 ofs, return iwl_trans_pcie_read_config32(trans, ofs, val); } -bool _iwl_trans_grab_nic_access(struct iwl_trans *trans) +bool iwl_trans_grab_nic_access(struct iwl_trans *trans) { return iwl_trans_pcie_grab_nic_access(trans); } -IWL_EXPORT_SYMBOL(_iwl_trans_grab_nic_access); +IWL_EXPORT_SYMBOL(iwl_trans_grab_nic_access); void __releases(nic_access) iwl_trans_release_nic_access(struct iwl_trans *trans) diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h index a552669db6e2..688f9fee2821 100644 --- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h +++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h @@ -1063,11 +1063,7 @@ int iwl_trans_sw_reset(struct iwl_trans *trans); void iwl_trans_set_bits_mask(struct iwl_trans *trans, u32 reg, u32 mask, u32 value); -bool _iwl_trans_grab_nic_access(struct iwl_trans *trans); - -#define iwl_trans_grab_nic_access(trans) \ - __cond_lock(nic_access, \ - likely(_iwl_trans_grab_nic_access(trans))) +bool iwl_trans_grab_nic_access(struct iwl_trans *trans); void __releases(nic_access) iwl_trans_release_nic_access(struct iwl_trans *trans); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/internal.h index 207c56e338dd..7b7b35e442f9 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/internal.h +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/internal.h @@ -553,10 +553,7 @@ void iwl_trans_pcie_free(struct iwl_trans *trans); void iwl_trans_pcie_free_pnvm_dram_regions(struct iwl_dram_regions *dram_regions, struct device *dev); -bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent); -#define _iwl_trans_pcie_grab_nic_access(trans, silent) \ - __cond_lock(nic_access_nobh, \ - likely(__iwl_trans_pcie_grab_nic_access(trans, silent))) +bool _iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent); void iwl_trans_pcie_check_product_reset_status(struct pci_dev *pdev); void iwl_trans_pcie_check_product_reset_mode(struct pci_dev *pdev); diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c index 164d060ec617..415a19ea9f06 100644 --- a/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c +++ b/drivers/net/wireless/intel/iwlwifi/pcie/gen1_2/trans.c @@ -2327,7 +2327,7 @@ EXPORT_SYMBOL(iwl_trans_pcie_reset); * This version doesn't disable BHs but rather assumes they're * already disabled. */ -bool __iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent) +bool _iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans, bool silent) { int ret; struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans); @@ -2415,7 +2415,7 @@ bool iwl_trans_pcie_grab_nic_access(struct iwl_trans *trans) bool ret; local_bh_disable(); - ret = __iwl_trans_pcie_grab_nic_access(trans, false); + ret = _iwl_trans_pcie_grab_nic_access(trans, false); if (ret) { /* keep BHs disabled until iwl_trans_pcie_release_nic_access */ return ret; diff --git a/include/linux/compiler-context-analysis.h b/include/linux/compiler-context-analysis.h index cb728822343f..4f7559d7ae91 100644 --- a/include/linux/compiler-context-analysis.h +++ b/include/linux/compiler-context-analysis.h @@ -341,24 +341,6 @@ static inline void _context_unsafe_alias(void **p) { } */ #define __release(x) __release_ctx_lock(x) -/** - * __cond_lock() - function that conditionally acquires a context lock - * exclusively - * @x: context lock instance pinter - * @c: boolean expression - * - * Return: result of @c - * - * No-op function that conditionally acquires context lock instance @x - * exclusively, if the boolean expression @c is true. The result of @c is the - * return value; for example: - * - * .. code-block:: c - * - * #define spin_trylock(l) __cond_lock(&lock, _spin_trylock(&lock)) - */ -#define __cond_lock(x, c) __try_acquire_ctx_lock(x, c) - /** * __must_hold_shared() - function attribute, caller must hold shared context lock * @@ -417,19 +399,6 @@ static inline void _context_unsafe_alias(void **p) { } */ #define __release_shared(x) __release_shared_ctx_lock(x) -/** - * __cond_lock_shared() - function that conditionally acquires a context lock shared - * @x: context lock instance pinter - * @c: boolean expression - * - * Return: result of @c - * - * No-op function that conditionally acquires context lock instance @x with - * shared access, if the boolean expression @c is true. The result of @c is the - * return value. - */ -#define __cond_lock_shared(x, c) __try_acquire_shared_ctx_lock(x, c) - /** * __acquire_ret() - helper to acquire context lock of return value * @call: call expression diff --git a/include/linux/lockref.h b/include/linux/lockref.h index 815d871fadfc..6ded24cdb4a8 100644 --- a/include/linux/lockref.h +++ b/include/linux/lockref.h @@ -49,9 +49,7 @@ static inline void lockref_init(struct lockref *lockref) void lockref_get(struct lockref *lockref); int lockref_put_return(struct lockref *lockref); bool lockref_get_not_zero(struct lockref *lockref); -bool lockref_put_or_lock(struct lockref *lockref); -#define lockref_put_or_lock(_lockref) \ - (!__cond_lock((_lockref)->lock, !lockref_put_or_lock(_lockref))) +bool lockref_put_or_lock(struct lockref *lockref) __cond_acquires(false, &lockref->lock); void lockref_mark_dead(struct lockref *lockref); bool lockref_get_not_dead(struct lockref *lockref); diff --git a/include/linux/mm.h b/include/linux/mm.h index 15076261d0c2..f369cb633516 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2975,15 +2975,8 @@ static inline pud_t pud_mkspecial(pud_t pud) } #endif /* CONFIG_ARCH_SUPPORTS_PUD_PFNMAP */ -extern pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, - spinlock_t **ptl); -static inline pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, - spinlock_t **ptl) -{ - pte_t *ptep; - __cond_lock(*ptl, ptep = __get_locked_pte(mm, addr, ptl)); - return ptep; -} +extern pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl); #ifdef __PAGETABLE_P4D_FOLDED static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, @@ -3337,31 +3330,15 @@ static inline bool pagetable_pte_ctor(struct mm_struct *mm, return true; } -pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); -static inline pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, - pmd_t *pmdvalp) -{ - pte_t *pte; +pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp); - __cond_lock(RCU, pte = ___pte_offset_map(pmd, addr, pmdvalp)); - return pte; -} static inline pte_t *pte_offset_map(pmd_t *pmd, unsigned long addr) { return __pte_offset_map(pmd, addr, NULL); } -pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, spinlock_t **ptlp); -static inline pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, spinlock_t **ptlp) -{ - pte_t *pte; - - __cond_lock(RCU, __cond_lock(*ptlp, - pte = __pte_offset_map_lock(mm, pmd, addr, ptlp))); - return pte; -} +pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, spinlock_t **ptlp); pte_t *pte_offset_map_ro_nolock(struct mm_struct *mm, pmd_t *pmd, unsigned long addr, spinlock_t **ptlp); diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 151f9d5f3288..65a5b55e1bcd 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -50,8 +50,8 @@ do { \ * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various * methods are defined as nops in the case they are not required. */ -#define read_trylock(lock) __cond_lock_shared(lock, _raw_read_trylock(lock)) -#define write_trylock(lock) __cond_lock(lock, _raw_write_trylock(lock)) +#define read_trylock(lock) _raw_read_trylock(lock) +#define write_trylock(lock) _raw_write_trylock(lock) #define write_lock(lock) _raw_write_lock(lock) #define read_lock(lock) _raw_read_lock(lock) @@ -113,12 +113,7 @@ do { \ } while (0) #define write_unlock_bh(lock) _raw_write_unlock_bh(lock) -#define write_trylock_irqsave(lock, flags) \ - __cond_lock(lock, ({ \ - local_irq_save(flags); \ - _raw_write_trylock(lock) ? \ - 1 : ({ local_irq_restore(flags); 0; }); \ - })) +#define write_trylock_irqsave(lock, flags) _raw_write_trylock_irqsave(lock, &(flags)) #ifdef arch_rwlock_is_contended #define rwlock_is_contended(lock) \ diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index 6d5cc0b7be1f..d903b17c46ca 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -26,8 +26,8 @@ unsigned long __lockfunc _raw_read_lock_irqsave(rwlock_t *lock) __acquires(lock); unsigned long __lockfunc _raw_write_lock_irqsave(rwlock_t *lock) __acquires(lock); -int __lockfunc _raw_read_trylock(rwlock_t *lock); -int __lockfunc _raw_write_trylock(rwlock_t *lock); +int __lockfunc _raw_read_trylock(rwlock_t *lock) __cond_acquires_shared(true, lock); +int __lockfunc _raw_write_trylock(rwlock_t *lock) __cond_acquires(true, lock); void __lockfunc _raw_read_unlock(rwlock_t *lock) __releases_shared(lock); void __lockfunc _raw_write_unlock(rwlock_t *lock) __releases(lock); void __lockfunc _raw_read_unlock_bh(rwlock_t *lock) __releases_shared(lock); @@ -41,6 +41,16 @@ void __lockfunc _raw_write_unlock_irqrestore(rwlock_t *lock, unsigned long flags) __releases(lock); +static inline bool _raw_write_trylock_irqsave(rwlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) +{ + local_irq_save(*flags); + if (_raw_write_trylock(lock)) + return true; + local_irq_restore(*flags); + return false; +} + #ifdef CONFIG_INLINE_READ_LOCK #define _raw_read_lock(lock) __raw_read_lock(lock) #endif diff --git a/include/linux/rwlock_rt.h b/include/linux/rwlock_rt.h index f64d6d319a47..37b387dcab21 100644 --- a/include/linux/rwlock_rt.h +++ b/include/linux/rwlock_rt.h @@ -26,11 +26,11 @@ do { \ } while (0) extern void rt_read_lock(rwlock_t *rwlock) __acquires_shared(rwlock); -extern int rt_read_trylock(rwlock_t *rwlock); +extern int rt_read_trylock(rwlock_t *rwlock) __cond_acquires_shared(true, rwlock); extern void rt_read_unlock(rwlock_t *rwlock) __releases_shared(rwlock); extern void rt_write_lock(rwlock_t *rwlock) __acquires(rwlock); extern void rt_write_lock_nested(rwlock_t *rwlock, int subclass) __acquires(rwlock); -extern int rt_write_trylock(rwlock_t *rwlock); +extern int rt_write_trylock(rwlock_t *rwlock) __cond_acquires(true, rwlock); extern void rt_write_unlock(rwlock_t *rwlock) __releases(rwlock); static __always_inline void read_lock(rwlock_t *rwlock) @@ -59,7 +59,7 @@ static __always_inline void read_lock_irq(rwlock_t *rwlock) flags = 0; \ } while (0) -#define read_trylock(lock) __cond_lock_shared(lock, rt_read_trylock(lock)) +#define read_trylock(lock) rt_read_trylock(lock) static __always_inline void read_unlock(rwlock_t *rwlock) __releases_shared(rwlock) @@ -123,14 +123,15 @@ static __always_inline void write_lock_irq(rwlock_t *rwlock) flags = 0; \ } while (0) -#define write_trylock(lock) __cond_lock(lock, rt_write_trylock(lock)) +#define write_trylock(lock) rt_write_trylock(lock) -#define write_trylock_irqsave(lock, flags) \ - __cond_lock(lock, ({ \ - typecheck(unsigned long, flags); \ - flags = 0; \ - rt_write_trylock(lock); \ - })) +static __always_inline bool _write_trylock_irqsave(rwlock_t *rwlock, unsigned long *flags) + __cond_acquires(true, rwlock) +{ + *flags = 0; + return rt_write_trylock(rwlock); +} +#define write_trylock_irqsave(lock, flags) _write_trylock_irqsave(lock, &(flags)) static __always_inline void write_unlock(rwlock_t *rwlock) __releases(rwlock) diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 7d6449982822..a63f65aa5bdd 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -737,18 +737,8 @@ static inline int thread_group_empty(struct task_struct *p) #define delay_group_leader(p) \ (thread_group_leader(p) && !thread_group_empty(p)) -extern struct sighand_struct *__lock_task_sighand(struct task_struct *task, - unsigned long *flags); - -static inline struct sighand_struct *lock_task_sighand(struct task_struct *task, - unsigned long *flags) -{ - struct sighand_struct *ret; - - ret = __lock_task_sighand(task, flags); - (void)__cond_lock(&task->sighand->siglock, ret); - return ret; -} +extern struct sighand_struct *lock_task_sighand(struct task_struct *task, + unsigned long *flags); static inline void unlock_task_sighand(struct task_struct *task, unsigned long *flags) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 7e560c7a7b23..396b8c5d6c1b 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -213,7 +213,7 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) * various methods are defined as nops in the case they are not * required. */ -#define raw_spin_trylock(lock) __cond_lock(lock, _raw_spin_trylock(lock)) +#define raw_spin_trylock(lock) _raw_spin_trylock(lock) #define raw_spin_lock(lock) _raw_spin_lock(lock) @@ -284,22 +284,11 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) } while (0) #define raw_spin_unlock_bh(lock) _raw_spin_unlock_bh(lock) -#define raw_spin_trylock_bh(lock) \ - __cond_lock(lock, _raw_spin_trylock_bh(lock)) +#define raw_spin_trylock_bh(lock) _raw_spin_trylock_bh(lock) -#define raw_spin_trylock_irq(lock) \ - __cond_lock(lock, ({ \ - local_irq_disable(); \ - _raw_spin_trylock(lock) ? \ - 1 : ({ local_irq_enable(); 0; }); \ - })) +#define raw_spin_trylock_irq(lock) _raw_spin_trylock_irq(lock) -#define raw_spin_trylock_irqsave(lock, flags) \ - __cond_lock(lock, ({ \ - local_irq_save(flags); \ - _raw_spin_trylock(lock) ? \ - 1 : ({ local_irq_restore(flags); 0; }); \ - })) +#define raw_spin_trylock_irqsave(lock, flags) _raw_spin_trylock_irqsave(lock, &(flags)) #ifndef CONFIG_PREEMPT_RT /* Include rwlock functions for !RT */ @@ -433,8 +422,12 @@ static __always_inline int spin_trylock_irq(spinlock_t *lock) return raw_spin_trylock_irq(&lock->rlock); } -#define spin_trylock_irqsave(lock, flags) \ - __cond_lock(lock, raw_spin_trylock_irqsave(spinlock_check(lock), flags)) +static __always_inline bool _spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) __no_context_analysis +{ + return raw_spin_trylock_irqsave(spinlock_check(lock), *flags); +} +#define spin_trylock_irqsave(lock, flags) _spin_trylock_irqsave(lock, &(flags)) /** * spin_is_locked() - Check whether a spinlock is locked. @@ -512,23 +505,17 @@ static inline int rwlock_needbreak(rwlock_t *lock) * Decrements @atomic by 1. If the result is 0, returns true and locks * @lock. Returns false for all other cases. */ -extern int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock); -#define atomic_dec_and_lock(atomic, lock) \ - __cond_lock(lock, _atomic_dec_and_lock(atomic, lock)) +extern int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) __cond_acquires(true, lock); extern int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, - unsigned long *flags); -#define atomic_dec_and_lock_irqsave(atomic, lock, flags) \ - __cond_lock(lock, _atomic_dec_and_lock_irqsave(atomic, lock, &(flags))) + unsigned long *flags) __cond_acquires(true, lock); +#define atomic_dec_and_lock_irqsave(atomic, lock, flags) _atomic_dec_and_lock_irqsave(atomic, lock, &(flags)) -extern int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock); -#define atomic_dec_and_raw_lock(atomic, lock) \ - __cond_lock(lock, _atomic_dec_and_raw_lock(atomic, lock)) +extern int atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock) __cond_acquires(true, lock); extern int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, - unsigned long *flags); -#define atomic_dec_and_raw_lock_irqsave(atomic, lock, flags) \ - __cond_lock(lock, _atomic_dec_and_raw_lock_irqsave(atomic, lock, &(flags))) + unsigned long *flags) __cond_acquires(true, lock); +#define atomic_dec_and_raw_lock_irqsave(atomic, lock, flags) _atomic_dec_and_raw_lock_irqsave(atomic, lock, &(flags)) int __alloc_bucket_spinlocks(spinlock_t **locks, unsigned int *lock_mask, size_t max_size, unsigned int cpu_mult, diff --git a/include/linux/spinlock_api_smp.h b/include/linux/spinlock_api_smp.h index 7e7d7d373213..bda5e7a390cd 100644 --- a/include/linux/spinlock_api_smp.h +++ b/include/linux/spinlock_api_smp.h @@ -95,6 +95,26 @@ static inline int __raw_spin_trylock(raw_spinlock_t *lock) return 0; } +static __always_inline bool _raw_spin_trylock_irq(raw_spinlock_t *lock) + __cond_acquires(true, lock) +{ + local_irq_disable(); + if (_raw_spin_trylock(lock)) + return true; + local_irq_enable(); + return false; +} + +static __always_inline bool _raw_spin_trylock_irqsave(raw_spinlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) +{ + local_irq_save(*flags); + if (_raw_spin_trylock(lock)) + return true; + local_irq_restore(*flags); + return false; +} + /* * If lockdep is enabled then we use the non-preemption spin-ops * even on CONFIG_PREEMPTION, because lockdep assumes that interrupts are diff --git a/include/linux/spinlock_api_up.h b/include/linux/spinlock_api_up.h index 018f5aabc1be..a9d5c7c66e03 100644 --- a/include/linux/spinlock_api_up.h +++ b/include/linux/spinlock_api_up.h @@ -24,14 +24,11 @@ * flags straight, to suppress compiler warnings of unused lock * variables, and to add the proper checker annotations: */ -#define ___LOCK_void(lock) \ - do { (void)(lock); } while (0) - #define ___LOCK_(lock) \ - do { __acquire(lock); ___LOCK_void(lock); } while (0) + do { __acquire(lock); (void)(lock); } while (0) #define ___LOCK_shared(lock) \ - do { __acquire_shared(lock); ___LOCK_void(lock); } while (0) + do { __acquire_shared(lock); (void)(lock); } while (0) #define __LOCK(lock, ...) \ do { preempt_disable(); ___LOCK_##__VA_ARGS__(lock); } while (0) @@ -78,10 +75,56 @@ #define _raw_spin_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) #define _raw_read_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags, shared) #define _raw_write_lock_irqsave(lock, flags) __LOCK_IRQSAVE(lock, flags) -#define _raw_spin_trylock(lock) ({ __LOCK(lock, void); 1; }) -#define _raw_read_trylock(lock) ({ __LOCK(lock, void); 1; }) -#define _raw_write_trylock(lock) ({ __LOCK(lock, void); 1; }) -#define _raw_spin_trylock_bh(lock) ({ __LOCK_BH(lock, void); 1; }) + +static __always_inline int _raw_spin_trylock(raw_spinlock_t *lock) + __cond_acquires(true, lock) +{ + __LOCK(lock); + return 1; +} + +static __always_inline int _raw_spin_trylock_bh(raw_spinlock_t *lock) + __cond_acquires(true, lock) +{ + __LOCK_BH(lock); + return 1; +} + +static __always_inline int _raw_spin_trylock_irq(raw_spinlock_t *lock) + __cond_acquires(true, lock) +{ + __LOCK_IRQ(lock); + return 1; +} + +static __always_inline int _raw_spin_trylock_irqsave(raw_spinlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) +{ + __LOCK_IRQSAVE(lock, *(flags)); + return 1; +} + +static __always_inline int _raw_read_trylock(rwlock_t *lock) + __cond_acquires_shared(true, lock) +{ + __LOCK(lock, shared); + return 1; +} + +static __always_inline int _raw_write_trylock(rwlock_t *lock) + __cond_acquires(true, lock) +{ + __LOCK(lock); + return 1; +} + +static __always_inline int _raw_write_trylock_irqsave(rwlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) +{ + __LOCK_IRQSAVE(lock, *(flags)); + return 1; +} + #define _raw_spin_unlock(lock) __UNLOCK(lock) #define _raw_read_unlock(lock) __UNLOCK(lock, shared) #define _raw_write_unlock(lock) __UNLOCK(lock) diff --git a/include/linux/spinlock_rt.h b/include/linux/spinlock_rt.h index 6bab73ee1384..0a585768358f 100644 --- a/include/linux/spinlock_rt.h +++ b/include/linux/spinlock_rt.h @@ -37,8 +37,8 @@ extern void rt_spin_lock_nested(spinlock_t *lock, int subclass) __acquires(lock) extern void rt_spin_lock_nest_lock(spinlock_t *lock, struct lockdep_map *nest_lock) __acquires(lock); extern void rt_spin_unlock(spinlock_t *lock) __releases(lock); extern void rt_spin_lock_unlock(spinlock_t *lock); -extern int rt_spin_trylock_bh(spinlock_t *lock); -extern int rt_spin_trylock(spinlock_t *lock); +extern int rt_spin_trylock_bh(spinlock_t *lock) __cond_acquires(true, lock); +extern int rt_spin_trylock(spinlock_t *lock) __cond_acquires(true, lock); static __always_inline void spin_lock(spinlock_t *lock) __acquires(lock) @@ -130,21 +130,19 @@ static __always_inline void spin_unlock_irqrestore(spinlock_t *lock, rt_spin_unlock(lock); } -#define spin_trylock(lock) \ - __cond_lock(lock, rt_spin_trylock(lock)) +#define spin_trylock(lock) rt_spin_trylock(lock) -#define spin_trylock_bh(lock) \ - __cond_lock(lock, rt_spin_trylock_bh(lock)) +#define spin_trylock_bh(lock) rt_spin_trylock_bh(lock) -#define spin_trylock_irq(lock) \ - __cond_lock(lock, rt_spin_trylock(lock)) +#define spin_trylock_irq(lock) rt_spin_trylock(lock) -#define spin_trylock_irqsave(lock, flags) \ - __cond_lock(lock, ({ \ - typecheck(unsigned long, flags); \ - flags = 0; \ - rt_spin_trylock(lock); \ - })) +static __always_inline bool _spin_trylock_irqsave(spinlock_t *lock, unsigned long *flags) + __cond_acquires(true, lock) +{ + *flags = 0; + return rt_spin_trylock(lock); +} +#define spin_trylock_irqsave(lock, flags) _spin_trylock_irqsave(lock, &(flags)) #define spin_is_contended(lock) (((void)(lock), 0)) diff --git a/kernel/signal.c b/kernel/signal.c index e42b8bd6922f..d65d0fe24bfb 100644 --- a/kernel/signal.c +++ b/kernel/signal.c @@ -1355,8 +1355,8 @@ int zap_other_threads(struct task_struct *p) return count; } -struct sighand_struct *__lock_task_sighand(struct task_struct *tsk, - unsigned long *flags) +struct sighand_struct *lock_task_sighand(struct task_struct *tsk, + unsigned long *flags) { struct sighand_struct *sighand; diff --git a/kernel/time/posix-timers.c b/kernel/time/posix-timers.c index 80a8a09a21a0..413e2389f0a5 100644 --- a/kernel/time/posix-timers.c +++ b/kernel/time/posix-timers.c @@ -66,14 +66,7 @@ static const struct k_clock clock_realtime, clock_monotonic; #error "SIGEV_THREAD_ID must not share bit with other SIGEV values!" #endif -static struct k_itimer *__lock_timer(timer_t timer_id); - -#define lock_timer(tid) \ -({ struct k_itimer *__timr; \ - __cond_lock(&__timr->it_lock, __timr = __lock_timer(tid)); \ - __timr; \ -}) - +static struct k_itimer *lock_timer(timer_t timer_id); static inline void unlock_timer(struct k_itimer *timr) { if (likely((timr))) @@ -85,7 +78,7 @@ static inline void unlock_timer(struct k_itimer *timr) #define scoped_timer (scope) -DEFINE_CLASS(lock_timer, struct k_itimer *, unlock_timer(_T), __lock_timer(id), timer_t id); +DEFINE_CLASS(lock_timer, struct k_itimer *, unlock_timer(_T), lock_timer(id), timer_t id); DEFINE_CLASS_IS_COND_GUARD(lock_timer); static struct timer_hash_bucket *hash_bucket(struct signal_struct *sig, unsigned int nr) @@ -600,7 +593,7 @@ COMPAT_SYSCALL_DEFINE3(timer_create, clockid_t, which_clock, } #endif -static struct k_itimer *__lock_timer(timer_t timer_id) +static struct k_itimer *lock_timer(timer_t timer_id) { struct k_itimer *timr; diff --git a/lib/dec_and_lock.c b/lib/dec_and_lock.c index 1dcca8f2e194..8c7c398fd770 100644 --- a/lib/dec_and_lock.c +++ b/lib/dec_and_lock.c @@ -18,7 +18,7 @@ * because the spin-lock and the decrement must be * "atomic". */ -int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) +int atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) @@ -32,7 +32,7 @@ int _atomic_dec_and_lock(atomic_t *atomic, spinlock_t *lock) return 0; } -EXPORT_SYMBOL(_atomic_dec_and_lock); +EXPORT_SYMBOL(atomic_dec_and_lock); int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, unsigned long *flags) @@ -50,7 +50,7 @@ int _atomic_dec_and_lock_irqsave(atomic_t *atomic, spinlock_t *lock, } EXPORT_SYMBOL(_atomic_dec_and_lock_irqsave); -int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock) +int atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock) { /* Subtract 1 from counter unless that drops it to 0 (ie. it was 1) */ if (atomic_add_unless(atomic, -1, 1)) @@ -63,7 +63,7 @@ int _atomic_dec_and_raw_lock(atomic_t *atomic, raw_spinlock_t *lock) raw_spin_unlock(lock); return 0; } -EXPORT_SYMBOL(_atomic_dec_and_raw_lock); +EXPORT_SYMBOL(atomic_dec_and_raw_lock); int _atomic_dec_and_raw_lock_irqsave(atomic_t *atomic, raw_spinlock_t *lock, unsigned long *flags) diff --git a/lib/lockref.c b/lib/lockref.c index 9210fc6ae714..5d8e3ef3860e 100644 --- a/lib/lockref.c +++ b/lib/lockref.c @@ -105,7 +105,6 @@ EXPORT_SYMBOL(lockref_put_return); * @lockref: pointer to lockref structure * Return: 1 if count updated successfully or 0 if count <= 1 and lock taken */ -#undef lockref_put_or_lock bool lockref_put_or_lock(struct lockref *lockref) { CMPXCHG_LOOP( diff --git a/mm/memory.c b/mm/memory.c index 2a55edc48a65..b751e1f85abc 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -2210,8 +2210,8 @@ static pmd_t *walk_to_pmd(struct mm_struct *mm, unsigned long addr) return pmd; } -pte_t *__get_locked_pte(struct mm_struct *mm, unsigned long addr, - spinlock_t **ptl) +pte_t *get_locked_pte(struct mm_struct *mm, unsigned long addr, + spinlock_t **ptl) { pmd_t *pmd = walk_to_pmd(mm, addr); diff --git a/mm/pgtable-generic.c b/mm/pgtable-generic.c index d3aec7a9926a..af7966169d69 100644 --- a/mm/pgtable-generic.c +++ b/mm/pgtable-generic.c @@ -280,7 +280,7 @@ static unsigned long pmdp_get_lockless_start(void) { return 0; } static void pmdp_get_lockless_end(unsigned long irqflags) { } #endif -pte_t *___pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp) +pte_t *__pte_offset_map(pmd_t *pmd, unsigned long addr, pmd_t *pmdvalp) { unsigned long irqflags; pmd_t pmdval; @@ -332,13 +332,12 @@ pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd, } /* - * pte_offset_map_lock(mm, pmd, addr, ptlp), and its internal implementation - * __pte_offset_map_lock() below, is usually called with the pmd pointer for - * addr, reached by walking down the mm's pgd, p4d, pud for addr: either while - * holding mmap_lock or vma lock for read or for write; or in truncate or rmap - * context, while holding file's i_mmap_lock or anon_vma lock for read (or for - * write). In a few cases, it may be used with pmd pointing to a pmd_t already - * copied to or constructed on the stack. + * pte_offset_map_lock(mm, pmd, addr, ptlp) is usually called with the pmd + * pointer for addr, reached by walking down the mm's pgd, p4d, pud for addr: + * either while holding mmap_lock or vma lock for read or for write; or in + * truncate or rmap context, while holding file's i_mmap_lock or anon_vma lock + * for read (or for write). In a few cases, it may be used with pmd pointing to + * a pmd_t already copied to or constructed on the stack. * * When successful, it returns the pte pointer for addr, with its page table * kmapped if necessary (when CONFIG_HIGHPTE), and locked against concurrent @@ -389,8 +388,8 @@ pte_t *pte_offset_map_rw_nolock(struct mm_struct *mm, pmd_t *pmd, * table, and may not use RCU at all: "outsiders" like khugepaged should avoid * pte_offset_map() and co once the vma is detached from mm or mm_users is zero. */ -pte_t *__pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, - unsigned long addr, spinlock_t **ptlp) +pte_t *pte_offset_map_lock(struct mm_struct *mm, pmd_t *pmd, + unsigned long addr, spinlock_t **ptlp) { spinlock_t *ptl; pmd_t pmdval; diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index d09f9dc172a4..067a5b4e0f7b 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -20,7 +20,6 @@ # define __releases(x) __attribute__((context(x,1,0))) # define __acquire(x) __context__(x,1) # define __release(x) __context__(x,-1) -# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) #else /* __CHECKER__ */ /* context/locking */ # define __must_hold(x) @@ -28,7 +27,6 @@ # define __releases(x) # define __acquire(x) (void)0 # define __release(x) (void)0 -# define __cond_lock(x,c) (c) #endif /* __CHECKER__ */ /* Compiler specific macros. */ -- cgit v1.2.3 From 6c9be90527207f9beca78e698dd45969813f4c0e Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:55 +0100 Subject: tools/nolibc: remove time conversions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Now that 'struct timespec' and 'struct __kernel_timespec' are compatible, the conversions are not necessary anymore. The same holds true for 'struct itimerspec' and 'struct __kernel_itimerspec'. Remove the conversions. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-11-c662992f75d7@weissschuh.net --- tools/include/nolibc/sys/timerfd.h | 20 ++---------- tools/include/nolibc/time.h | 64 ++++---------------------------------- 2 files changed, 8 insertions(+), 76 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h index 66f779553d31..616fcfb416a9 100644 --- a/tools/include/nolibc/sys/timerfd.h +++ b/tools/include/nolibc/sys/timerfd.h @@ -33,13 +33,7 @@ static __attribute__((unused)) int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) { #if defined(__NR_timerfd_gettime64) - struct __kernel_itimerspec kcurr_value; - int ret; - - ret = my_syscall2(__NR_timerfd_gettime64, fd, &kcurr_value); - __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); - __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); - return ret; + return my_syscall2(__NR_timerfd_gettime64, fd, curr_value); #else return my_syscall2(__NR_timerfd_gettime, fd, curr_value); #endif @@ -57,17 +51,7 @@ int sys_timerfd_settime(int fd, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) { #if defined(__NR_timerfd_settime64) - struct __kernel_itimerspec knew_value, kold_value; - int ret; - - __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); - __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); - ret = my_syscall4(__NR_timerfd_settime64, fd, flags, &knew_value, &kold_value); - if (old_value) { - __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); - __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); - } - return ret; + return my_syscall4(__NR_timerfd_settime64, fd, flags, new_value, old_value); #else return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); #endif diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index 45df9b09d7b6..ab67f209c99f 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -18,20 +18,6 @@ #include #include -static __inline__ -void __nolibc_timespec_user_to_kernel(const struct timespec *ts, struct __kernel_timespec *kts) -{ - kts->tv_sec = ts->tv_sec; - kts->tv_nsec = ts->tv_nsec; -} - -static __inline__ -void __nolibc_timespec_kernel_to_user(const struct __kernel_timespec *kts, struct timespec *ts) -{ - ts->tv_sec = kts->tv_sec; - ts->tv_nsec = kts->tv_nsec; -} - /* * int clock_getres(clockid_t clockid, struct timespec *res); * int clock_gettime(clockid_t clockid, struct timespec *tp); @@ -44,13 +30,7 @@ static __attribute__((unused)) int sys_clock_getres(clockid_t clockid, struct timespec *res) { #if defined(__NR_clock_getres_time64) - struct __kernel_timespec kres; - int ret; - - ret = my_syscall2(__NR_clock_getres_time64, clockid, &kres); - if (res) - __nolibc_timespec_kernel_to_user(&kres, res); - return ret; + return my_syscall2(__NR_clock_getres_time64, clockid, res); #else return my_syscall2(__NR_clock_getres, clockid, res); #endif @@ -66,13 +46,7 @@ static __attribute__((unused)) int sys_clock_gettime(clockid_t clockid, struct timespec *tp) { #if defined(__NR_clock_gettime64) - struct __kernel_timespec ktp; - int ret; - - ret = my_syscall2(__NR_clock_gettime64, clockid, &ktp); - if (tp) - __nolibc_timespec_kernel_to_user(&ktp, tp); - return ret; + return my_syscall2(__NR_clock_gettime64, clockid, tp); #else return my_syscall2(__NR_clock_gettime, clockid, tp); #endif @@ -88,10 +62,7 @@ static __attribute__((unused)) int sys_clock_settime(clockid_t clockid, struct timespec *tp) { #if defined(__NR_clock_settime64) - struct __kernel_timespec ktp; - - __nolibc_timespec_user_to_kernel(tp, &ktp); - return my_syscall2(__NR_clock_settime64, clockid, &ktp); + return my_syscall2(__NR_clock_settime64, clockid, tp); #else return my_syscall2(__NR_clock_settime, clockid, tp); #endif @@ -108,14 +79,7 @@ int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqt struct timespec *rmtp) { #if defined(__NR_clock_nanosleep_time64) - struct __kernel_timespec krqtp, krmtp; - int ret; - - __nolibc_timespec_user_to_kernel(rqtp, &krqtp); - ret = my_syscall4(__NR_clock_nanosleep_time64, clockid, flags, &krqtp, &krmtp); - if (rmtp) - __nolibc_timespec_kernel_to_user(&krmtp, rmtp); - return ret; + return my_syscall4(__NR_clock_nanosleep_time64, clockid, flags, rqtp, rmtp); #else return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); #endif @@ -190,13 +154,7 @@ static __attribute__((unused)) int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value) { #if defined(__NR_timer_gettime64) - struct __kernel_itimerspec kcurr_value; - int ret; - - ret = my_syscall2(__NR_timer_gettime64, timerid, &kcurr_value); - __nolibc_timespec_kernel_to_user(&kcurr_value.it_interval, &curr_value->it_interval); - __nolibc_timespec_kernel_to_user(&kcurr_value.it_value, &curr_value->it_value); - return ret; + return my_syscall2(__NR_timer_gettime64, timerid, curr_value); #else return my_syscall2(__NR_timer_gettime, timerid, curr_value); #endif @@ -213,17 +171,7 @@ int sys_timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) { #if defined(__NR_timer_settime64) - struct __kernel_itimerspec knew_value, kold_value; - int ret; - - __nolibc_timespec_user_to_kernel(&new_value->it_value, &knew_value.it_value); - __nolibc_timespec_user_to_kernel(&new_value->it_interval, &knew_value.it_interval); - ret = my_syscall4(__NR_timer_settime64, timerid, flags, &knew_value, &kold_value); - if (old_value) { - __nolibc_timespec_kernel_to_user(&kold_value.it_interval, &old_value->it_interval); - __nolibc_timespec_kernel_to_user(&kold_value.it_value, &old_value->it_value); - } - return ret; + return my_syscall4(__NR_timer_settime64, timerid, flags, new_value, old_value); #else return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value); #endif -- cgit v1.2.3 From dd6659efe0529e7177e9270a0fc044a0b17deb8a Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:56 +0100 Subject: tools/nolibc: add compiler version detection macros MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some upcoming logic needs to depend on the version of GCC or clang. Add some helper macros to keep the conditionals readable. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-12-c662992f75d7@weissschuh.net --- tools/include/nolibc/compiler.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index 87090bbc53e0..c9ffd0496dae 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -47,4 +47,20 @@ # define __nolibc_fallthrough do { } while (0) #endif /* __nolibc_has_attribute(fallthrough) */ +#define __nolibc_version(_major, _minor, _patch) ((_major) * 10000 + (_minor) * 100 + (_patch)) + +#ifdef __GNUC__ +# define __nolibc_gnuc_version \ + __nolibc_version(__GNUC__, __GNUC_MINOR__, __GNUC_PATCHLEVEL__) +#else +# define __nolibc_gnuc_version 0 +#endif /* __GNUC__ */ + +#ifdef __clang__ +# define __nolibc_clang_version \ + __nolibc_version(__clang_major__, __clang_minor__, __clang_patchlevel__) +#else +# define __nolibc_clang_version 0 +#endif /* __clang__ */ + #endif /* _NOLIBC_COMPILER_H */ -- cgit v1.2.3 From 37219aa5b12326cd60f4586779c687f3394e80f5 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:57 +0100 Subject: tools/nolibc: add __nolibc_static_assert() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a wrapper for _Static_assert() to use within nolibc. While _Static_assert() itself was only standardized in C11, in GCC and clang dialects it is also available in older standards. Link: https://lore.kernel.org/lkml/20251203192330.GA12995@1wt.eu/ Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-13-c662992f75d7@weissschuh.net --- tools/include/nolibc/compiler.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/compiler.h b/tools/include/nolibc/compiler.h index c9ffd0496dae..a8c7619dcdde 100644 --- a/tools/include/nolibc/compiler.h +++ b/tools/include/nolibc/compiler.h @@ -63,4 +63,12 @@ # define __nolibc_clang_version 0 #endif /* __clang__ */ +#if __STDC_VERSION__ >= 201112L || \ + __nolibc_gnuc_version >= __nolibc_version(4, 6, 0) || \ + __nolibc_clang_version >= __nolibc_version(3, 0, 0) +# define __nolibc_static_assert(_t) _Static_assert(_t, "") +#else +# define __nolibc_static_assert(_t) +#endif + #endif /* _NOLIBC_COMPILER_H */ -- cgit v1.2.3 From f3ed932644a671038b31f7f536a066eeef6803b0 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 20 Dec 2025 14:55:58 +0100 Subject: selftests/nolibc: add static assertions around time types handling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The nolibc system call wrappers expect the libc types to be compatible to the kernel types. Make sure these expectations hold at compile-time. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Reviewed-by: Arnd Bergmann Link: https://patch.msgid.link/20251220-nolibc-uapi-types-v3-14-c662992f75d7@weissschuh.net --- tools/include/nolibc/sys/timerfd.h | 4 ++++ tools/include/nolibc/time.h | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/sys/timerfd.h b/tools/include/nolibc/sys/timerfd.h index 616fcfb416a9..29fd92bd47d2 100644 --- a/tools/include/nolibc/sys/timerfd.h +++ b/tools/include/nolibc/sys/timerfd.h @@ -33,8 +33,10 @@ static __attribute__((unused)) int sys_timerfd_gettime(int fd, struct itimerspec *curr_value) { #if defined(__NR_timerfd_gettime64) + __nolibc_assert_time64_type(curr_value->it_value.tv_sec); return my_syscall2(__NR_timerfd_gettime64, fd, curr_value); #else + __nolibc_assert_native_time64(); return my_syscall2(__NR_timerfd_gettime, fd, curr_value); #endif } @@ -51,8 +53,10 @@ int sys_timerfd_settime(int fd, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) { #if defined(__NR_timerfd_settime64) + __nolibc_assert_time64_type(new_value->it_value.tv_sec); return my_syscall4(__NR_timerfd_settime64, fd, flags, new_value, old_value); #else + __nolibc_assert_native_time64(); return my_syscall4(__NR_timerfd_settime, fd, flags, new_value, old_value); #endif } diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index ab67f209c99f..f9257d6a7878 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -18,6 +18,12 @@ #include #include +#define __nolibc_assert_time64_type(t) \ + __nolibc_static_assert(sizeof(t) == 8) + +#define __nolibc_assert_native_time64() \ + __nolibc_assert_time64_type(__kernel_old_time_t) + /* * int clock_getres(clockid_t clockid, struct timespec *res); * int clock_gettime(clockid_t clockid, struct timespec *tp); @@ -30,8 +36,10 @@ static __attribute__((unused)) int sys_clock_getres(clockid_t clockid, struct timespec *res) { #if defined(__NR_clock_getres_time64) + __nolibc_assert_time64_type(res->tv_sec); return my_syscall2(__NR_clock_getres_time64, clockid, res); #else + __nolibc_assert_native_time64(); return my_syscall2(__NR_clock_getres, clockid, res); #endif } @@ -46,8 +54,10 @@ static __attribute__((unused)) int sys_clock_gettime(clockid_t clockid, struct timespec *tp) { #if defined(__NR_clock_gettime64) + __nolibc_assert_time64_type(tp->tv_sec); return my_syscall2(__NR_clock_gettime64, clockid, tp); #else + __nolibc_assert_native_time64(); return my_syscall2(__NR_clock_gettime, clockid, tp); #endif } @@ -62,8 +72,10 @@ static __attribute__((unused)) int sys_clock_settime(clockid_t clockid, struct timespec *tp) { #if defined(__NR_clock_settime64) + __nolibc_assert_time64_type(tp->tv_sec); return my_syscall2(__NR_clock_settime64, clockid, tp); #else + __nolibc_assert_native_time64(); return my_syscall2(__NR_clock_settime, clockid, tp); #endif } @@ -79,8 +91,10 @@ int sys_clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqt struct timespec *rmtp) { #if defined(__NR_clock_nanosleep_time64) + __nolibc_assert_time64_type(rqtp->tv_sec); return my_syscall4(__NR_clock_nanosleep_time64, clockid, flags, rqtp, rmtp); #else + __nolibc_assert_native_time64(); return my_syscall4(__NR_clock_nanosleep, clockid, flags, rqtp, rmtp); #endif } @@ -154,8 +168,10 @@ static __attribute__((unused)) int sys_timer_gettime(timer_t timerid, struct itimerspec *curr_value) { #if defined(__NR_timer_gettime64) + __nolibc_assert_time64_type(curr_value->it_value.tv_sec); return my_syscall2(__NR_timer_gettime64, timerid, curr_value); #else + __nolibc_assert_native_time64(); return my_syscall2(__NR_timer_gettime, timerid, curr_value); #endif } @@ -171,8 +187,10 @@ int sys_timer_settime(timer_t timerid, int flags, const struct itimerspec *new_value, struct itimerspec *old_value) { #if defined(__NR_timer_settime64) + __nolibc_assert_time64_type(new_value->it_value.tv_sec); return my_syscall4(__NR_timer_settime64, timerid, flags, new_value, old_value); #else + __nolibc_assert_native_time64(); return my_syscall4(__NR_timer_settime, timerid, flags, new_value, old_value); #endif } -- cgit v1.2.3 From 57624b38ce99b906cbb191a1d536bb871ad2d8c2 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sun, 4 Jan 2026 23:43:13 +0100 Subject: tools/nolibc: align sys_vfork() with sys_fork() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently the generic variants of sys_fork() and sys_vfork() differ in both they precedence of used system calls and the usage of sys_clone() vs sys_clone3(). While the interface of clone3() in sys_vfork() is more consistent over different architectures, qemu-user does not support it, making testing harder. We already handle the different clone() interfaces for sys_fork() in the architecture-specific headers, and can do so also for sys_vfork(). In fact SPARC already has such handling and only s390 is currently missing. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://patch.msgid.link/20260104-nolibc-vfork-v1-1-a63464b9e4e6@weissschuh.net --- tools/include/nolibc/arch-s390.h | 8 ++++++++ tools/include/nolibc/sys.h | 18 +++++------------- 2 files changed, 13 insertions(+), 13 deletions(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/arch-s390.h b/tools/include/nolibc/arch-s390.h index 5bee6ecbde0a..904281e95f99 100644 --- a/tools/include/nolibc/arch-s390.h +++ b/tools/include/nolibc/arch-s390.h @@ -8,6 +8,7 @@ #include "types.h" +#include #include #include @@ -189,4 +190,11 @@ pid_t sys_fork(void) } #define sys_fork sys_fork +static __attribute__((unused)) +pid_t sys_vfork(void) +{ + return my_syscall5(__NR_clone, 0, CLONE_VM | CLONE_VFORK | SIGCHLD, 0, 0, 0); +} +#define sys_vfork sys_vfork + #endif /* _NOLIBC_ARCH_S390_H */ diff --git a/tools/include/nolibc/sys.h b/tools/include/nolibc/sys.h index 847af1ccbdc9..403ee9ce8389 100644 --- a/tools/include/nolibc/sys.h +++ b/tools/include/nolibc/sys.h @@ -22,7 +22,7 @@ #include #include #include /* for O_* and AT_* */ -#include /* for clone_args */ +#include /* for CLONE_* */ #include /* for statx() */ #include "errno.h" @@ -363,19 +363,11 @@ pid_t fork(void) static __attribute__((unused)) pid_t sys_vfork(void) { -#if defined(__NR_vfork) +#if defined(__NR_clone) + /* See the note in sys_fork(). */ + return my_syscall5(__NR_clone, CLONE_VM | CLONE_VFORK | SIGCHLD, 0, 0, 0, 0); +#elif defined(__NR_vfork) return my_syscall0(__NR_vfork); -#else - /* - * clone() could be used but has different argument orders per - * architecture. - */ - struct clone_args args = { - .flags = CLONE_VM | CLONE_VFORK, - .exit_signal = SIGCHLD, - }; - - return my_syscall2(__NR_clone3, &args, sizeof(args)); #endif } #endif -- cgit v1.2.3 From 2b421662c7887a0649fe409155a1f101562d0fa9 Mon Sep 17 00:00:00 2001 From: Leon Hwang Date: Wed, 7 Jan 2026 10:20:16 +0800 Subject: bpf: Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags Introduce BPF_F_CPU and BPF_F_ALL_CPUS flags and check them for following APIs: * 'map_lookup_elem()' * 'map_update_elem()' * 'generic_map_lookup_batch()' * 'generic_map_update_batch()' And, get the correct value size for these APIs. Acked-by: Andrii Nakryiko Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260107022022.12843-2-leon.hwang@linux.dev Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 23 ++++++++++++++++++++++- include/uapi/linux/bpf.h | 2 ++ kernel/bpf/syscall.c | 31 +++++++++++++++++-------------- tools/include/uapi/linux/bpf.h | 2 ++ 4 files changed, 43 insertions(+), 15 deletions(-) (limited to 'tools/include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a63e47d2109c..108bab1bda9d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -3915,14 +3915,35 @@ bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image) } #endif +static inline bool bpf_map_supports_cpu_flags(enum bpf_map_type map_type) +{ + return false; +} + static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags) { - if (flags & ~allowed_flags) + u32 cpu; + + if ((u32)flags & ~allowed_flags) return -EINVAL; if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK)) return -EINVAL; + if (!(flags & BPF_F_CPU) && flags >> 32) + return -EINVAL; + + if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) { + if (!bpf_map_supports_cpu_flags(map->map_type)) + return -EINVAL; + if ((flags & BPF_F_CPU) && (flags & BPF_F_ALL_CPUS)) + return -EINVAL; + + cpu = flags >> 32; + if ((flags & BPF_F_CPU) && cpu >= num_possible_cpus()) + return -ERANGE; + } + return 0; } diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 84ced3ed2d21..2a2ade4be60f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1384,6 +1384,8 @@ enum { BPF_NOEXIST = 1, /* create new element if it didn't exist */ BPF_EXIST = 2, /* update existing element */ BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ + BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */ + BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */ }; /* flags for BPF_MAP_CREATE command */ diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 6dd2ad2f9e81..e8cfe9d67e64 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -133,12 +133,14 @@ bool bpf_map_write_active(const struct bpf_map *map) return atomic64_read(&map->writecnt) != 0; } -static u32 bpf_map_value_size(const struct bpf_map *map) -{ - if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || - map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || - map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) +static u32 bpf_map_value_size(const struct bpf_map *map, u64 flags) +{ + if (flags & (BPF_F_CPU | BPF_F_ALL_CPUS)) + return map->value_size; + else if (map->map_type == BPF_MAP_TYPE_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_LRU_PERCPU_HASH || + map->map_type == BPF_MAP_TYPE_PERCPU_ARRAY || + map->map_type == BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE) return round_up(map->value_size, 8) * num_possible_cpus(); else if (IS_FD_MAP(map)) return sizeof(u32); @@ -1729,7 +1731,7 @@ static int map_lookup_elem(union bpf_attr *attr) if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ)) return -EPERM; - err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK); + err = bpf_map_check_op_flags(map, attr->flags, BPF_F_LOCK | BPF_F_CPU); if (err) return err; @@ -1737,7 +1739,7 @@ static int map_lookup_elem(union bpf_attr *attr) if (IS_ERR(key)) return PTR_ERR(key); - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, attr->flags); err = -ENOMEM; value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); @@ -1804,7 +1806,7 @@ static int map_update_elem(union bpf_attr *attr, bpfptr_t uattr) goto err_put; } - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, attr->flags); value = kvmemdup_bpfptr(uvalue, value_size); if (IS_ERR(value)) { err = PTR_ERR(value); @@ -2000,11 +2002,12 @@ int generic_map_update_batch(struct bpf_map *map, struct file *map_file, void *key, *value; int err = 0; - err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK); + err = bpf_map_check_op_flags(map, attr->batch.elem_flags, + BPF_F_LOCK | BPF_F_CPU | BPF_F_ALL_CPUS); if (err) return err; - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, attr->batch.elem_flags); max_count = attr->batch.count; if (!max_count) @@ -2059,11 +2062,11 @@ int generic_map_lookup_batch(struct bpf_map *map, u32 value_size, cp, max_count; int err; - err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK); + err = bpf_map_check_op_flags(map, attr->batch.elem_flags, BPF_F_LOCK | BPF_F_CPU); if (err) return err; - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, attr->batch.elem_flags); max_count = attr->batch.count; if (!max_count) @@ -2185,7 +2188,7 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr) goto err_put; } - value_size = bpf_map_value_size(map); + value_size = bpf_map_value_size(map, 0); err = -ENOMEM; value = kvmalloc(value_size, GFP_USER | __GFP_NOWARN); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 6b92b0847ec2..b816bc53d2e1 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1384,6 +1384,8 @@ enum { BPF_NOEXIST = 1, /* create new element if it didn't exist */ BPF_EXIST = 2, /* update existing element */ BPF_F_LOCK = 4, /* spin_lock-ed map_lookup/map_update */ + BPF_F_CPU = 8, /* cpu flag for percpu maps, upper 32-bit of flags is a cpu number */ + BPF_F_ALL_CPUS = 16, /* update value across all CPUs for percpu maps */ }; /* flags for BPF_MAP_CREATE command */ -- cgit v1.2.3 From edaf30743185f6ed8e29dcb2f1d01e183c0b807b Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Mon, 5 Jan 2026 11:36:27 +0900 Subject: tools/nolibc: Add fread() to stdio.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a very basic version of fread() like we already have for fwrite(). Signed-off-by: Daniel Palmer Link: https://patch.msgid.link/20260105023629.1502801-2-daniel@thingy.jp Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/stdio.h | 34 +++++++++++++++++++++++++++++++++- 1 file changed, 33 insertions(+), 1 deletion(-) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 1f16dab2ac88..6904252df97d 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -170,7 +170,7 @@ int putchar(int c) } -/* fwrite(), puts(), fputs(). Note that puts() emits '\n' but not fputs(). */ +/* fwrite(), fread(), puts(), fputs(). Note that puts() emits '\n' but not fputs(). */ /* internal fwrite()-like function which only takes a size and returns 0 on * success or EOF on error. It automatically retries on short writes. @@ -204,6 +204,38 @@ size_t fwrite(const void *s, size_t size, size_t nmemb, FILE *stream) return written; } +/* internal fread()-like function which only takes a size and returns 0 on + * success or EOF on error. It automatically retries on short reads. + */ +static __attribute__((unused)) +int _fread(void *buf, size_t size, FILE *stream) +{ + int fd = fileno(stream); + ssize_t ret; + + while (size) { + ret = read(fd, buf, size); + if (ret <= 0) + return EOF; + size -= ret; + buf += ret; + } + return 0; +} + +static __attribute__((unused)) +size_t fread(void *s, size_t size, size_t nmemb, FILE *stream) +{ + size_t nread; + + for (nread = 0; nread < nmemb; nread++) { + if (_fread(s, size, stream) != 0) + break; + s += size; + } + return nread; +} + static __attribute__((unused)) int fputs(const char *s, FILE *stream) { -- cgit v1.2.3 From 109770cc81680b802ee983b09b61c3979240fd09 Mon Sep 17 00:00:00 2001 From: Daniel Palmer Date: Mon, 5 Jan 2026 11:36:28 +0900 Subject: tools/nolibc: Add fseek() to stdio.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A very basic wrapper around lseek() that implements fseek(). Signed-off-by: Daniel Palmer Link: https://patch.msgid.link/20260105023629.1502801-3-daniel@thingy.jp Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/stdio.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'tools/include') diff --git a/tools/include/nolibc/stdio.h b/tools/include/nolibc/stdio.h index 6904252df97d..233318b0d0f0 100644 --- a/tools/include/nolibc/stdio.h +++ b/tools/include/nolibc/stdio.h @@ -272,6 +272,25 @@ char *fgets(char *s, int size, FILE *stream) } +/* fseek */ +static __attribute__((unused)) +int fseek(FILE *stream, long offset, int whence) +{ + int fd = fileno(stream); + off_t ret; + + ret = lseek(fd, offset, whence); + + /* lseek() and fseek() differ in that lseek returns the new + * position or -1, fseek() returns either 0 or -1. + */ + if (ret >= 0) + return 0; + + return -1; +} + + /* minimal printf(). It supports the following formats: * - %[l*]{d,u,c,x,p} * - %s -- cgit v1.2.3 From 602544773763da411ffa67567fa1d146f3a40231 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Mon, 12 Jan 2026 16:31:09 -0800 Subject: uapi: promote EFSCORRUPTED and EUCLEAN to errno.h Stop definining these privately and instead move them to the uapi errno.h so that they become canonical instead of copy pasta. Cc: linux-api@vger.kernel.org Signed-off-by: Darrick J. Wong Link: https://patch.msgid.link/176826402587.3490369.17659117524205214600.stgit@frogsfrogsfrogs Reviewed-by: Gao Xiang Reviewed-by: Christoph Hellwig Reviewed-by: Jan Kara Signed-off-by: Christian Brauner --- arch/alpha/include/uapi/asm/errno.h | 2 ++ arch/mips/include/uapi/asm/errno.h | 2 ++ arch/parisc/include/uapi/asm/errno.h | 2 ++ arch/sparc/include/uapi/asm/errno.h | 2 ++ fs/erofs/internal.h | 2 -- fs/ext2/ext2.h | 1 - fs/ext4/ext4.h | 3 --- fs/f2fs/f2fs.h | 3 --- fs/minix/minix.h | 2 -- fs/udf/udf_sb.h | 2 -- fs/xfs/xfs_linux.h | 2 -- include/linux/jbd2.h | 3 --- include/uapi/asm-generic/errno.h | 2 ++ tools/arch/alpha/include/uapi/asm/errno.h | 2 ++ tools/arch/mips/include/uapi/asm/errno.h | 2 ++ tools/arch/parisc/include/uapi/asm/errno.h | 2 ++ tools/arch/sparc/include/uapi/asm/errno.h | 2 ++ tools/include/uapi/asm-generic/errno.h | 2 ++ 18 files changed, 20 insertions(+), 18 deletions(-) (limited to 'tools/include') diff --git a/arch/alpha/include/uapi/asm/errno.h b/arch/alpha/include/uapi/asm/errno.h index 3d265f6babaf..6791f6508632 100644 --- a/arch/alpha/include/uapi/asm/errno.h +++ b/arch/alpha/include/uapi/asm/errno.h @@ -55,6 +55,7 @@ #define ENOSR 82 /* Out of streams resources */ #define ETIME 83 /* Timer expired */ #define EBADMSG 84 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EPROTO 85 /* Protocol error */ #define ENODATA 86 /* No data available */ #define ENOSTR 87 /* Device not a stream */ @@ -96,6 +97,7 @@ #define EREMCHG 115 /* Remote address changed */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/arch/mips/include/uapi/asm/errno.h b/arch/mips/include/uapi/asm/errno.h index 2fb714e2d6d8..c01ed91b1ef4 100644 --- a/arch/mips/include/uapi/asm/errno.h +++ b/arch/mips/include/uapi/asm/errno.h @@ -50,6 +50,7 @@ #define EDOTDOT 73 /* RFS specific error */ #define EMULTIHOP 74 /* Multihop attempted */ #define EBADMSG 77 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define ENAMETOOLONG 78 /* File name too long */ #define EOVERFLOW 79 /* Value too large for defined data type */ #define ENOTUNIQ 80 /* Name not unique on network */ @@ -88,6 +89,7 @@ #define EISCONN 133 /* Transport endpoint is already connected */ #define ENOTCONN 134 /* Transport endpoint is not connected */ #define EUCLEAN 135 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 137 /* Not a XENIX named type file */ #define ENAVAIL 138 /* No XENIX semaphores available */ #define EISNAM 139 /* Is a named type file */ diff --git a/arch/parisc/include/uapi/asm/errno.h b/arch/parisc/include/uapi/asm/errno.h index 8d94739d75c6..8cbc07c1903e 100644 --- a/arch/parisc/include/uapi/asm/errno.h +++ b/arch/parisc/include/uapi/asm/errno.h @@ -36,6 +36,7 @@ #define EDOTDOT 66 /* RFS specific error */ #define EBADMSG 67 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EUSERS 68 /* Too many users */ #define EDQUOT 69 /* Quota exceeded */ #define ESTALE 70 /* Stale file handle */ @@ -62,6 +63,7 @@ #define ERESTART 175 /* Interrupted system call should be restarted */ #define ESTRPIPE 176 /* Streams pipe error */ #define EUCLEAN 177 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 178 /* Not a XENIX named type file */ #define ENAVAIL 179 /* No XENIX semaphores available */ #define EISNAM 180 /* Is a named type file */ diff --git a/arch/sparc/include/uapi/asm/errno.h b/arch/sparc/include/uapi/asm/errno.h index 81a732b902ee..4a41e7835fd5 100644 --- a/arch/sparc/include/uapi/asm/errno.h +++ b/arch/sparc/include/uapi/asm/errno.h @@ -48,6 +48,7 @@ #define ENOSR 74 /* Out of streams resources */ #define ENOMSG 75 /* No message of desired type */ #define EBADMSG 76 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EIDRM 77 /* Identifier removed */ #define EDEADLK 78 /* Resource deadlock would occur */ #define ENOLCK 79 /* No record locks available */ @@ -91,6 +92,7 @@ #define ENOTUNIQ 115 /* Name not unique on network */ #define ERESTART 116 /* Interrupted syscall should be restarted */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/fs/erofs/internal.h b/fs/erofs/internal.h index f7f622836198..d06e99baf5d5 100644 --- a/fs/erofs/internal.h +++ b/fs/erofs/internal.h @@ -541,6 +541,4 @@ long erofs_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); long erofs_compat_ioctl(struct file *filp, unsigned int cmd, unsigned long arg); -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* __EROFS_INTERNAL_H */ diff --git a/fs/ext2/ext2.h b/fs/ext2/ext2.h index cf97b76e9fd3..5e0c6c5fcb6c 100644 --- a/fs/ext2/ext2.h +++ b/fs/ext2/ext2.h @@ -357,7 +357,6 @@ struct ext2_inode { */ #define EXT2_VALID_FS 0x0001 /* Unmounted cleanly */ #define EXT2_ERROR_FS 0x0002 /* Errors detected */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ /* * Mount flags diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h index 56112f201cac..62c091b52bac 100644 --- a/fs/ext4/ext4.h +++ b/fs/ext4/ext4.h @@ -3938,7 +3938,4 @@ extern int ext4_block_write_begin(handle_t *handle, struct folio *folio, get_block_t *get_block); #endif /* __KERNEL__ */ -#define EFSBADCRC EBADMSG /* Bad CRC detected */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* _EXT4_H */ diff --git a/fs/f2fs/f2fs.h b/fs/f2fs/f2fs.h index 20edbb99b814..9f3aa3c7f126 100644 --- a/fs/f2fs/f2fs.h +++ b/fs/f2fs/f2fs.h @@ -5004,7 +5004,4 @@ static inline void f2fs_invalidate_internal_cache(struct f2fs_sb_info *sbi, f2fs_invalidate_compress_pages_range(sbi, blkaddr, len); } -#define EFSBADCRC EBADMSG /* Bad CRC detected */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* _LINUX_F2FS_H */ diff --git a/fs/minix/minix.h b/fs/minix/minix.h index 2bfaf377f208..7e1f652f16d3 100644 --- a/fs/minix/minix.h +++ b/fs/minix/minix.h @@ -175,6 +175,4 @@ static inline int minix_test_bit(int nr, const void *vaddr) __minix_error_inode((inode), __func__, __LINE__, \ (fmt), ##__VA_ARGS__) -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* FS_MINIX_H */ diff --git a/fs/udf/udf_sb.h b/fs/udf/udf_sb.h index 08ec8756b948..8399accc788d 100644 --- a/fs/udf/udf_sb.h +++ b/fs/udf/udf_sb.h @@ -55,8 +55,6 @@ #define MF_DUPLICATE_MD 0x01 #define MF_MIRROR_FE_LOADED 0x02 -#define EFSCORRUPTED EUCLEAN - struct udf_meta_data { __u32 s_meta_file_loc; __u32 s_mirror_file_loc; diff --git a/fs/xfs/xfs_linux.h b/fs/xfs/xfs_linux.h index 4dd747bdbcca..55064228c4d5 100644 --- a/fs/xfs/xfs_linux.h +++ b/fs/xfs/xfs_linux.h @@ -121,8 +121,6 @@ typedef __u32 xfs_nlink_t; #define ENOATTR ENODATA /* Attribute not found */ #define EWRONGFS EINVAL /* Mount with wrong filesystem type */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ -#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define __return_address __builtin_return_address(0) diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f5eaf76198f3..a53a00d36228 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1815,7 +1815,4 @@ static inline int jbd2_handle_buffer_credits(handle_t *handle) #endif /* __KERNEL__ */ -#define EFSBADCRC EBADMSG /* Bad CRC detected */ -#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ - #endif /* _LINUX_JBD2_H */ diff --git a/include/uapi/asm-generic/errno.h b/include/uapi/asm-generic/errno.h index cf9c51ac49f9..92e7ae493ee3 100644 --- a/include/uapi/asm-generic/errno.h +++ b/include/uapi/asm-generic/errno.h @@ -55,6 +55,7 @@ #define EMULTIHOP 72 /* Multihop attempted */ #define EDOTDOT 73 /* RFS specific error */ #define EBADMSG 74 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EOVERFLOW 75 /* Value too large for defined data type */ #define ENOTUNIQ 76 /* Name not unique on network */ #define EBADFD 77 /* File descriptor in bad state */ @@ -98,6 +99,7 @@ #define EINPROGRESS 115 /* Operation now in progress */ #define ESTALE 116 /* Stale file handle */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/tools/arch/alpha/include/uapi/asm/errno.h b/tools/arch/alpha/include/uapi/asm/errno.h index 3d265f6babaf..6791f6508632 100644 --- a/tools/arch/alpha/include/uapi/asm/errno.h +++ b/tools/arch/alpha/include/uapi/asm/errno.h @@ -55,6 +55,7 @@ #define ENOSR 82 /* Out of streams resources */ #define ETIME 83 /* Timer expired */ #define EBADMSG 84 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EPROTO 85 /* Protocol error */ #define ENODATA 86 /* No data available */ #define ENOSTR 87 /* Device not a stream */ @@ -96,6 +97,7 @@ #define EREMCHG 115 /* Remote address changed */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/tools/arch/mips/include/uapi/asm/errno.h b/tools/arch/mips/include/uapi/asm/errno.h index 2fb714e2d6d8..c01ed91b1ef4 100644 --- a/tools/arch/mips/include/uapi/asm/errno.h +++ b/tools/arch/mips/include/uapi/asm/errno.h @@ -50,6 +50,7 @@ #define EDOTDOT 73 /* RFS specific error */ #define EMULTIHOP 74 /* Multihop attempted */ #define EBADMSG 77 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define ENAMETOOLONG 78 /* File name too long */ #define EOVERFLOW 79 /* Value too large for defined data type */ #define ENOTUNIQ 80 /* Name not unique on network */ @@ -88,6 +89,7 @@ #define EISCONN 133 /* Transport endpoint is already connected */ #define ENOTCONN 134 /* Transport endpoint is not connected */ #define EUCLEAN 135 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 137 /* Not a XENIX named type file */ #define ENAVAIL 138 /* No XENIX semaphores available */ #define EISNAM 139 /* Is a named type file */ diff --git a/tools/arch/parisc/include/uapi/asm/errno.h b/tools/arch/parisc/include/uapi/asm/errno.h index 8d94739d75c6..8cbc07c1903e 100644 --- a/tools/arch/parisc/include/uapi/asm/errno.h +++ b/tools/arch/parisc/include/uapi/asm/errno.h @@ -36,6 +36,7 @@ #define EDOTDOT 66 /* RFS specific error */ #define EBADMSG 67 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EUSERS 68 /* Too many users */ #define EDQUOT 69 /* Quota exceeded */ #define ESTALE 70 /* Stale file handle */ @@ -62,6 +63,7 @@ #define ERESTART 175 /* Interrupted system call should be restarted */ #define ESTRPIPE 176 /* Streams pipe error */ #define EUCLEAN 177 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 178 /* Not a XENIX named type file */ #define ENAVAIL 179 /* No XENIX semaphores available */ #define EISNAM 180 /* Is a named type file */ diff --git a/tools/arch/sparc/include/uapi/asm/errno.h b/tools/arch/sparc/include/uapi/asm/errno.h index 81a732b902ee..4a41e7835fd5 100644 --- a/tools/arch/sparc/include/uapi/asm/errno.h +++ b/tools/arch/sparc/include/uapi/asm/errno.h @@ -48,6 +48,7 @@ #define ENOSR 74 /* Out of streams resources */ #define ENOMSG 75 /* No message of desired type */ #define EBADMSG 76 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EIDRM 77 /* Identifier removed */ #define EDEADLK 78 /* Resource deadlock would occur */ #define ENOLCK 79 /* No record locks available */ @@ -91,6 +92,7 @@ #define ENOTUNIQ 115 /* Name not unique on network */ #define ERESTART 116 /* Interrupted syscall should be restarted */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ diff --git a/tools/include/uapi/asm-generic/errno.h b/tools/include/uapi/asm-generic/errno.h index cf9c51ac49f9..92e7ae493ee3 100644 --- a/tools/include/uapi/asm-generic/errno.h +++ b/tools/include/uapi/asm-generic/errno.h @@ -55,6 +55,7 @@ #define EMULTIHOP 72 /* Multihop attempted */ #define EDOTDOT 73 /* RFS specific error */ #define EBADMSG 74 /* Not a data message */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ #define EOVERFLOW 75 /* Value too large for defined data type */ #define ENOTUNIQ 76 /* Name not unique on network */ #define EBADFD 77 /* File descriptor in bad state */ @@ -98,6 +99,7 @@ #define EINPROGRESS 115 /* Operation now in progress */ #define ESTALE 116 /* Stale file handle */ #define EUCLEAN 117 /* Structure needs cleaning */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ #define ENOTNAM 118 /* Not a XENIX named type file */ #define ENAVAIL 119 /* No XENIX semaphores available */ #define EISNAM 120 /* Is a named type file */ -- cgit v1.2.3 From 1d7cf255eefbb479d0eea9aa3b6372a1e52f8c62 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 16 Oct 2025 13:51:25 -0700 Subject: tools headers: Update the linux/unaligned.h copy with the kernel sources To pick up the changes in: vdso: Switch get/put_unaligned() from packed struct to memcpy As the code is dependent on __unqual_scalar_typeof, update also the tools version of compiler_types.h to include this. Signed-off-by: Ian Rogers Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251016205126.2882625-4-irogers@google.com --- tools/include/linux/compiler_types.h | 22 +++++++++++++++++++ tools/include/vdso/unaligned.h | 41 ++++++++++++++++++++++++++++++------ 2 files changed, 57 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index d09f9dc172a4..890982283a5e 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -40,4 +40,26 @@ #define asm_goto_output(x...) asm goto(x) #endif +/* + * __unqual_scalar_typeof(x) - Declare an unqualified scalar type, leaving + * non-scalar types unchanged. + */ +/* + * Prefer C11 _Generic for better compile-times and simpler code. Note: 'char' + * is not type-compatible with 'signed char', and we define a separate case. + */ +#define __scalar_type_to_expr_cases(type) \ + unsigned type: (unsigned type)0, \ + signed type: (signed type)0 + +#define __unqual_scalar_typeof(x) typeof( \ + _Generic((x), \ + char: (char)0, \ + __scalar_type_to_expr_cases(char), \ + __scalar_type_to_expr_cases(short), \ + __scalar_type_to_expr_cases(int), \ + __scalar_type_to_expr_cases(long), \ + __scalar_type_to_expr_cases(long long), \ + default: (x))) + #endif /* __LINUX_COMPILER_TYPES_H */ diff --git a/tools/include/vdso/unaligned.h b/tools/include/vdso/unaligned.h index ff0c06b6513e..9076483c9fbb 100644 --- a/tools/include/vdso/unaligned.h +++ b/tools/include/vdso/unaligned.h @@ -2,14 +2,43 @@ #ifndef __VDSO_UNALIGNED_H #define __VDSO_UNALIGNED_H -#define __get_unaligned_t(type, ptr) ({ \ - const struct { type x; } __packed * __get_pptr = (typeof(__get_pptr))(ptr); \ - __get_pptr->x; \ +#include + +/** + * __get_unaligned_t - read an unaligned value from memory. + * @type: the type to load from the pointer. + * @ptr: the pointer to load from. + * + * Use memcpy to affect an unaligned type sized load avoiding undefined behavior + * from approaches like type punning that require -fno-strict-aliasing in order + * to be correct. As type may be const, use __unqual_scalar_typeof to map to a + * non-const type - you can't memcpy into a const type. The + * __get_unaligned_ctrl_type gives __unqual_scalar_typeof its required + * expression rather than type, a pointer is used to avoid warnings about mixing + * the use of 0 and NULL. The void* cast silences ubsan warnings. + */ +#define __get_unaligned_t(type, ptr) ({ \ + type *__get_unaligned_ctrl_type __always_unused = NULL; \ + __unqual_scalar_typeof(*__get_unaligned_ctrl_type) __get_unaligned_val; \ + __builtin_memcpy(&__get_unaligned_val, (void *)(ptr), \ + sizeof(__get_unaligned_val)); \ + __get_unaligned_val; \ }) -#define __put_unaligned_t(type, val, ptr) do { \ - struct { type x; } __packed * __put_pptr = (typeof(__put_pptr))(ptr); \ - __put_pptr->x = (val); \ +/** + * __put_unaligned_t - write an unaligned value to memory. + * @type: the type of the value to store. + * @val: the value to store. + * @ptr: the pointer to store to. + * + * Use memcpy to affect an unaligned type sized store avoiding undefined + * behavior from approaches like type punning that require -fno-strict-aliasing + * in order to be correct. The void* cast silences ubsan warnings. + */ +#define __put_unaligned_t(type, val, ptr) do { \ + type __put_unaligned_val = (val); \ + __builtin_memcpy((void *)(ptr), &__put_unaligned_val, \ + sizeof(__put_unaligned_val)); \ } while (0) #endif /* __VDSO_UNALIGNED_H */ -- cgit v1.2.3 From 10a62a0611f5544d209446acfde5beb7b27773c7 Mon Sep 17 00:00:00 2001 From: Ian Rogers Date: Thu, 16 Oct 2025 13:51:26 -0700 Subject: tools headers: Remove unneeded ignoring of warnings in unaligned.h Now that get/put_unaligned() use memcpy() the -Wpacked and -Wattributes warnings don't need disabling anymore. Signed-off-by: Ian Rogers Signed-off-by: Thomas Gleixner Link: https://patch.msgid.link/20251016205126.2882625-5-irogers@google.com --- tools/include/linux/unaligned.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'tools/include') diff --git a/tools/include/linux/unaligned.h b/tools/include/linux/unaligned.h index 395a4464fe73..d51ddafed138 100644 --- a/tools/include/linux/unaligned.h +++ b/tools/include/linux/unaligned.h @@ -6,9 +6,6 @@ * This is the most generic implementation of unaligned accesses * and should work almost anywhere. */ -#pragma GCC diagnostic push -#pragma GCC diagnostic ignored "-Wpacked" -#pragma GCC diagnostic ignored "-Wattributes" #include #define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) @@ -143,6 +140,5 @@ static inline u64 get_unaligned_be48(const void *p) { return __get_unaligned_be48(p); } -#pragma GCC diagnostic pop #endif /* __LINUX_UNALIGNED_H */ -- cgit v1.2.3 From d2bdcde9626cbea0c44a6aaa33b440c8adf81e09 Mon Sep 17 00:00:00 2001 From: Dapeng Mi Date: Wed, 14 Jan 2026 09:17:45 +0800 Subject: perf/x86/intel: Add support for PEBS memory auxiliary info field in DMR With the introduction of the OMR feature, the PEBS memory auxiliary info field for load and store latency events has been restructured for DMR. The memory auxiliary info field's bit[8] indicates whether a L2 cache miss occurred for a memory load or store instruction. If bit[8] is 0, it signifies no L2 cache miss, and bits[7:0] specify the exact cache data source (up to the L2 cache level). If bit[8] is 1, bits[7:0] represent the OMR encoding, indicating the specific L3 cache or memory region involved in the memory access. A significant enhancement is OMR encoding provides up to 8 fine-grained memory regions besides the cache region. A significant enhancement for OMR encoding is the ability to provide up to 8 fine-grained memory regions in addition to the cache region, offering more detailed insights into memory access regions. For detailed information on the memory auxiliary info encoding, please refer to section 16.2 "PEBS LOAD LATENCY AND STORE LATENCY FACILITY" in the ISE documentation. This patch ensures that the PEBS memory auxiliary info field is correctly interpreted and utilized in DMR. Signed-off-by: Dapeng Mi Signed-off-by: Peter Zijlstra (Intel) Link: https://patch.msgid.link/20260114011750.350569-3-dapeng1.mi@linux.intel.com --- arch/x86/events/intel/ds.c | 140 ++++++++++++++++++++++++++++++++++ arch/x86/events/perf_event.h | 2 + include/uapi/linux/perf_event.h | 27 ++++++- tools/include/uapi/linux/perf_event.h | 27 ++++++- 4 files changed, 190 insertions(+), 6 deletions(-) (limited to 'tools/include') diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index feb1c3cf63e4..272e652f25fc 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -34,6 +34,17 @@ struct pebs_record_32 { */ +union omr_encoding { + struct { + u8 omr_source : 4; + u8 omr_remote : 1; + u8 omr_hitm : 1; + u8 omr_snoop : 1; + u8 omr_promoted : 1; + }; + u8 omr_full; +}; + union intel_x86_pebs_dse { u64 val; struct { @@ -73,6 +84,18 @@ union intel_x86_pebs_dse { unsigned int lnc_addr_blk:1; unsigned int ld_reserved6:18; }; + struct { + unsigned int pnc_dse: 8; + unsigned int pnc_l2_miss:1; + unsigned int pnc_stlb_clean_hit:1; + unsigned int pnc_stlb_any_hit:1; + unsigned int pnc_stlb_miss:1; + unsigned int pnc_locked:1; + unsigned int pnc_data_blk:1; + unsigned int pnc_addr_blk:1; + unsigned int pnc_fb_full:1; + unsigned int ld_reserved8:16; + }; }; @@ -228,6 +251,85 @@ void __init intel_pmu_pebs_data_source_lnl(void) __intel_pmu_pebs_data_source_cmt(data_source); } +/* Version for Panthercove and later */ + +/* L2 hit */ +#define PNC_PEBS_DATA_SOURCE_MAX 16 +static u64 pnc_pebs_l2_hit_data_source[PNC_PEBS_DATA_SOURCE_MAX] = { + P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA), /* 0x00: non-cache access */ + OP_LH | LEVEL(L0) | P(SNOOP, NONE), /* 0x01: L0 hit */ + OP_LH | P(LVL, L1) | LEVEL(L1) | P(SNOOP, NONE), /* 0x02: L1 hit */ + OP_LH | P(LVL, LFB) | LEVEL(LFB) | P(SNOOP, NONE), /* 0x03: L1 Miss Handling Buffer hit */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, NONE), /* 0x04: L2 Hit Clean */ + 0, /* 0x05: Reserved */ + 0, /* 0x06: Reserved */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HIT), /* 0x07: L2 Hit Snoop HIT */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, HITM), /* 0x08: L2 Hit Snoop Hit Modified */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, MISS), /* 0x09: Prefetch Promotion */ + OP_LH | P(LVL, L2) | LEVEL(L2) | P(SNOOP, MISS), /* 0x0a: Cross Core Prefetch Promotion */ + 0, /* 0x0b: Reserved */ + 0, /* 0x0c: Reserved */ + 0, /* 0x0d: Reserved */ + 0, /* 0x0e: Reserved */ + OP_LH | P(LVL, UNC) | LEVEL(NA) | P(SNOOP, NONE), /* 0x0f: uncached */ +}; + +/* L2 miss */ +#define OMR_DATA_SOURCE_MAX 16 +static u64 omr_data_source[OMR_DATA_SOURCE_MAX] = { + P(OP, LOAD) | P(LVL, NA) | LEVEL(NA) | P(SNOOP, NA), /* 0x00: invalid */ + 0, /* 0x01: Reserved */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_SHARE), /* 0x02: local CA shared cache */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, L_NON_SHARE),/* 0x03: local CA non-shared cache */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_IO), /* 0x04: other CA IO agent */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_SHARE), /* 0x05: other CA shared cache */ + OP_LH | P(LVL, L3) | LEVEL(L3) | P(REGION, O_NON_SHARE),/* 0x06: other CA non-shared cache */ + OP_LH | LEVEL(RAM) | P(REGION, MMIO), /* 0x07: MMIO */ + OP_LH | LEVEL(RAM) | P(REGION, MEM0), /* 0x08: Memory region 0 */ + OP_LH | LEVEL(RAM) | P(REGION, MEM1), /* 0x09: Memory region 1 */ + OP_LH | LEVEL(RAM) | P(REGION, MEM2), /* 0x0a: Memory region 2 */ + OP_LH | LEVEL(RAM) | P(REGION, MEM3), /* 0x0b: Memory region 3 */ + OP_LH | LEVEL(RAM) | P(REGION, MEM4), /* 0x0c: Memory region 4 */ + OP_LH | LEVEL(RAM) | P(REGION, MEM5), /* 0x0d: Memory region 5 */ + OP_LH | LEVEL(RAM) | P(REGION, MEM6), /* 0x0e: Memory region 6 */ + OP_LH | LEVEL(RAM) | P(REGION, MEM7), /* 0x0f: Memory region 7 */ +}; + +static u64 parse_omr_data_source(u8 dse) +{ + union omr_encoding omr; + u64 val = 0; + + omr.omr_full = dse; + val = omr_data_source[omr.omr_source]; + if (omr.omr_source > 0x1 && omr.omr_source < 0x7) + val |= omr.omr_remote ? P(LVL, REM_CCE1) : 0; + else if (omr.omr_source > 0x7) + val |= omr.omr_remote ? P(LVL, REM_RAM1) : P(LVL, LOC_RAM); + + if (omr.omr_remote) + val |= REM; + + val |= omr.omr_hitm ? P(SNOOP, HITM) : P(SNOOP, HIT); + + if (omr.omr_source == 0x2) { + u8 snoop = omr.omr_snoop | omr.omr_promoted; + + if (snoop == 0x0) + val |= P(SNOOP, NA); + else if (snoop == 0x1) + val |= P(SNOOP, MISS); + else if (snoop == 0x2) + val |= P(SNOOP, HIT); + else if (snoop == 0x3) + val |= P(SNOOP, NONE); + } else if (omr.omr_source > 0x2 && omr.omr_source < 0x7) { + val |= omr.omr_snoop ? P(SNOOPX, FWD) : 0; + } + + return val; +} + static u64 precise_store_data(u64 status) { union intel_x86_pebs_dse dse; @@ -411,6 +513,44 @@ u64 arl_h_latency_data(struct perf_event *event, u64 status) return lnl_latency_data(event, status); } +u64 pnc_latency_data(struct perf_event *event, u64 status) +{ + union intel_x86_pebs_dse dse; + union perf_mem_data_src src; + u64 val; + + dse.val = status; + + if (!dse.pnc_l2_miss) + val = pnc_pebs_l2_hit_data_source[dse.pnc_dse & 0xf]; + else + val = parse_omr_data_source(dse.pnc_dse); + + if (!val) + val = P(OP, LOAD) | LEVEL(NA) | P(SNOOP, NA); + + if (dse.pnc_stlb_miss) + val |= P(TLB, MISS) | P(TLB, L2); + else + val |= P(TLB, HIT) | P(TLB, L1) | P(TLB, L2); + + if (dse.pnc_locked) + val |= P(LOCK, LOCKED); + + if (dse.pnc_data_blk) + val |= P(BLK, DATA); + if (dse.pnc_addr_blk) + val |= P(BLK, ADDR); + if (!dse.pnc_data_blk && !dse.pnc_addr_blk) + val |= P(BLK, NA); + + src.val = val; + if (event->hw.flags & PERF_X86_EVENT_PEBS_ST_HSW) + src.mem_op = P(OP, STORE); + + return src.val; +} + static u64 load_latency_data(struct perf_event *event, u64 status) { union intel_x86_pebs_dse dse; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 586e3fdfe6d8..bd501c2a0f73 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -1664,6 +1664,8 @@ u64 lnl_latency_data(struct perf_event *event, u64 status); u64 arl_h_latency_data(struct perf_event *event, u64 status); +u64 pnc_latency_data(struct perf_event *event, u64 status); + extern struct event_constraint intel_core2_pebs_event_constraints[]; extern struct event_constraint intel_atom_pebs_event_constraints[]; diff --git a/include/uapi/linux/perf_event.h b/include/uapi/linux/perf_event.h index c44a8fb3e418..533393ec94d0 100644 --- a/include/uapi/linux/perf_event.h +++ b/include/uapi/linux/perf_event.h @@ -1330,14 +1330,16 @@ union perf_mem_data_src { mem_snoopx : 2, /* Snoop mode, ext */ mem_blk : 3, /* Access blocked */ mem_hops : 3, /* Hop level */ - mem_rsvd : 18; + mem_region : 5, /* cache/memory regions */ + mem_rsvd : 13; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd : 18, + __u64 mem_rsvd : 13, + mem_region : 5, /* cache/memory regions */ mem_hops : 3, /* Hop level */ mem_blk : 3, /* Access blocked */ mem_snoopx : 2, /* Snoop mode, ext */ @@ -1394,7 +1396,7 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ #define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ -/* 0x007 available */ +#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */ #define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ @@ -1447,6 +1449,25 @@ union perf_mem_data_src { /* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 +/* Cache/Memory region */ +#define PERF_MEM_REGION_NA 0x0 /* Invalid */ +#define PERF_MEM_REGION_RSVD 0x01 /* Reserved */ +#define PERF_MEM_REGION_L_SHARE 0x02 /* Local CA shared cache */ +#define PERF_MEM_REGION_L_NON_SHARE 0x03 /* Local CA non-shared cache */ +#define PERF_MEM_REGION_O_IO 0x04 /* Other CA IO agent */ +#define PERF_MEM_REGION_O_SHARE 0x05 /* Other CA shared cache */ +#define PERF_MEM_REGION_O_NON_SHARE 0x06 /* Other CA non-shared cache */ +#define PERF_MEM_REGION_MMIO 0x07 /* MMIO */ +#define PERF_MEM_REGION_MEM0 0x08 /* Memory region 0 */ +#define PERF_MEM_REGION_MEM1 0x09 /* Memory region 1 */ +#define PERF_MEM_REGION_MEM2 0x0a /* Memory region 2 */ +#define PERF_MEM_REGION_MEM3 0x0b /* Memory region 3 */ +#define PERF_MEM_REGION_MEM4 0x0c /* Memory region 4 */ +#define PERF_MEM_REGION_MEM5 0x0d /* Memory region 5 */ +#define PERF_MEM_REGION_MEM6 0x0e /* Memory region 6 */ +#define PERF_MEM_REGION_MEM7 0x0f /* Memory region 7 */ +#define PERF_MEM_REGION_SHIFT 46 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) diff --git a/tools/include/uapi/linux/perf_event.h b/tools/include/uapi/linux/perf_event.h index c44a8fb3e418..d4b99610a3b0 100644 --- a/tools/include/uapi/linux/perf_event.h +++ b/tools/include/uapi/linux/perf_event.h @@ -1330,14 +1330,16 @@ union perf_mem_data_src { mem_snoopx : 2, /* Snoop mode, ext */ mem_blk : 3, /* Access blocked */ mem_hops : 3, /* Hop level */ - mem_rsvd : 18; + mem_region : 5, /* cache/memory regions */ + mem_rsvd : 13; }; }; #elif defined(__BIG_ENDIAN_BITFIELD) union perf_mem_data_src { __u64 val; struct { - __u64 mem_rsvd : 18, + __u64 mem_rsvd : 13, + mem_region : 5, /* cache/memory regions */ mem_hops : 3, /* Hop level */ mem_blk : 3, /* Access blocked */ mem_snoopx : 2, /* Snoop mode, ext */ @@ -1394,7 +1396,7 @@ union perf_mem_data_src { #define PERF_MEM_LVLNUM_L4 0x0004 /* L4 */ #define PERF_MEM_LVLNUM_L2_MHB 0x0005 /* L2 Miss Handling Buffer */ #define PERF_MEM_LVLNUM_MSC 0x0006 /* Memory-side Cache */ -/* 0x007 available */ +#define PERF_MEM_LVLNUM_L0 0x0007 /* L0 */ #define PERF_MEM_LVLNUM_UNC 0x0008 /* Uncached */ #define PERF_MEM_LVLNUM_CXL 0x0009 /* CXL */ #define PERF_MEM_LVLNUM_IO 0x000a /* I/O */ @@ -1447,6 +1449,25 @@ union perf_mem_data_src { /* 5-7 available */ #define PERF_MEM_HOPS_SHIFT 43 +/* Cache/Memory region */ +#define PERF_MEM_REGION_NA 0x0 /* Invalid */ +#define PERF_MEM_REGION_RSVD 0x01 /* Reserved */ +#define PERF_MEM_REGION_L_SHARE 0x02 /* Local CA shared cache */ +#define PERF_MEM_REGION_L_NON_SHARE 0x03 /* Local CA non-shared cache */ +#define PERF_MEM_REGION_O_IO 0x04 /* Other CA IO agent */ +#define PERF_MEM_REGION_O_SHARE 0x05 /* Other CA shared cache */ +#define PERF_MEM_REGION_O_NON_SHARE 0x06 /* Other CA non-shared cache */ +#define PERF_MEM_REGION_MMIO 0x07 /* MMIO */ +#define PERF_MEM_REGION_MEM0 0x08 /* Memory region 0 */ +#define PERF_MEM_REGION_MEM1 0x09 /* Memory region 1 */ +#define PERF_MEM_REGION_MEM2 0x0a /* Memory region 2 */ +#define PERF_MEM_REGION_MEM3 0x0b /* Memory region 3 */ +#define PERF_MEM_REGION_MEM4 0x0c /* Memory region 4 */ +#define PERF_MEM_REGION_MEM5 0x0d /* Memory region 5 */ +#define PERF_MEM_REGION_MEM6 0x0e /* Memory region 6 */ +#define PERF_MEM_REGION_MEM7 0x0f /* Memory region 7 */ +#define PERF_MEM_REGION_SHIFT 46 + #define PERF_MEM_S(a, s) \ (((__u64)PERF_MEM_##a##_##s) << PERF_MEM_##a##_SHIFT) -- cgit v1.2.3 From 73061dbeca783aaf311e1af9610f8cba1c1176cd Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 20 Jan 2026 21:11:44 +0000 Subject: selftests/io_uring: add io_uring_queue_init_params Add a ring init variant taking struct io_uring_params, which mimicks liburing API. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- tools/include/io_uring/mini_liburing.h | 25 +++++++++++++++++-------- 1 file changed, 17 insertions(+), 8 deletions(-) (limited to 'tools/include') diff --git a/tools/include/io_uring/mini_liburing.h b/tools/include/io_uring/mini_liburing.h index 9ccb16074eb5..a55407b09dbb 100644 --- a/tools/include/io_uring/mini_liburing.h +++ b/tools/include/io_uring/mini_liburing.h @@ -126,21 +126,18 @@ static inline int io_uring_enter(int fd, unsigned int to_submit, flags, sig, _NSIG / 8); } -static inline int io_uring_queue_init(unsigned int entries, - struct io_uring *ring, - unsigned int flags) +static inline int io_uring_queue_init_params(unsigned int entries, + struct io_uring *ring, + struct io_uring_params *p) { - struct io_uring_params p; int fd, ret; memset(ring, 0, sizeof(*ring)); - memset(&p, 0, sizeof(p)); - p.flags = flags; - fd = io_uring_setup(entries, &p); + fd = io_uring_setup(entries, p); if (fd < 0) return fd; - ret = io_uring_mmap(fd, &p, &ring->sq, &ring->cq); + ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq); if (!ret) ring->ring_fd = fd; else @@ -148,6 +145,18 @@ static inline int io_uring_queue_init(unsigned int entries, return ret; } +static inline int io_uring_queue_init(unsigned int entries, + struct io_uring *ring, + unsigned int flags) +{ + struct io_uring_params p; + + memset(&p, 0, sizeof(p)); + p.flags = flags; + + return io_uring_queue_init_params(entries, ring, &p); +} + /* Get a sqe */ static inline struct io_uring_sqe *io_uring_get_sqe(struct io_uring *ring) { -- cgit v1.2.3 From 145e0074392587606aa5df353d0e761f0b8357d5 Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Tue, 20 Jan 2026 21:11:45 +0000 Subject: selftests/io_uring: support NO_SQARRAY in miniliburing Add support for IORING_SETUP_NO_SQARRAY in miniliburing. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- tools/include/io_uring/mini_liburing.h | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) (limited to 'tools/include') diff --git a/tools/include/io_uring/mini_liburing.h b/tools/include/io_uring/mini_liburing.h index a55407b09dbb..44be4446feda 100644 --- a/tools/include/io_uring/mini_liburing.h +++ b/tools/include/io_uring/mini_liburing.h @@ -6,6 +6,7 @@ #include #include #include +#include struct io_sq_ring { unsigned int *head; @@ -55,6 +56,7 @@ struct io_uring { struct io_uring_sq sq; struct io_uring_cq cq; int ring_fd; + unsigned flags; }; #if defined(__x86_64) || defined(__i386__) @@ -72,7 +74,14 @@ static inline int io_uring_mmap(int fd, struct io_uring_params *p, void *ptr; int ret; - sq->ring_sz = p->sq_off.array + p->sq_entries * sizeof(unsigned int); + if (p->flags & IORING_SETUP_NO_SQARRAY) { + sq->ring_sz = p->cq_off.cqes; + sq->ring_sz += p->cq_entries * sizeof(struct io_uring_cqe); + } else { + sq->ring_sz = p->sq_off.array; + sq->ring_sz += p->sq_entries * sizeof(unsigned int); + } + ptr = mmap(0, sq->ring_sz, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, fd, IORING_OFF_SQ_RING); if (ptr == MAP_FAILED) @@ -83,7 +92,8 @@ static inline int io_uring_mmap(int fd, struct io_uring_params *p, sq->kring_entries = ptr + p->sq_off.ring_entries; sq->kflags = ptr + p->sq_off.flags; sq->kdropped = ptr + p->sq_off.dropped; - sq->array = ptr + p->sq_off.array; + if (!(p->flags & IORING_SETUP_NO_SQARRAY)) + sq->array = ptr + p->sq_off.array; size = p->sq_entries * sizeof(struct io_uring_sqe); sq->sqes = mmap(0, size, PROT_READ | PROT_WRITE, @@ -138,10 +148,12 @@ static inline int io_uring_queue_init_params(unsigned int entries, if (fd < 0) return fd; ret = io_uring_mmap(fd, p, &ring->sq, &ring->cq); - if (!ret) + if (!ret) { ring->ring_fd = fd; - else + ring->flags = p->flags; + } else { close(fd); + } return ret; } @@ -208,10 +220,18 @@ static inline int io_uring_submit(struct io_uring *ring) ktail = *sq->ktail; to_submit = sq->sqe_tail - sq->sqe_head; - for (submitted = 0; submitted < to_submit; submitted++) { - read_barrier(); - sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + + if (!(ring->flags & IORING_SETUP_NO_SQARRAY)) { + for (submitted = 0; submitted < to_submit; submitted++) { + read_barrier(); + sq->array[ktail++ & mask] = sq->sqe_head++ & mask; + } + } else { + ktail += to_submit; + sq->sqe_head += to_submit; + submitted = to_submit; } + if (!submitted) return 0; -- cgit v1.2.3 From cb68cba4453d3e021b27c2a08fcefdd1376a5ef0 Mon Sep 17 00:00:00 2001 From: Swapnil Sapkal Date: Mon, 19 Jan 2026 17:58:23 +0000 Subject: tools/lib: Add list_is_first() Add list_is_first() to check whether @list is the first entry in list @head Signed-off-by: Swapnil Sapkal Tested-by: Chen Yu Acked-by: Ian Rogers Acked-by: Namhyung Kim Acked-by: Peter Zijlstra Cc: Adrian Hunter Cc: Alexander Shishkin Cc: Andi Kleen Cc: Anubhav Shelat Cc: Ben Gainey Cc: Blake Jones Cc: Chun-Tse Shao Cc: David Vernet Cc: Dmitriy Vyukov Cc: Dr. David Alan Gilbert Cc: Gautham Shenoy Cc: Graham Woodward Cc: Ingo Molnar Cc: James Clark Cc: Jiri Olsa Cc: Juri Lelli Cc: K Prateek Nayak Cc: Kan Liang Cc: Leo Yan Cc: Madadi Vineeth Reddy Cc: Mark Rutland Cc: Ravi Bangoria Cc: Sandipan Das Cc: Santosh Shukla Cc: Shrikanth Hegde Cc: Steven Rostedt (VMware) Cc: Tejun Heo Cc: Thomas Falcon Cc: Tim Chen Cc: Vincent Guittot Cc: Yang Jihong Cc: Yujie Liu Cc: Zhongqiu Han Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/list.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/list.h b/tools/include/linux/list.h index a4dfb6a7cc6a..a692ff7aed5c 100644 --- a/tools/include/linux/list.h +++ b/tools/include/linux/list.h @@ -169,6 +169,16 @@ static inline void list_move_tail(struct list_head *list, list_add_tail(list, head); } +/** + * list_is_first -- tests whether @list is the first entry in list @head + * @list: the entry to test + * @head: the head of the list + */ +static inline int list_is_first(const struct list_head *list, const struct list_head *head) +{ + return list->prev == head; +} + /** * list_is_last - tests whether @list is the last entry in list @head * @list: the entry to test -- cgit v1.2.3 From 2d419c44658f75e7655794341a95c0687830f3df Mon Sep 17 00:00:00 2001 From: Menglong Dong Date: Sat, 24 Jan 2026 14:19:56 +0800 Subject: bpf: add fsession support The fsession is something that similar to kprobe session. It allow to attach a single BPF program to both the entry and the exit of the target functions. Introduce the struct bpf_fsession_link, which allows to add the link to both the fentry and fexit progs_hlist of the trampoline. Signed-off-by: Menglong Dong Co-developed-by: Leon Hwang Signed-off-by: Leon Hwang Link: https://lore.kernel.org/r/20260124062008.8657-2-dongml2@chinatelecom.cn Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 19 ++++++++ include/uapi/linux/bpf.h | 1 + kernel/bpf/btf.c | 2 + kernel/bpf/syscall.c | 18 +++++++- kernel/bpf/trampoline.c | 53 ++++++++++++++++++---- kernel/bpf/verifier.c | 12 +++-- net/bpf/test_run.c | 1 + net/core/bpf_sk_storage.c | 1 + tools/include/uapi/linux/bpf.h | 1 + .../selftests/bpf/prog_tests/tracing_failure.c | 2 +- 10 files changed, 97 insertions(+), 13 deletions(-) (limited to 'tools/include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 5936f8e2996f..41228b0add52 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -1309,6 +1309,7 @@ enum bpf_tramp_prog_type { BPF_TRAMP_MODIFY_RETURN, BPF_TRAMP_MAX, BPF_TRAMP_REPLACE, /* more than MAX */ + BPF_TRAMP_FSESSION, }; struct bpf_tramp_image { @@ -1875,6 +1876,11 @@ struct bpf_tracing_link { struct bpf_prog *tgt_prog; }; +struct bpf_fsession_link { + struct bpf_tracing_link link; + struct bpf_tramp_link fexit; +}; + struct bpf_raw_tp_link { struct bpf_link link; struct bpf_raw_event_map *btp; @@ -2169,6 +2175,19 @@ static inline void bpf_struct_ops_desc_release(struct bpf_struct_ops_desc *st_op #endif +static inline int bpf_fsession_cnt(struct bpf_tramp_links *links) +{ + struct bpf_tramp_links fentries = links[BPF_TRAMP_FENTRY]; + int cnt = 0; + + for (int i = 0; i < links[BPF_TRAMP_FENTRY].nr_links; i++) { + if (fentries.links[i]->link.prog->expected_attach_type == BPF_TRACE_FSESSION) + cnt++; + } + + return cnt; +} + int bpf_prog_ctx_arg_info_init(struct bpf_prog *prog, const struct bpf_ctx_arg_aux *info, u32 cnt); diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 2a2ade4be60f..44e7dbc278e3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1145,6 +1145,7 @@ enum bpf_attach_type { BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, BPF_TRACE_UPROBE_SESSION, + BPF_TRACE_FSESSION, __MAX_BPF_ATTACH_TYPE }; diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c index d10b3404260f..8959f3bc1e92 100644 --- a/kernel/bpf/btf.c +++ b/kernel/bpf/btf.c @@ -6219,6 +6219,7 @@ static int btf_validate_prog_ctx_type(struct bpf_verifier_log *log, const struct case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: + case BPF_TRACE_FSESSION: /* allow u64* as ctx */ if (btf_is_int(t) && t->size == 8) return 0; @@ -6820,6 +6821,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type, fallthrough; case BPF_LSM_CGROUP: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: /* When LSM programs are attached to void LSM hooks * they use FEXIT trampolines and when attached to * int LSM hooks, they use MODIFY_RETURN trampolines. diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c index 3c5c03d43f5f..b9184545c3fd 100644 --- a/kernel/bpf/syscall.c +++ b/kernel/bpf/syscall.c @@ -3577,6 +3577,7 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, case BPF_PROG_TYPE_TRACING: if (prog->expected_attach_type != BPF_TRACE_FENTRY && prog->expected_attach_type != BPF_TRACE_FEXIT && + prog->expected_attach_type != BPF_TRACE_FSESSION && prog->expected_attach_type != BPF_MODIFY_RETURN) { err = -EINVAL; goto out_put_prog; @@ -3626,7 +3627,21 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog, key = bpf_trampoline_compute_key(tgt_prog, NULL, btf_id); } - link = kzalloc(sizeof(*link), GFP_USER); + if (prog->expected_attach_type == BPF_TRACE_FSESSION) { + struct bpf_fsession_link *fslink; + + fslink = kzalloc(sizeof(*fslink), GFP_USER); + if (fslink) { + bpf_link_init(&fslink->fexit.link, BPF_LINK_TYPE_TRACING, + &bpf_tracing_link_lops, prog, attach_type); + fslink->fexit.cookie = bpf_cookie; + link = &fslink->link; + } else { + link = NULL; + } + } else { + link = kzalloc(sizeof(*link), GFP_USER); + } if (!link) { err = -ENOMEM; goto out_put_prog; @@ -4350,6 +4365,7 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type) case BPF_TRACE_RAW_TP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: case BPF_MODIFY_RETURN: return BPF_PROG_TYPE_TRACING; case BPF_LSM_MAC: diff --git a/kernel/bpf/trampoline.c b/kernel/bpf/trampoline.c index 2a125d063e62..edf9da43762d 100644 --- a/kernel/bpf/trampoline.c +++ b/kernel/bpf/trampoline.c @@ -109,10 +109,17 @@ bool bpf_prog_has_trampoline(const struct bpf_prog *prog) enum bpf_attach_type eatype = prog->expected_attach_type; enum bpf_prog_type ptype = prog->type; - return (ptype == BPF_PROG_TYPE_TRACING && - (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || - eatype == BPF_MODIFY_RETURN)) || - (ptype == BPF_PROG_TYPE_LSM && eatype == BPF_LSM_MAC); + switch (ptype) { + case BPF_PROG_TYPE_TRACING: + if (eatype == BPF_TRACE_FENTRY || eatype == BPF_TRACE_FEXIT || + eatype == BPF_MODIFY_RETURN || eatype == BPF_TRACE_FSESSION) + return true; + return false; + case BPF_PROG_TYPE_LSM: + return eatype == BPF_LSM_MAC; + default: + return false; + } } void bpf_image_ksym_init(void *data, unsigned int size, struct bpf_ksym *ksym) @@ -559,6 +566,8 @@ static enum bpf_tramp_prog_type bpf_attach_type_to_tramp(struct bpf_prog *prog) return BPF_TRAMP_MODIFY_RETURN; case BPF_TRACE_FEXIT: return BPF_TRAMP_FEXIT; + case BPF_TRACE_FSESSION: + return BPF_TRAMP_FSESSION; case BPF_LSM_MAC: if (!prog->aux->attach_func_proto->type) /* The function returns void, we cannot modify its @@ -594,8 +603,10 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, struct bpf_trampoline *tr, struct bpf_prog *tgt_prog) { + struct bpf_fsession_link *fslink = NULL; enum bpf_tramp_prog_type kind; struct bpf_tramp_link *link_exiting; + struct hlist_head *prog_list; int err = 0; int cnt = 0, i; @@ -621,24 +632,43 @@ static int __bpf_trampoline_link_prog(struct bpf_tramp_link *link, BPF_MOD_JUMP, NULL, link->link.prog->bpf_func); } + if (kind == BPF_TRAMP_FSESSION) { + prog_list = &tr->progs_hlist[BPF_TRAMP_FENTRY]; + cnt++; + } else { + prog_list = &tr->progs_hlist[kind]; + } if (cnt >= BPF_MAX_TRAMP_LINKS) return -E2BIG; if (!hlist_unhashed(&link->tramp_hlist)) /* prog already linked */ return -EBUSY; - hlist_for_each_entry(link_exiting, &tr->progs_hlist[kind], tramp_hlist) { + hlist_for_each_entry(link_exiting, prog_list, tramp_hlist) { if (link_exiting->link.prog != link->link.prog) continue; /* prog already linked */ return -EBUSY; } - hlist_add_head(&link->tramp_hlist, &tr->progs_hlist[kind]); - tr->progs_cnt[kind]++; + hlist_add_head(&link->tramp_hlist, prog_list); + if (kind == BPF_TRAMP_FSESSION) { + tr->progs_cnt[BPF_TRAMP_FENTRY]++; + fslink = container_of(link, struct bpf_fsession_link, link.link); + hlist_add_head(&fslink->fexit.tramp_hlist, &tr->progs_hlist[BPF_TRAMP_FEXIT]); + tr->progs_cnt[BPF_TRAMP_FEXIT]++; + } else { + tr->progs_cnt[kind]++; + } err = bpf_trampoline_update(tr, true /* lock_direct_mutex */); if (err) { hlist_del_init(&link->tramp_hlist); - tr->progs_cnt[kind]--; + if (kind == BPF_TRAMP_FSESSION) { + tr->progs_cnt[BPF_TRAMP_FENTRY]--; + hlist_del_init(&fslink->fexit.tramp_hlist); + tr->progs_cnt[BPF_TRAMP_FEXIT]--; + } else { + tr->progs_cnt[kind]--; + } } return err; } @@ -672,6 +702,13 @@ static int __bpf_trampoline_unlink_prog(struct bpf_tramp_link *link, guard(mutex)(&tgt_prog->aux->ext_mutex); tgt_prog->aux->is_extended = false; return err; + } else if (kind == BPF_TRAMP_FSESSION) { + struct bpf_fsession_link *fslink = + container_of(link, struct bpf_fsession_link, link.link); + + hlist_del_init(&fslink->fexit.tramp_hlist); + tr->progs_cnt[BPF_TRAMP_FEXIT]--; + kind = BPF_TRAMP_FENTRY; } hlist_del_init(&link->tramp_hlist); tr->progs_cnt[kind]--; diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index c7f5234d5fd2..41bbed6418b5 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -17848,6 +17848,7 @@ static int check_return_code(struct bpf_verifier_env *env, int regno, const char switch (env->prog->expected_attach_type) { case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: range = retval_range(0, 0); break; case BPF_TRACE_RAW_TP: @@ -23774,6 +23775,7 @@ patch_map_ops_generic: if (prog_type == BPF_PROG_TYPE_TRACING && insn->imm == BPF_FUNC_get_func_ret) { if (eatype == BPF_TRACE_FEXIT || + eatype == BPF_TRACE_FSESSION || eatype == BPF_MODIFY_RETURN) { /* Load nr_args from ctx - 8 */ insn_buf[0] = BPF_LDX_MEM(BPF_DW, BPF_REG_0, BPF_REG_1, -8); @@ -24725,7 +24727,8 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, if (tgt_prog->type == BPF_PROG_TYPE_TRACING && prog_extension && (tgt_prog->expected_attach_type == BPF_TRACE_FENTRY || - tgt_prog->expected_attach_type == BPF_TRACE_FEXIT)) { + tgt_prog->expected_attach_type == BPF_TRACE_FEXIT || + tgt_prog->expected_attach_type == BPF_TRACE_FSESSION)) { /* Program extensions can extend all program types * except fentry/fexit. The reason is the following. * The fentry/fexit programs are used for performance @@ -24740,7 +24743,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, * beyond reasonable stack size. Hence extending fentry * is not allowed. */ - bpf_log(log, "Cannot extend fentry/fexit\n"); + bpf_log(log, "Cannot extend fentry/fexit/fsession\n"); return -EINVAL; } } else { @@ -24824,6 +24827,7 @@ int bpf_check_attach_target(struct bpf_verifier_log *log, case BPF_LSM_CGROUP: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: if (!btf_type_is_func(t)) { bpf_log(log, "attach_btf_id %u is not a function\n", btf_id); @@ -24990,6 +24994,7 @@ static bool can_be_sleepable(struct bpf_prog *prog) case BPF_TRACE_FEXIT: case BPF_MODIFY_RETURN: case BPF_TRACE_ITER: + case BPF_TRACE_FSESSION: return true; default: return false; @@ -25071,9 +25076,10 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) tgt_info.tgt_name); return -EINVAL; } else if ((prog->expected_attach_type == BPF_TRACE_FEXIT || + prog->expected_attach_type == BPF_TRACE_FSESSION || prog->expected_attach_type == BPF_MODIFY_RETURN) && btf_id_set_contains(&noreturn_deny, btf_id)) { - verbose(env, "Attaching fexit/fmod_ret to __noreturn function '%s' is rejected.\n", + verbose(env, "Attaching fexit/fsession/fmod_ret to __noreturn function '%s' is rejected.\n", tgt_info.tgt_name); return -EINVAL; } diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c index 26cfcfdc45eb..178c4738e63b 100644 --- a/net/bpf/test_run.c +++ b/net/bpf/test_run.c @@ -685,6 +685,7 @@ int bpf_prog_test_run_tracing(struct bpf_prog *prog, switch (prog->expected_attach_type) { case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: if (bpf_fentry_test1(1) != 2 || bpf_fentry_test2(2, 3) != 5 || bpf_fentry_test3(4, 5, 6) != 15 || diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c index 850dd736ccd1..de111818f3a0 100644 --- a/net/core/bpf_sk_storage.c +++ b/net/core/bpf_sk_storage.c @@ -365,6 +365,7 @@ static bool bpf_sk_storage_tracing_allowed(const struct bpf_prog *prog) return true; case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: + case BPF_TRACE_FSESSION: return !!strncmp(prog->aux->attach_func_name, "bpf_sk_storage", strlen("bpf_sk_storage")); default: diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index b816bc53d2e1..3ca7d76e05f0 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -1145,6 +1145,7 @@ enum bpf_attach_type { BPF_NETKIT_PEER, BPF_TRACE_KPROBE_SESSION, BPF_TRACE_UPROBE_SESSION, + BPF_TRACE_FSESSION, __MAX_BPF_ATTACH_TYPE }; diff --git a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c index 10e231965589..f9f9e1cb87bf 100644 --- a/tools/testing/selftests/bpf/prog_tests/tracing_failure.c +++ b/tools/testing/selftests/bpf/prog_tests/tracing_failure.c @@ -73,7 +73,7 @@ static void test_tracing_deny(void) static void test_fexit_noreturns(void) { test_tracing_fail_prog("fexit_noreturns", - "Attaching fexit/fmod_ret to __noreturn function 'do_exit' is rejected."); + "Attaching fexit/fsession/fmod_ret to __noreturn function 'do_exit' is rejected."); } void test_tracing_failure(void) -- cgit v1.2.3 From 752b807028e63f1473b84eb1350e131eca5e5249 Mon Sep 17 00:00:00 2001 From: Matt Bobrowski Date: Tue, 27 Jan 2026 08:51:10 +0000 Subject: bpf: add new BPF_CGROUP_ITER_CHILDREN control option Currently, the BPF cgroup iterator supports walking descendants in either pre-order (BPF_CGROUP_ITER_DESCENDANTS_PRE) or post-order (BPF_CGROUP_ITER_DESCENDANTS_POST). These modes perform an exhaustive depth-first search (DFS) of the hierarchy. In scenarios where a BPF program may need to inspect only the direct children of a given parent cgroup, a full DFS is unnecessarily expensive. This patch introduces a new BPF cgroup iterator control option, BPF_CGROUP_ITER_CHILDREN. This control option restricts the traversal to the immediate children of a specified parent cgroup, allowing for more targeted and efficient iteration, particularly when exhaustive depth-first search (DFS) traversal is not required. Signed-off-by: Matt Bobrowski Link: https://lore.kernel.org/r/20260127085112.3608687-1-mattbobrowski@google.com Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 8 ++++++++ kernel/bpf/cgroup_iter.c | 26 +++++++++++++++++++++----- tools/include/uapi/linux/bpf.h | 8 ++++++++ 3 files changed, 37 insertions(+), 5 deletions(-) (limited to 'tools/include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 44e7dbc278e3..c8d400b7680a 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order { BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */ + /* + * Walks the immediate children of the specified parent + * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE, + * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP + * the iterator does not include the specified parent as one of the + * returned iterator elements. + */ + BPF_CGROUP_ITER_CHILDREN, }; union bpf_iter_link_info { diff --git a/kernel/bpf/cgroup_iter.c b/kernel/bpf/cgroup_iter.c index f04a468cf6a7..fd51fe3d92cc 100644 --- a/kernel/bpf/cgroup_iter.c +++ b/kernel/bpf/cgroup_iter.c @@ -8,12 +8,13 @@ #include "../cgroup/cgroup-internal.h" /* cgroup_mutex and cgroup_is_dead */ -/* cgroup_iter provides four modes of traversal to the cgroup hierarchy. +/* cgroup_iter provides five modes of traversal to the cgroup hierarchy. * * 1. Walk the descendants of a cgroup in pre-order. * 2. Walk the descendants of a cgroup in post-order. * 3. Walk the ancestors of a cgroup. * 4. Show the given cgroup only. + * 5. Walk the children of a given parent cgroup. * * For walking descendants, cgroup_iter can walk in either pre-order or * post-order. For walking ancestors, the iter walks up from a cgroup to @@ -78,6 +79,8 @@ static void *cgroup_iter_seq_start(struct seq_file *seq, loff_t *pos) return css_next_descendant_pre(NULL, p->start_css); else if (p->order == BPF_CGROUP_ITER_DESCENDANTS_POST) return css_next_descendant_post(NULL, p->start_css); + else if (p->order == BPF_CGROUP_ITER_CHILDREN) + return css_next_child(NULL, p->start_css); else /* BPF_CGROUP_ITER_SELF_ONLY and BPF_CGROUP_ITER_ANCESTORS_UP */ return p->start_css; } @@ -113,6 +116,8 @@ static void *cgroup_iter_seq_next(struct seq_file *seq, void *v, loff_t *pos) return css_next_descendant_post(curr, p->start_css); else if (p->order == BPF_CGROUP_ITER_ANCESTORS_UP) return curr->parent; + else if (p->order == BPF_CGROUP_ITER_CHILDREN) + return css_next_child(curr, p->start_css); else /* BPF_CGROUP_ITER_SELF_ONLY */ return NULL; } @@ -200,11 +205,16 @@ static int bpf_iter_attach_cgroup(struct bpf_prog *prog, int order = linfo->cgroup.order; struct cgroup *cgrp; - if (order != BPF_CGROUP_ITER_DESCENDANTS_PRE && - order != BPF_CGROUP_ITER_DESCENDANTS_POST && - order != BPF_CGROUP_ITER_ANCESTORS_UP && - order != BPF_CGROUP_ITER_SELF_ONLY) + switch (order) { + case BPF_CGROUP_ITER_DESCENDANTS_PRE: + case BPF_CGROUP_ITER_DESCENDANTS_POST: + case BPF_CGROUP_ITER_ANCESTORS_UP: + case BPF_CGROUP_ITER_SELF_ONLY: + case BPF_CGROUP_ITER_CHILDREN: + break; + default: return -EINVAL; + } if (fd && id) return -EINVAL; @@ -257,6 +267,8 @@ show_order: seq_puts(seq, "order: descendants_post\n"); else if (aux->cgroup.order == BPF_CGROUP_ITER_ANCESTORS_UP) seq_puts(seq, "order: ancestors_up\n"); + else if (aux->cgroup.order == BPF_CGROUP_ITER_CHILDREN) + seq_puts(seq, "order: children\n"); else /* BPF_CGROUP_ITER_SELF_ONLY */ seq_puts(seq, "order: self_only\n"); } @@ -320,6 +332,7 @@ __bpf_kfunc int bpf_iter_css_new(struct bpf_iter_css *it, case BPF_CGROUP_ITER_DESCENDANTS_PRE: case BPF_CGROUP_ITER_DESCENDANTS_POST: case BPF_CGROUP_ITER_ANCESTORS_UP: + case BPF_CGROUP_ITER_CHILDREN: break; default: return -EINVAL; @@ -345,6 +358,9 @@ __bpf_kfunc struct cgroup_subsys_state *bpf_iter_css_next(struct bpf_iter_css *i case BPF_CGROUP_ITER_DESCENDANTS_POST: kit->pos = css_next_descendant_post(kit->pos, kit->start); break; + case BPF_CGROUP_ITER_CHILDREN: + kit->pos = css_next_child(kit->pos, kit->start); + break; case BPF_CGROUP_ITER_ANCESTORS_UP: kit->pos = kit->pos ? kit->pos->parent : kit->start; } diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 3ca7d76e05f0..5e38b4887de6 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -119,6 +119,14 @@ enum bpf_cgroup_iter_order { BPF_CGROUP_ITER_DESCENDANTS_PRE, /* walk descendants in pre-order. */ BPF_CGROUP_ITER_DESCENDANTS_POST, /* walk descendants in post-order. */ BPF_CGROUP_ITER_ANCESTORS_UP, /* walk ancestors upward. */ + /* + * Walks the immediate children of the specified parent + * cgroup_subsys_state. Unlike BPF_CGROUP_ITER_DESCENDANTS_PRE, + * BPF_CGROUP_ITER_DESCENDANTS_POST, and BPF_CGROUP_ITER_ANCESTORS_UP + * the iterator does not include the specified parent as one of the + * returned iterator elements. + */ + BPF_CGROUP_ITER_CHILDREN, }; union bpf_iter_link_info { -- cgit v1.2.3 From 944e3f7562c55fa37ebcdd58e5f60f296c81a854 Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Tue, 27 Jan 2026 12:12:06 +0100 Subject: tools: Update context analysis macros in compiler_types.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In sync with the main kernel headers, include a stub version of compiler-context-analysis.h in tools/include/linux/compiler_types.h and remove the sparse context tracking definitions. Since tools/ headers are generally self-contained, provide a standalone tools/include/linux/compiler-context-analysis.h with no-op stubs for now. Also clean up redundant stubs in tools/testing/shared/linux/kernel.h that are now redundant. This fixes build errors in tools/testing/radix-tree/ where headers from include/linux/ (like cleanup.h) are used directly and expect these macros to be defined: | cc -I../shared -I. -I../../include -I../../arch/x86/include -I../../../lib -g -Og -Wall -D_LGPL_SOURCE -fsanitize=address -fsanitize=undefined -c -o radix-tree.o radix-tree.c | In file included from ../shared/linux/cleanup.h:2, | from ../shared/linux/../../../../include/linux/idr.h:18, | from ../shared/linux/idr.h:5, | from radix-tree.c:18: | ../shared/linux/../../../../include/linux/idr.h: In function ‘class_idr_alloc_destructor’: | ../shared/linux/../../../../include/linux/cleanup.h:283:9: error: expected declaration specifiers before ‘__no_context_analysis’ | 283 | __no_context_analysis \ | | ^~~~~~~~~~~~~~~~~~~~~ Closes: https://lore.kernel.org/oe-lkp/202601261546.d7ae2447-lkp@intel.com Reported-by: kernel test robot Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Lorenzo Stoakes Tested-by: Lorenzo Stoakes Link: https://patch.msgid.link/20260127111428.3747328-1-elver@google.com --- tools/include/linux/compiler-context-analysis.h | 42 +++++++++++++++++++++++++ tools/include/linux/compiler_types.h | 16 +--------- tools/testing/shared/linux/kernel.h | 4 --- 3 files changed, 43 insertions(+), 19 deletions(-) create mode 100644 tools/include/linux/compiler-context-analysis.h (limited to 'tools/include') diff --git a/tools/include/linux/compiler-context-analysis.h b/tools/include/linux/compiler-context-analysis.h new file mode 100644 index 000000000000..13a9115e9e58 --- /dev/null +++ b/tools/include/linux/compiler-context-analysis.h @@ -0,0 +1,42 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef _TOOLS_LINUX_COMPILER_CONTEXT_ANALYSIS_H +#define _TOOLS_LINUX_COMPILER_CONTEXT_ANALYSIS_H + +/* + * Macros and attributes for compiler-based static context analysis. + * No-op stubs for tools. + */ + +#define __guarded_by(...) +#define __pt_guarded_by(...) + +#define context_lock_struct(name, ...) struct __VA_ARGS__ name + +#define __no_context_analysis +#define __context_unsafe(comment) +#define context_unsafe(...) ({ __VA_ARGS__; }) +#define context_unsafe_alias(p) +#define disable_context_analysis() +#define enable_context_analysis() + +#define __must_hold(...) +#define __must_not_hold(...) +#define __acquires(...) +#define __cond_acquires(ret, x) +#define __releases(...) +#define __acquire(x) (void)0 +#define __release(x) (void)0 + +#define __must_hold_shared(...) +#define __acquires_shared(...) +#define __cond_acquires_shared(ret, x) +#define __releases_shared(...) +#define __acquire_shared(x) (void)0 +#define __release_shared(x) (void)0 + +#define __acquire_ret(call, expr) (call) +#define __acquire_shared_ret(call, expr) (call) +#define __acquires_ret +#define __acquires_shared_ret + +#endif /* _TOOLS_LINUX_COMPILER_CONTEXT_ANALYSIS_H */ diff --git a/tools/include/linux/compiler_types.h b/tools/include/linux/compiler_types.h index 067a5b4e0f7b..14e420467eee 100644 --- a/tools/include/linux/compiler_types.h +++ b/tools/include/linux/compiler_types.h @@ -13,21 +13,7 @@ #define __has_builtin(x) (0) #endif -#ifdef __CHECKER__ -/* context/locking */ -# define __must_hold(x) __attribute__((context(x,1,1))) -# define __acquires(x) __attribute__((context(x,0,1))) -# define __releases(x) __attribute__((context(x,1,0))) -# define __acquire(x) __context__(x,1) -# define __release(x) __context__(x,-1) -#else /* __CHECKER__ */ -/* context/locking */ -# define __must_hold(x) -# define __acquires(x) -# define __releases(x) -# define __acquire(x) (void)0 -# define __release(x) (void)0 -#endif /* __CHECKER__ */ +#include /* Compiler specific macros. */ #ifdef __GNUC__ diff --git a/tools/testing/shared/linux/kernel.h b/tools/testing/shared/linux/kernel.h index c0a2bb785b92..dc2b4ccfb185 100644 --- a/tools/testing/shared/linux/kernel.h +++ b/tools/testing/shared/linux/kernel.h @@ -21,9 +21,5 @@ #define schedule() #define PAGE_SHIFT 12 -#define __acquires(x) -#define __releases(x) -#define __must_hold(x) - #define EXPORT_PER_CPU_SYMBOL_GPL(x) #endif /* _KERNEL_H */ -- cgit v1.2.3 From a537c0da168a08b0b6a7f7bd9e75f4cc8d45ff57 Mon Sep 17 00:00:00 2001 From: Leo Yan Date: Fri, 23 Jan 2026 13:32:03 +0000 Subject: tools: Fix bitfield dependency failure MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A perf build failure was reported by Thomas Voegtle on stable kernel v6.6.120: CC tests/sample-parsing.o CC util/intel-pt-decoder/intel-pt-pkt-decoder.o CC util/perf-regs-arch/perf_regs_csky.o CC util/arm-spe-decoder/arm-spe-pkt-decoder.o CC util/perf-regs-arch/perf_regs_loongarch.o In file included from util/arm-spe-decoder/arm-spe-pkt-decoder.h:10, from util/arm-spe-decoder/arm-spe-pkt-decoder.c:14: /local/git/linux-stable-rc/tools/include/linux/bitfield.h: In function ‘le16_encode_bits’: /local/git/linux-stable-rc/tools/include/linux/bitfield.h:166:31: error: implicit declaration of function ‘cpu_to_le16’; did you mean ‘htole16’? [-Werror=implicit-function-declaration] ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \ ^~~~~~~~~ /local/git/linux-stable-rc/tools/include/linux/bitfield.h:149:9: note: in definition of macro ‘____MAKE_OP’ return to((v & field_mask(field)) * field_multiplier(field)); \ ^~ /local/git/linux-stable-rc/tools/include/linux/bitfield.h:170:1: note: in expansion of macro ‘__MAKE_OP’ __MAKE_OP(16) Fix this by including linux/kernel.h, which provides the required definitions. The issue was not found on the mainline due to the relevant C files have included kernel.h. It'd be good to merge this change on mainline as well for robustness. Closes: https://lore.kernel.org/stable/3a44500b-d7c8-179f-61f6-e51cb50d3512@lio96.de/ Fixes: 64d86c03e1441742 ("perf arm-spe: Extend branch operations") Reported-by: Hamza Mahfooz Reported-by: Thomas Voegtle Signed-off-by: Leo Yan Cc: Greg Kroah-Hartman Cc: Ian Rogers Cc: James Clark Cc: Leo Yan Cc: Namhyung Kim To: Sasha Levin Cc: stable@vger.kernel.org Signed-off-by: Arnaldo Carvalho de Melo --- tools/include/linux/bitfield.h | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/include') diff --git a/tools/include/linux/bitfield.h b/tools/include/linux/bitfield.h index 6093fa6db260..ddf81f24956b 100644 --- a/tools/include/linux/bitfield.h +++ b/tools/include/linux/bitfield.h @@ -8,6 +8,7 @@ #define _LINUX_BITFIELD_H #include +#include #include /* -- cgit v1.2.3 From 21c8a5bae7bd594f5b89db551b618d60b994b8cf Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Thu, 22 Jan 2026 16:06:13 +0000 Subject: tools: bitmap: add missing bitmap_[subset(), andnot()] The bitmap_subset() and bitmap_andnot() functions are not present in the tools version of include/linux/bitmap.h, so add them as subsequent patches implement test code that requires them. We also add the missing __bitmap_subset() to tools/lib/bitmap.c. Link: https://lkml.kernel.org/r/0fd0d4ec868297f522003cb4b5898b53b498805b.1769097829.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Acked-by: Liam R. Howlett Cc: Baolin Wang Cc: Barry Song Cc: David Hildenbrand Cc: Dev Jain Cc: Jason Gunthorpe Cc: Suren Baghdasaryan Cc: Vlastimil Babka Cc: Zi Yan Cc: Damien Le Moal Cc: "Darrick J. Wong" Cc: Jarkko Sakkinen Cc: Yury Norov Cc: Chris Mason Cc: Pedro Falcato Signed-off-by: Andrew Morton --- tools/include/linux/bitmap.h | 22 ++++++++++++++++++++++ tools/lib/bitmap.c | 29 +++++++++++++++++++++++++++++ 2 files changed, 51 insertions(+) (limited to 'tools/include') diff --git a/tools/include/linux/bitmap.h b/tools/include/linux/bitmap.h index 0d992245c600..250883090a5d 100644 --- a/tools/include/linux/bitmap.h +++ b/tools/include/linux/bitmap.h @@ -24,6 +24,10 @@ void __bitmap_set(unsigned long *map, unsigned int start, int len); void __bitmap_clear(unsigned long *map, unsigned int start, int len); bool __bitmap_intersects(const unsigned long *bitmap1, const unsigned long *bitmap2, unsigned int bits); +bool __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); +bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int nbits); #define BITMAP_FIRST_WORD_MASK(start) (~0UL << ((start) & (BITS_PER_LONG - 1))) #define BITMAP_LAST_WORD_MASK(nbits) (~0UL >> (-(nbits) & (BITS_PER_LONG - 1))) @@ -81,6 +85,15 @@ static inline void bitmap_or(unsigned long *dst, const unsigned long *src1, __bitmap_or(dst, src1, src2, nbits); } +static __always_inline +bool bitmap_andnot(unsigned long *dst, const unsigned long *src1, + const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return (*dst = *src1 & ~(*src2) & BITMAP_LAST_WORD_MASK(nbits)) != 0; + return __bitmap_andnot(dst, src1, src2, nbits); +} + static inline unsigned long *bitmap_alloc(unsigned int nbits, gfp_t flags __maybe_unused) { return malloc(bitmap_size(nbits)); @@ -157,6 +170,15 @@ static inline bool bitmap_intersects(const unsigned long *src1, return __bitmap_intersects(src1, src2, nbits); } +static __always_inline +bool bitmap_subset(const unsigned long *src1, const unsigned long *src2, unsigned int nbits) +{ + if (small_const_nbits(nbits)) + return ! ((*src1 & ~(*src2)) & BITMAP_LAST_WORD_MASK(nbits)); + else + return __bitmap_subset(src1, src2, nbits); +} + static inline void bitmap_set(unsigned long *map, unsigned int start, unsigned int nbits) { if (__builtin_constant_p(nbits) && nbits == 1) diff --git a/tools/lib/bitmap.c b/tools/lib/bitmap.c index 51255c69754d..aa83d22c45e3 100644 --- a/tools/lib/bitmap.c +++ b/tools/lib/bitmap.c @@ -140,3 +140,32 @@ void __bitmap_clear(unsigned long *map, unsigned int start, int len) *p &= ~mask_to_clear; } } + +bool __bitmap_andnot(unsigned long *dst, const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k; + unsigned int lim = bits/BITS_PER_LONG; + unsigned long result = 0; + + for (k = 0; k < lim; k++) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k]); + if (bits % BITS_PER_LONG) + result |= (dst[k] = bitmap1[k] & ~bitmap2[k] & + BITMAP_LAST_WORD_MASK(bits)); + return result != 0; +} + +bool __bitmap_subset(const unsigned long *bitmap1, + const unsigned long *bitmap2, unsigned int bits) +{ + unsigned int k, lim = bits/BITS_PER_LONG; + for (k = 0; k < lim; ++k) + if (bitmap1[k] & ~bitmap2[k]) + return false; + + if (bits % BITS_PER_LONG) + if ((bitmap1[k] & ~bitmap2[k]) & BITMAP_LAST_WORD_MASK(bits)) + return false; + return true; +} -- cgit v1.2.3