From b0c9bfbab925ac6385d4d06a134fd89cadf771fe Mon Sep 17 00:00:00 2001 From: Johannes Nixdorf Date: Fri, 25 Jul 2025 18:31:19 +0200 Subject: selftests/seccomp: Add a test for the WAIT_KILLABLE_RECV fast reply race If WAIT_KILLABLE_RECV was specified, and an event is received, the tracee's syscall is not supposed to be interruptible. This was not properly ensured if the reply was sent too fast, and an interrupting signal was received before the reply was processed on the tracee side. Add a test for this, that consists of: - a tracee with a timer that keeps sending it signals while repeatedly running a traced syscall in a loop, - a tracer that repeatedly handles all syscalls from the tracee in a loop, and - a shared pipe between both, on which the tracee sends one byte per syscall attempted and the tracer reads one byte per syscall handled. If the syscall for the tracee is restarted after the tracer received the event for it due to this bug, the tracee will not have sent a second token on the pipe, which the tracer will notice and fail the test. The tests also uses SECCOMP_IOCTL_NOTIF_ADDFD with SECCOMP_ADDFD_FLAG_SEND for the reply, as the fix for the bug has an additional code path change for handling addfd, which would not be exercised by a simple SECCOMP_IOCTL_NOTIF_SEND, and it is possible to fix the bug while leaving the same race intact for the addfd case. This test is not guaranteed to reproduce the bug on every run, but the parameters (signal frequency and number of repeated syscalls) have been chosen so that on my machine this test: - takes ~0.8s in the good case (+1s in the failure case), and - detects the bug in 999 of 1000 runs. Signed-off-by: Johannes Nixdorf Link: https://lore.kernel.org/r/20250725-seccomp-races-v2-2-cf8b9d139596@nixdorf.dev Signed-off-by: Kees Cook --- tools/testing/selftests/seccomp/seccomp_bpf.c | 131 ++++++++++++++++++++++++++ 1 file changed, 131 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c index 61acbd45ffaa..fc4910d35342 100644 --- a/tools/testing/selftests/seccomp/seccomp_bpf.c +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -24,6 +24,7 @@ #include #include #include +#include #include #include #include @@ -3547,6 +3548,10 @@ static void signal_handler(int signal) perror("write from signal"); } +static void signal_handler_nop(int signal) +{ +} + TEST(user_notification_signal) { pid_t pid; @@ -4819,6 +4824,132 @@ TEST(user_notification_wait_killable_fatal) EXPECT_EQ(SIGTERM, WTERMSIG(status)); } +/* Ensure signals after the reply do not interrupt */ +TEST(user_notification_wait_killable_after_reply) +{ + int i, max_iter = 100000; + int listener, status; + int pipe_fds[2]; + pid_t pid; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) + { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + listener = user_notif_syscall( + __NR_dup, SECCOMP_FILTER_FLAG_NEW_LISTENER | + SECCOMP_FILTER_FLAG_WAIT_KILLABLE_RECV); + ASSERT_GE(listener, 0); + + /* + * Used to count invocations. One token is transferred from the child + * to the parent per syscall invocation, the parent tries to take + * one token per successful RECV. If the syscall is restarted after + * RECV the parent will try to get two tokens while the child only + * provided one. + */ + ASSERT_EQ(pipe(pipe_fds), 0); + + pid = fork(); + ASSERT_GE(pid, 0); + + if (pid == 0) { + struct sigaction new_action = { + .sa_handler = signal_handler_nop, + .sa_flags = SA_RESTART, + }; + struct itimerval timer = { + .it_value = { .tv_usec = 1000 }, + .it_interval = { .tv_usec = 1000 }, + }; + char c = 'a'; + + close(pipe_fds[0]); + + /* Setup the sigaction with SA_RESTART */ + if (sigaction(SIGALRM, &new_action, NULL)) { + perror("sigaction"); + exit(1); + } + + /* + * Kill with SIGALRM repeatedly, to try to hit the race when + * handling the syscall. + */ + if (setitimer(ITIMER_REAL, &timer, NULL) < 0) + perror("setitimer"); + + for (i = 0; i < max_iter; ++i) { + int fd; + + /* Send one token per iteration to catch repeats. */ + if (write(pipe_fds[1], &c, sizeof(c)) != 1) { + perror("write"); + exit(1); + } + + fd = syscall(__NR_dup, 0); + if (fd < 0) { + perror("dup"); + exit(1); + } + close(fd); + } + + exit(0); + } + + close(pipe_fds[1]); + + for (i = 0; i < max_iter; ++i) { + struct seccomp_notif req = {}; + struct seccomp_notif_addfd addfd = {}; + struct pollfd pfd = { + .fd = pipe_fds[0], + .events = POLLIN, + }; + char c; + + /* + * Try to receive one token. If it failed, one child syscall + * was restarted after RECV and needed to be handled twice. + */ + ASSERT_EQ(poll(&pfd, 1, 1000), 1) + kill(pid, SIGKILL); + + ASSERT_EQ(read(pipe_fds[0], &c, sizeof(c)), 1) + kill(pid, SIGKILL); + + /* + * Get the notification, reply to it as fast as possible to test + * whether the child wrongly skips going into the non-preemptible + * (TASK_KILLABLE) state. + */ + do + ret = ioctl(listener, SECCOMP_IOCTL_NOTIF_RECV, &req); + while (ret < 0 && errno == ENOENT); /* Accept interruptions before RECV */ + ASSERT_EQ(ret, 0) + kill(pid, SIGKILL); + + addfd.id = req.id; + addfd.flags = SECCOMP_ADDFD_FLAG_SEND; + addfd.srcfd = 0; + ASSERT_GE(ioctl(listener, SECCOMP_IOCTL_NOTIF_ADDFD, &addfd), 0) + kill(pid, SIGKILL); + } + + /* + * Wait for the process to exit, and make sure the process terminated + * with a zero exit code.. + */ + EXPECT_EQ(waitpid(pid, &status, 0), pid); + EXPECT_EQ(true, WIFEXITED(status)); + EXPECT_EQ(0, WEXITSTATUS(status)); +} + struct tsync_vs_thread_leader_args { pthread_t leader; }; -- cgit v1.2.3 From 31e838e1cdf4c4088fee8154ce8c12713ebfb2da Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 30 Jul 2025 11:58:53 -0700 Subject: selftests/bpf: Introduce task local data Task local data defines an abstract storage type for storing task- specific data (TLD). This patch provides user space and bpf implementation as header-only libraries for accessing task local data. Task local data is a bpf task local storage map with two UPTRs: - tld_meta_u, shared by all tasks of a process, consists of the total count and size of TLDs and an array of metadata of TLDs. A TLD metadata contains the size and name. The name is used to identify a specific TLD in bpf programs. - u_tld_data points to a task-specific memory. It stores TLD data and the starting offset of data in a page. Task local design decouple user space and bpf programs. Since bpf program does not know the size of TLDs in compile time, u_tld_data is declared as a page to accommodate TLDs up to a page. As a result, while user space will likely allocate memory smaller than a page for actual TLDs, it needs to pin a page to kernel. It will pin the page that contains enough memory if the allocated memory spans across the page boundary. The library also creates another task local storage map, tld_key_map, to cache keys for bpf programs to speed up the access. Below are the core task local data API: User space BPF Define TLD TLD_DEFINE_KEY(), tld_create_key() - Init TLD object - tld_object_init() Get TLD data tld_get_data() tld_get_data() - TLD_DEFINE_KEY(), tld_create_key() A TLD is first defined by the user space with TLD_DEFINE_KEY() or tld_create_key(). TLD_DEFINE_KEY() defines a TLD statically and allocates just enough memory during initialization. tld_create_key() allows creating TLDs on the fly, but has a fix memory budget, TLD_DYN_DATA_SIZE. Internally, they all call __tld_create_key(), which iterates tld_meta_u->metadata to check if a TLD can be added. The total TLD size needs to fit into a page (limit of UPTR), and no two TLDs can have the same name. If a TLD can be added, u_tld_meta->cnt is increased using cmpxchg as there may be other concurrent __tld_create_key(). After a successful cmpxchg, the last available tld_meta_u->metadata now belongs to the calling thread. To prevent other threads from reading incomplete metadata while it is being updated, tld_meta_u->metadata->size is used to signal the completion. Finally, the offset, derived from adding up prior TLD sizes is then encapsulated as an opaque object key to prevent user misuse. The offset is guaranteed to be 8-byte aligned to prevent load/store tearing and allow atomic operations on it. - tld_get_data() User space programs can pass the key to tld_get_data() to get a pointer to the associated TLD. The pointer will remain valid for the lifetime of the thread. tld_data_u is lazily allocated on the first call to tld_get_data(). Trying to read task local data from bpf will result in -ENODATA during tld_object_init(). The task-specific memory need to be freed manually by calling tld_free() on thread exit to prevent memory leak or use TLD_FREE_DATA_ON_THREAD_EXIT. - tld_object_init() (BPF) BPF programs need to call tld_object_init() before calling tld_get_data(). This is to avoid redundant map lookup in tld_get_data() by storing pointers to the map values on stack. The pointers are encapsulated as tld_object. tld_key_map is also created on the first time tld_object_init() is called to cache TLD keys successfully fetched by tld_get_data(). bpf_task_storage_get(.., F_CREATE) needs to be retried since it may fail when another thread has already taken the percpu counter lock for the task local storage. - tld_get_data() (BPF) BPF programs can also get a pointer to a TLD with tld_get_data(). It uses the cached key in tld_key_map to locate the data in tld_data_u->data. If the cached key is not set yet (<= 0), __tld_fetch_key() will be called to iterate tld_meta_u->metadata and find the TLD by name. To prevent redundant string comparison in the future when the search fail, the tld_meta_u->cnt is stored in the non-positive range of the key. Next time, __tld_fetch_key() will be called only if there are new TLDs and the search will start from the newly added tld_meta_u->metadata using the old tld_meta_u-cnt. Signed-off-by: Amery Hung Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20250730185903.3574598-3-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- .../selftests/bpf/prog_tests/task_local_data.h | 386 +++++++++++++++++++++ .../selftests/bpf/progs/task_local_data.bpf.h | 237 +++++++++++++ 2 files changed, 623 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/task_local_data.h create mode 100644 tools/testing/selftests/bpf/progs/task_local_data.bpf.h (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/task_local_data.h b/tools/testing/selftests/bpf/prog_tests/task_local_data.h new file mode 100644 index 000000000000..a408d10c3688 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/task_local_data.h @@ -0,0 +1,386 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TASK_LOCAL_DATA_H +#define __TASK_LOCAL_DATA_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT +#include +#endif + +#include + +/* + * OPTIONS + * + * Define the option before including the header + * + * TLD_FREE_DATA_ON_THREAD_EXIT - Frees memory on thread exit automatically + * + * Thread-specific memory for storing TLD is allocated lazily on the first call to + * tld_get_data(). The thread that calls it must also call tld_free() on thread exit + * to prevent memory leak. Pthread will be included if the option is defined. A pthread + * key will be registered with a destructor that calls tld_free(). + * + * + * TLD_DYN_DATA_SIZE - The maximum size of memory allocated for TLDs created dynamically + * (default: 64 bytes) + * + * A TLD can be defined statically using TLD_DEFINE_KEY() or created on the fly using + * tld_create_key(). As the total size of TLDs created with tld_create_key() cannot be + * possibly known statically, a memory area of size TLD_DYN_DATA_SIZE will be allocated + * for these TLDs. This additional memory is allocated for every thread that calls + * tld_get_data() even if no tld_create_key are actually called, so be mindful of + * potential memory wastage. Use TLD_DEFINE_KEY() whenever possible as just enough memory + * will be allocated for TLDs created with it. + * + * + * TLD_NAME_LEN - The maximum length of the name of a TLD (default: 62) + * + * Setting TLD_NAME_LEN will affect the maximum number of TLDs a process can store, + * TLD_MAX_DATA_CNT. + * + * + * TLD_DATA_USE_ALIGNED_ALLOC - Always use aligned_alloc() instead of malloc() + * + * When allocating the memory for storing TLDs, we need to make sure there is a memory + * region of the X bytes within a page. This is due to the limit posed by UPTR: memory + * pinned to the kernel cannot exceed a page nor can it cross the page boundary. The + * library normally calls malloc(2*X) given X bytes of total TLDs, and only uses + * aligned_alloc(PAGE_SIZE, X) when X >= PAGE_SIZE / 2. This is to reduce memory wastage + * as not all memory allocator can use the exact amount of memory requested to fulfill + * aligned_alloc(). For example, some may round the size up to the alignment. Enable the + * option to always use aligned_alloc() if the implementation has low memory overhead. + */ + +#define TLD_PAGE_SIZE getpagesize() +#define TLD_PAGE_MASK (~(TLD_PAGE_SIZE - 1)) + +#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) +#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) + +#define TLD_READ_ONCE(x) (*(volatile typeof(x) *)&(x)) + +#ifndef TLD_DYN_DATA_SIZE +#define TLD_DYN_DATA_SIZE 64 +#endif + +#define TLD_MAX_DATA_CNT (TLD_PAGE_SIZE / sizeof(struct tld_metadata) - 1) + +#ifndef TLD_NAME_LEN +#define TLD_NAME_LEN 62 +#endif + +#ifdef __cplusplus +extern "C" { +#endif + +typedef struct { + __s16 off; +} tld_key_t; + +struct tld_metadata { + char name[TLD_NAME_LEN]; + _Atomic __u16 size; +}; + +struct tld_meta_u { + _Atomic __u8 cnt; + __u16 size; + struct tld_metadata metadata[]; +}; + +struct tld_data_u { + __u64 start; /* offset of tld_data_u->data in a page */ + char data[]; +}; + +struct tld_map_value { + void *data; + struct tld_meta_u *meta; +}; + +struct tld_meta_u * _Atomic tld_meta_p __attribute__((weak)); +__thread struct tld_data_u *tld_data_p __attribute__((weak)); +__thread void *tld_data_alloc_p __attribute__((weak)); + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT +pthread_key_t tld_pthread_key __attribute__((weak)); + +static void tld_free(void); + +static void __tld_thread_exit_handler(void *unused) +{ + tld_free(); +} +#endif + +static int __tld_init_meta_p(void) +{ + struct tld_meta_u *meta, *uninit = NULL; + int err = 0; + + meta = (struct tld_meta_u *)aligned_alloc(TLD_PAGE_SIZE, TLD_PAGE_SIZE); + if (!meta) { + err = -ENOMEM; + goto out; + } + + memset(meta, 0, TLD_PAGE_SIZE); + meta->size = TLD_DYN_DATA_SIZE; + + if (!atomic_compare_exchange_strong(&tld_meta_p, &uninit, meta)) { + free(meta); + goto out; + } + +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT + pthread_key_create(&tld_pthread_key, __tld_thread_exit_handler); +#endif +out: + return err; +} + +static int __tld_init_data_p(int map_fd) +{ + bool use_aligned_alloc = false; + struct tld_map_value map_val; + struct tld_data_u *data; + void *data_alloc = NULL; + int err, tid_fd = -1; + + tid_fd = syscall(SYS_pidfd_open, gettid(), O_EXCL); + if (tid_fd < 0) { + err = -errno; + goto out; + } + +#ifdef TLD_DATA_USE_ALIGNED_ALLOC + use_aligned_alloc = true; +#endif + + /* + * tld_meta_p->size = TLD_DYN_DATA_SIZE + + * total size of TLDs defined via TLD_DEFINE_KEY() + */ + data_alloc = (use_aligned_alloc || tld_meta_p->size * 2 >= TLD_PAGE_SIZE) ? + aligned_alloc(TLD_PAGE_SIZE, tld_meta_p->size) : + malloc(tld_meta_p->size * 2); + if (!data_alloc) { + err = -ENOMEM; + goto out; + } + + /* + * Always pass a page-aligned address to UPTR since the size of tld_map_value::data + * is a page in BTF. If data_alloc spans across two pages, use the page that contains large + * enough memory. + */ + if (TLD_PAGE_SIZE - (~TLD_PAGE_MASK & (intptr_t)data_alloc) >= tld_meta_p->size) { + map_val.data = (void *)(TLD_PAGE_MASK & (intptr_t)data_alloc); + data = data_alloc; + data->start = (~TLD_PAGE_MASK & (intptr_t)data_alloc) + + offsetof(struct tld_data_u, data); + } else { + map_val.data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); + data = (void *)(TLD_ROUND_UP((intptr_t)data_alloc, TLD_PAGE_SIZE)); + data->start = offsetof(struct tld_data_u, data); + } + map_val.meta = TLD_READ_ONCE(tld_meta_p); + + err = bpf_map_update_elem(map_fd, &tid_fd, &map_val, 0); + if (err) { + free(data_alloc); + goto out; + } + + tld_data_p = data; + tld_data_alloc_p = data_alloc; +#ifdef TLD_FREE_DATA_ON_THREAD_EXIT + pthread_setspecific(tld_pthread_key, (void *)1); +#endif +out: + if (tid_fd >= 0) + close(tid_fd); + return err; +} + +static tld_key_t __tld_create_key(const char *name, size_t size, bool dyn_data) +{ + int err, i, sz, off = 0; + __u8 cnt; + + if (!TLD_READ_ONCE(tld_meta_p)) { + err = __tld_init_meta_p(); + if (err) + return (tld_key_t){err}; + } + + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { +retry: + cnt = atomic_load(&tld_meta_p->cnt); + if (i < cnt) { + /* A metadata is not ready until size is updated with a non-zero value */ + while (!(sz = atomic_load(&tld_meta_p->metadata[i].size))) + sched_yield(); + + if (!strncmp(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN)) + return (tld_key_t){-EEXIST}; + + off += TLD_ROUND_UP(sz, 8); + continue; + } + + /* + * TLD_DEFINE_KEY() is given memory upto a page while at most + * TLD_DYN_DATA_SIZE is allocated for tld_create_key() + */ + if (dyn_data) { + if (off + TLD_ROUND_UP(size, 8) > tld_meta_p->size) + return (tld_key_t){-E2BIG}; + } else { + if (off + TLD_ROUND_UP(size, 8) > TLD_PAGE_SIZE - sizeof(struct tld_data_u)) + return (tld_key_t){-E2BIG}; + tld_meta_p->size += TLD_ROUND_UP(size, 8); + } + + /* + * Only one tld_create_key() can increase the current cnt by one and + * takes the latest available slot. Other threads will check again if a new + * TLD can still be added, and then compete for the new slot after the + * succeeding thread update the size. + */ + if (!atomic_compare_exchange_strong(&tld_meta_p->cnt, &cnt, cnt + 1)) + goto retry; + + strncpy(tld_meta_p->metadata[i].name, name, TLD_NAME_LEN); + atomic_store(&tld_meta_p->metadata[i].size, size); + return (tld_key_t){(__s16)off}; + } + + return (tld_key_t){-ENOSPC}; +} + +/** + * TLD_DEFINE_KEY() - Define a TLD and a global variable key associated with the TLD. + * + * @name: The name of the TLD + * @size: The size of the TLD + * @key: The variable name of the key. Cannot exceed TLD_NAME_LEN + * + * The macro can only be used in file scope. + * + * A global variable key of opaque type, tld_key_t, will be declared and initialized before + * main() starts. Use tld_key_is_err() or tld_key_err_or_zero() later to check if the key + * creation succeeded. Pass the key to tld_get_data() to get a pointer to the TLD. + * bpf programs can also fetch the same key by name. + * + * The total size of TLDs created using TLD_DEFINE_KEY() cannot exceed a page. Just + * enough memory will be allocated for each thread on the first call to tld_get_data(). + */ +#define TLD_DEFINE_KEY(key, name, size) \ +tld_key_t key; \ + \ +__attribute__((constructor)) \ +void __tld_define_key_##key(void) \ +{ \ + key = __tld_create_key(name, size, false); \ +} + +/** + * tld_create_key() - Create a TLD and return a key associated with the TLD. + * + * @name: The name the TLD + * @size: The size of the TLD + * + * Return an opaque object key. Use tld_key_is_err() or tld_key_err_or_zero() to check + * if the key creation succeeded. Pass the key to tld_get_data() to get a pointer to + * locate the TLD. bpf programs can also fetch the same key by name. + * + * Use tld_create_key() only when a TLD needs to be created dynamically (e.g., @name is + * not known statically or a TLD needs to be created conditionally) + * + * An additional TLD_DYN_DATA_SIZE bytes are allocated per-thread to accommodate TLDs + * created dynamically with tld_create_key(). Since only a user page is pinned to the + * kernel, when TLDs created with TLD_DEFINE_KEY() uses more than TLD_PAGE_SIZE - + * TLD_DYN_DATA_SIZE, the buffer size will be limited to the rest of the page. + */ +__attribute__((unused)) +static tld_key_t tld_create_key(const char *name, size_t size) +{ + return __tld_create_key(name, size, true); +} + +__attribute__((unused)) +static inline bool tld_key_is_err(tld_key_t key) +{ + return key.off < 0; +} + +__attribute__((unused)) +static inline int tld_key_err_or_zero(tld_key_t key) +{ + return tld_key_is_err(key) ? key.off : 0; +} + +/** + * tld_get_data() - Get a pointer to the TLD associated with the given key of the + * calling thread. + * + * @map_fd: A file descriptor of tld_data_map, the underlying BPF task local storage map + * of task local data. + * @key: A key object created by TLD_DEFINE_KEY() or tld_create_key(). + * + * Return a pointer to the TLD if the key is valid; NULL if not enough memory for TLD + * for this thread, or the key is invalid. The returned pointer is guaranteed to be 8-byte + * aligned. + * + * Threads that call tld_get_data() must call tld_free() on exit to prevent + * memory leak if TLD_FREE_DATA_ON_THREAD_EXIT is not defined. + */ +__attribute__((unused)) +static void *tld_get_data(int map_fd, tld_key_t key) +{ + if (!TLD_READ_ONCE(tld_meta_p)) + return NULL; + + /* tld_data_p is allocated on the first invocation of tld_get_data() */ + if (!tld_data_p && __tld_init_data_p(map_fd)) + return NULL; + + return tld_data_p->data + key.off; +} + +/** + * tld_free() - Free task local data memory of the calling thread + * + * For the calling thread, all pointers to TLDs acquired before will become invalid. + * + * Users must call tld_free() on thread exit to prevent memory leak. Alternatively, + * define TLD_FREE_DATA_ON_THREAD_EXIT and a thread exit handler will be registered + * to free the memory automatically. + */ +__attribute__((unused)) +static void tld_free(void) +{ + if (tld_data_alloc_p) { + free(tld_data_alloc_p); + tld_data_alloc_p = NULL; + tld_data_p = NULL; + } +} + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __TASK_LOCAL_DATA_H */ diff --git a/tools/testing/selftests/bpf/progs/task_local_data.bpf.h b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h new file mode 100644 index 000000000000..432fff2af844 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/task_local_data.bpf.h @@ -0,0 +1,237 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __TASK_LOCAL_DATA_BPF_H +#define __TASK_LOCAL_DATA_BPF_H + +/* + * Task local data is a library that facilitates sharing per-task data + * between user space and bpf programs. + * + * + * USAGE + * + * A TLD, an entry of data in task local data, first needs to be created by the + * user space. This is done by calling user space API, TLD_DEFINE_KEY() or + * tld_create_key(), with the name of the TLD and the size. + * + * TLD_DEFINE_KEY(prio, "priority", sizeof(int)); + * + * or + * + * void func_call(...) { + * tld_key_t prio, in_cs; + * + * prio = tld_create_key("priority", sizeof(int)); + * in_cs = tld_create_key("in_critical_section", sizeof(bool)); + * ... + * + * A key associated with the TLD, which has an opaque type tld_key_t, will be + * initialized or returned. It can be used to get a pointer to the TLD in the + * user space by calling tld_get_data(). + * + * In a bpf program, tld_object_init() first needs to be called to initialized a + * tld_object on the stack. Then, TLDs can be accessed by calling tld_get_data(). + * The API will try to fetch the key by the name and use it to locate the data. + * A pointer to the TLD will be returned. It also caches the key in a task local + * storage map, tld_key_map, whose value type, struct tld_keys, must be defined + * by the developer. + * + * struct tld_keys { + * tld_key_t prio; + * tld_key_t in_cs; + * }; + * + * SEC("struct_ops") + * void prog(struct task_struct task, ...) + * { + * struct tld_object tld_obj; + * int err, *p; + * + * err = tld_object_init(task, &tld_obj); + * if (err) + * return; + * + * p = tld_get_data(&tld_obj, prio, "priority", sizeof(int)); + * if (p) + * // do something depending on *p + */ +#include +#include + +#define TLD_ROUND_MASK(x, y) ((__typeof__(x))((y) - 1)) +#define TLD_ROUND_UP(x, y) ((((x) - 1) | TLD_ROUND_MASK(x, y)) + 1) + +#define TLD_MAX_DATA_CNT (__PAGE_SIZE / sizeof(struct tld_metadata) - 1) + +#ifndef TLD_NAME_LEN +#define TLD_NAME_LEN 62 +#endif + +#ifndef TLD_KEY_MAP_CREATE_RETRY +#define TLD_KEY_MAP_CREATE_RETRY 10 +#endif + +typedef struct { + __s16 off; +} tld_key_t; + +struct tld_metadata { + char name[TLD_NAME_LEN]; + __u16 size; +}; + +struct tld_meta_u { + __u8 cnt; + __u16 size; + struct tld_metadata metadata[TLD_MAX_DATA_CNT]; +}; + +struct tld_data_u { + __u64 start; /* offset of tld_data_u->data in a page */ + char data[__PAGE_SIZE - sizeof(__u64)]; +}; + +struct tld_map_value { + struct tld_data_u __uptr *data; + struct tld_meta_u __uptr *meta; +}; + +typedef struct tld_uptr_dummy { + struct tld_data_u data[0]; + struct tld_meta_u meta[0]; +} *tld_uptr_dummy_t; + +struct tld_object { + struct tld_map_value *data_map; + struct tld_keys *key_map; + /* + * Force the compiler to generate the actual definition of tld_meta_u + * and tld_data_u in BTF. Without it, tld_meta_u and u_tld_data will + * be BTF_KIND_FWD. + */ + tld_uptr_dummy_t dummy[0]; +}; + +/* + * Map value of tld_key_map for caching keys. Must be defined by the developer. + * Members should be tld_key_t and passed to the 3rd argument of tld_fetch_key(). + */ +struct tld_keys; + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tld_map_value); +} tld_data_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_TASK_STORAGE); + __uint(map_flags, BPF_F_NO_PREALLOC); + __type(key, int); + __type(value, struct tld_keys); +} tld_key_map SEC(".maps"); + +/** + * tld_object_init() - Initialize a tld_object. + * + * @task: The task_struct of the target task + * @tld_obj: A pointer to a tld_object to be initialized + * + * Return 0 on success; -ENODATA if the user space did not initialize task local data + * for the current task through tld_get_data(); -ENOMEM if the creation of tld_key_map + * fails + */ +__attribute__((unused)) +static int tld_object_init(struct task_struct *task, struct tld_object *tld_obj) +{ + int i; + + tld_obj->data_map = bpf_task_storage_get(&tld_data_map, task, 0, 0); + if (!tld_obj->data_map) + return -ENODATA; + + bpf_for(i, 0, TLD_KEY_MAP_CREATE_RETRY) { + tld_obj->key_map = bpf_task_storage_get(&tld_key_map, task, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (likely(tld_obj->key_map)) + break; + } + if (!tld_obj->key_map) + return -ENOMEM; + + return 0; +} + +/* + * Return the offset of TLD if @name is found. Otherwise, return the current TLD count + * using the nonpositive range so that the next tld_get_data() can skip fetching key if + * no new TLD is added or start comparing name from the first newly added TLD. + */ +__attribute__((unused)) +static int __tld_fetch_key(struct tld_object *tld_obj, const char *name, int i_start) +{ + struct tld_metadata *metadata; + int i, cnt, start, off = 0; + + if (!tld_obj->data_map || !tld_obj->data_map->data || !tld_obj->data_map->meta) + return 0; + + start = tld_obj->data_map->data->start; + cnt = tld_obj->data_map->meta->cnt; + metadata = tld_obj->data_map->meta->metadata; + + bpf_for(i, 0, cnt) { + if (i >= TLD_MAX_DATA_CNT) + break; + + if (i >= i_start && !bpf_strncmp(metadata[i].name, TLD_NAME_LEN, name)) + return start + off; + + off += TLD_ROUND_UP(metadata[i].size, 8); + } + + return -cnt; +} + +/** + * tld_get_data() - Retrieve a pointer to the TLD associated with the name. + * + * @tld_obj: A pointer to a valid tld_object initialized by tld_object_init() + * @key: The cached key of the TLD in tld_key_map + * @name: The name of the key associated with a TLD + * @size: The size of the TLD. Must be a known constant value + * + * Return a pointer to the TLD associated with @name; NULL if not found or @size is too + * big. @key is used to cache the key if the TLD is found to speed up subsequent calls. + * It should be defined as an member of tld_keys of tld_key_t type by the developer. + */ +#define tld_get_data(tld_obj, key, name, size) \ + ({ \ + void *data = NULL, *_data = (tld_obj)->data_map->data; \ + long off = (tld_obj)->key_map->key.off; \ + int cnt; \ + \ + if (likely(_data)) { \ + if (likely(off > 0)) { \ + barrier_var(off); \ + if (likely(off < __PAGE_SIZE - size)) \ + data = _data + off; \ + } else { \ + cnt = -(off); \ + if (likely((tld_obj)->data_map->meta) && \ + cnt < (tld_obj)->data_map->meta->cnt) { \ + off = __tld_fetch_key(tld_obj, name, cnt); \ + (tld_obj)->key_map->key.off = off; \ + \ + if (likely(off < __PAGE_SIZE - size)) { \ + barrier_var(off); \ + if (off > 0) \ + data = _data + off; \ + } \ + } \ + } \ + } \ + data; \ + }) + +#endif -- cgit v1.2.3 From 120f1a950e495d9751fdb5c8b7852d94546dcd03 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 30 Jul 2025 11:58:54 -0700 Subject: selftests/bpf: Test basic task local data operations Test basic operations of task local data with valid and invalid tld_create_key(). For invalid calls, make sure they return the right error code and check that the TLDs are not inserted by running tld_get_data(" value_not_exists") on the bpf side. The call should a null pointer. For valid calls, first make sure the TLDs are created by calling tld_get_data() on the bpf side. The call should return a valid pointer. Finally, verify that the TLDs are indeed task-specific (i.e., their addresses do not overlap) with multiple user threads. This done by writing values unique to each thread, reading them from both user space and bpf, and checking if the value read back matches the value written. Signed-off-by: Amery Hung Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20250730185903.3574598-4-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/test_task_local_data.c | 192 +++++++++++++++++++++ .../selftests/bpf/progs/test_task_local_data.c | 65 +++++++ 2 files changed, 257 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_task_local_data.c create mode 100644 tools/testing/selftests/bpf/progs/test_task_local_data.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c new file mode 100644 index 000000000000..2e77d3fa2534 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c @@ -0,0 +1,192 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include +#include + +#define TLD_FREE_DATA_ON_THREAD_EXIT +#define TLD_DYN_DATA_SIZE 4096 +#include "task_local_data.h" + +struct test_tld_struct { + __u64 a; + __u64 b; + __u64 c; + __u64 d; +}; + +#include "test_task_local_data.skel.h" + +TLD_DEFINE_KEY(value0_key, "value0", sizeof(int)); + +/* + * Reset task local data between subtests by clearing metadata other + * than the statically defined value0. This is safe as subtests run + * sequentially. Users of task local data library should not touch + * library internal. + */ +static void reset_tld(void) +{ + if (TLD_READ_ONCE(tld_meta_p)) { + /* Remove TLDs created by tld_create_key() */ + tld_meta_p->cnt = 1; + tld_meta_p->size = TLD_DYN_DATA_SIZE; + memset(&tld_meta_p->metadata[1], 0, + (TLD_MAX_DATA_CNT - 1) * sizeof(struct tld_metadata)); + } +} + +/* Serialize access to bpf program's global variables */ +static pthread_mutex_t global_mutex; + +static tld_key_t *tld_keys; + +#define TEST_BASIC_THREAD_NUM 32 + +void *test_task_local_data_basic_thread(void *arg) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct test_task_local_data *skel = (struct test_task_local_data *)arg; + int fd, err, tid, *value0, *value1; + struct test_tld_struct *value2; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + value0 = tld_get_data(fd, value0_key); + if (!ASSERT_OK_PTR(value0, "tld_get_data")) + goto out; + + value1 = tld_get_data(fd, tld_keys[1]); + if (!ASSERT_OK_PTR(value1, "tld_get_data")) + goto out; + + value2 = tld_get_data(fd, tld_keys[2]); + if (!ASSERT_OK_PTR(value2, "tld_get_data")) + goto out; + + tid = gettid(); + + *value0 = tid + 0; + *value1 = tid + 1; + value2->a = tid + 2; + value2->b = tid + 3; + value2->c = tid + 4; + value2->d = tid + 5; + + pthread_mutex_lock(&global_mutex); + /* Run task_main that read task local data and save to global variables */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + + ASSERT_EQ(skel->bss->test_value0, tid + 0, "tld_get_data value0"); + ASSERT_EQ(skel->bss->test_value1, tid + 1, "tld_get_data value1"); + ASSERT_EQ(skel->bss->test_value2.a, tid + 2, "tld_get_data value2.a"); + ASSERT_EQ(skel->bss->test_value2.b, tid + 3, "tld_get_data value2.b"); + ASSERT_EQ(skel->bss->test_value2.c, tid + 4, "tld_get_data value2.c"); + ASSERT_EQ(skel->bss->test_value2.d, tid + 5, "tld_get_data value2.d"); + pthread_mutex_unlock(&global_mutex); + + /* Make sure valueX are indeed local to threads */ + ASSERT_EQ(*value0, tid + 0, "value0"); + ASSERT_EQ(*value1, tid + 1, "value1"); + ASSERT_EQ(value2->a, tid + 2, "value2.a"); + ASSERT_EQ(value2->b, tid + 3, "value2.b"); + ASSERT_EQ(value2->c, tid + 4, "value2.c"); + ASSERT_EQ(value2->d, tid + 5, "value2.d"); + + *value0 = tid + 5; + *value1 = tid + 4; + value2->a = tid + 3; + value2->b = tid + 2; + value2->c = tid + 1; + value2->d = tid + 0; + + /* Run task_main again */ + pthread_mutex_lock(&global_mutex); + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + + ASSERT_EQ(skel->bss->test_value0, tid + 5, "tld_get_data value0"); + ASSERT_EQ(skel->bss->test_value1, tid + 4, "tld_get_data value1"); + ASSERT_EQ(skel->bss->test_value2.a, tid + 3, "tld_get_data value2.a"); + ASSERT_EQ(skel->bss->test_value2.b, tid + 2, "tld_get_data value2.b"); + ASSERT_EQ(skel->bss->test_value2.c, tid + 1, "tld_get_data value2.c"); + ASSERT_EQ(skel->bss->test_value2.d, tid + 0, "tld_get_data value2.d"); + pthread_mutex_unlock(&global_mutex); + +out: + pthread_exit(NULL); +} + +static void test_task_local_data_basic(void) +{ + struct test_task_local_data *skel; + pthread_t thread[TEST_BASIC_THREAD_NUM]; + char dummy_key_name[TLD_NAME_LEN]; + tld_key_t key; + int i, err; + + reset_tld(); + + ASSERT_OK(pthread_mutex_init(&global_mutex, NULL), "pthread_mutex_init"); + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY"); + tld_keys[1] = tld_create_key("value1", sizeof(int)); + ASSERT_FALSE(tld_key_is_err(tld_keys[1]), "tld_create_key"); + tld_keys[2] = tld_create_key("value2", sizeof(struct test_tld_struct)); + ASSERT_FALSE(tld_key_is_err(tld_keys[2]), "tld_create_key"); + + /* + * Shouldn't be able to store data exceed a page. Create a TLD just big + * enough to exceed a page. TLDs already created are int value0, int + * value1, and struct test_tld_struct value2. + */ + key = tld_create_key("value_not_exist", + TLD_PAGE_SIZE - 2 * sizeof(int) - sizeof(struct test_tld_struct) + 1); + ASSERT_EQ(tld_key_err_or_zero(key), -E2BIG, "tld_create_key"); + + key = tld_create_key("value2", sizeof(struct test_tld_struct)); + ASSERT_EQ(tld_key_err_or_zero(key), -EEXIST, "tld_create_key"); + + /* Shouldn't be able to create the (TLD_MAX_DATA_CNT+1)-th TLD */ + for (i = 3; i < TLD_MAX_DATA_CNT; i++) { + snprintf(dummy_key_name, TLD_NAME_LEN, "dummy_value%d", i); + tld_keys[i] = tld_create_key(dummy_key_name, sizeof(int)); + ASSERT_FALSE(tld_key_is_err(tld_keys[i]), "tld_create_key"); + } + key = tld_create_key("value_not_exist", sizeof(struct test_tld_struct)); + ASSERT_EQ(tld_key_err_or_zero(key), -ENOSPC, "tld_create_key"); + + /* Access TLDs from multiple threads and check if they are thread-specific */ + for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) { + err = pthread_create(&thread[i], NULL, test_task_local_data_basic_thread, skel); + if (!ASSERT_OK(err, "pthread_create")) + goto out; + } + +out: + for (i = 0; i < TEST_BASIC_THREAD_NUM; i++) + pthread_join(thread[i], NULL); + + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + +void test_task_local_data(void) +{ + if (test__start_subtest("task_local_data_basic")) + test_task_local_data_basic(); +} diff --git a/tools/testing/selftests/bpf/progs/test_task_local_data.c b/tools/testing/selftests/bpf/progs/test_task_local_data.c new file mode 100644 index 000000000000..fffafc013044 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_task_local_data.c @@ -0,0 +1,65 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include + +#include "task_local_data.bpf.h" + +struct tld_keys { + tld_key_t value0; + tld_key_t value1; + tld_key_t value2; + tld_key_t value_not_exist; +}; + +struct test_tld_struct { + __u64 a; + __u64 b; + __u64 c; + __u64 d; +}; + +int test_value0; +int test_value1; +struct test_tld_struct test_value2; + +SEC("syscall") +int task_main(void *ctx) +{ + struct tld_object tld_obj; + struct test_tld_struct *struct_p; + struct task_struct *task; + int err, *int_p; + + task = bpf_get_current_task_btf(); + err = tld_object_init(task, &tld_obj); + if (err) + return 1; + + int_p = tld_get_data(&tld_obj, value0, "value0", sizeof(int)); + if (int_p) + test_value0 = *int_p; + else + return 2; + + int_p = tld_get_data(&tld_obj, value1, "value1", sizeof(int)); + if (int_p) + test_value1 = *int_p; + else + return 3; + + struct_p = tld_get_data(&tld_obj, value2, "value2", sizeof(struct test_tld_struct)); + if (struct_p) + test_value2 = *struct_p; + else + return 4; + + int_p = tld_get_data(&tld_obj, value_not_exist, "value_not_exist", sizeof(int)); + if (int_p) + return 5; + + return 0; +} + +char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 784181141782204f6dbbeadf01780e81da5fcb29 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 30 Jul 2025 11:58:55 -0700 Subject: selftests/bpf: Test concurrent task local data key creation Test thread-safety of tld_create_key(). Since tld_create_key() does not rely on locks but memory barriers and atomic operations to protect the shared metadata, the thread-safety of the function is non-trivial. Make sure concurrent tld_key_create(), both valid and invalid, can not race and corrupt metatada, which may leads to TLDs not being thread- specific or duplicate TLDs with the same name. Signed-off-by: Amery Hung Reviewed-by: Emil Tsalapatis Link: https://lore.kernel.org/r/20250730185903.3574598-5-ameryhung@gmail.com Signed-off-by: Alexei Starovoitov --- .../bpf/prog_tests/test_task_local_data.c | 105 +++++++++++++++++++++ 1 file changed, 105 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c index 2e77d3fa2534..3b5cd2cd89c7 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c +++ b/tools/testing/selftests/bpf/prog_tests/test_task_local_data.c @@ -185,8 +185,113 @@ out: test_task_local_data__destroy(skel); } +#define TEST_RACE_THREAD_NUM (TLD_MAX_DATA_CNT - 3) + +void *test_task_local_data_race_thread(void *arg) +{ + int err = 0, id = (intptr_t)arg; + char key_name[32]; + tld_key_t key; + + key = tld_create_key("value_not_exist", TLD_PAGE_SIZE + 1); + if (tld_key_err_or_zero(key) != -E2BIG) { + err = 1; + goto out; + } + + /* Only one thread will succeed in creating value1 */ + key = tld_create_key("value1", sizeof(int)); + if (!tld_key_is_err(key)) + tld_keys[1] = key; + + /* Only one thread will succeed in creating value2 */ + key = tld_create_key("value2", sizeof(struct test_tld_struct)); + if (!tld_key_is_err(key)) + tld_keys[2] = key; + + snprintf(key_name, 32, "thread_%d", id); + tld_keys[id] = tld_create_key(key_name, sizeof(int)); + if (tld_key_is_err(tld_keys[id])) + err = 2; +out: + return (void *)(intptr_t)err; +} + +static void test_task_local_data_race(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + pthread_t thread[TEST_RACE_THREAD_NUM]; + struct test_task_local_data *skel; + int fd, i, j, err, *data; + void *ret = NULL; + + skel = test_task_local_data__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + return; + + tld_keys = calloc(TLD_MAX_DATA_CNT, sizeof(tld_key_t)); + if (!ASSERT_OK_PTR(tld_keys, "calloc tld_keys")) + goto out; + + fd = bpf_map__fd(skel->maps.tld_data_map); + + ASSERT_FALSE(tld_key_is_err(value0_key), "TLD_DEFINE_KEY"); + tld_keys[0] = value0_key; + + for (j = 0; j < 100; j++) { + reset_tld(); + + for (i = 0; i < TEST_RACE_THREAD_NUM; i++) { + /* + * Try to make tld_create_key() race with each other. Call + * tld_create_key(), both valid and invalid, from different threads. + */ + err = pthread_create(&thread[i], NULL, test_task_local_data_race_thread, + (void *)(intptr_t)(i + 3)); + if (CHECK_FAIL(err)) + break; + } + + /* Wait for all tld_create_key() to return */ + for (i = 0; i < TEST_RACE_THREAD_NUM; i++) { + pthread_join(thread[i], &ret); + if (CHECK_FAIL(ret)) + break; + } + + /* Write a unique number to each TLD */ + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { + data = tld_get_data(fd, tld_keys[i]); + if (CHECK_FAIL(!data)) + break; + *data = i; + } + + /* Read TLDs and check the value to see if any address collides with another */ + for (i = 0; i < TLD_MAX_DATA_CNT; i++) { + data = tld_get_data(fd, tld_keys[i]); + if (CHECK_FAIL(*data != i)) + break; + } + + /* Run task_main to make sure no invalid TLDs are added */ + err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.task_main), &opts); + ASSERT_OK(err, "run task_main"); + ASSERT_OK(opts.retval, "task_main retval"); + } +out: + if (tld_keys) { + free(tld_keys); + tld_keys = NULL; + } + tld_free(); + test_task_local_data__destroy(skel); +} + void test_task_local_data(void) { if (test__start_subtest("task_local_data_basic")) test_task_local_data_basic(); + if (test__start_subtest("task_local_data_race")) + test_task_local_data_race(); } -- cgit v1.2.3 From eeb52b6279cfebae07703f772621fb9d5972e541 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 6 Aug 2025 09:25:39 -0700 Subject: selftests/bpf: Add multi_st_ops that supports multiple instances Current struct_ops in bpf_testmod only support attaching single instance. Add multi_st_ops that supports multiple instances. The struct_ops uses map id as the struct_ops id and will reject attachment with an existing id. Signed-off-by: Amery Hung Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250806162540.681679-3-ameryhung@gmail.com --- .../testing/selftests/bpf/test_kmods/bpf_testmod.c | 112 +++++++++++++++++++++ .../testing/selftests/bpf/test_kmods/bpf_testmod.h | 6 ++ .../selftests/bpf/test_kmods/bpf_testmod_kfunc.h | 2 + 3 files changed, 120 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index e9e918cdf31f..2beb9b2fcbd8 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -1057,6 +1057,8 @@ __bpf_kfunc int bpf_kfunc_st_ops_inc10(struct st_ops_args *args) return args->a; } +__bpf_kfunc int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id); + BTF_KFUNCS_START(bpf_testmod_check_kfunc_ids) BTF_ID_FLAGS(func, bpf_testmod_test_mod_kfunc) BTF_ID_FLAGS(func, bpf_kfunc_call_test1) @@ -1097,6 +1099,7 @@ BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_prologue, KF_TRUSTED_ARGS | KF_SLEEPABL BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_kfunc_st_ops_test_pro_epilogue, KF_TRUSTED_ARGS | KF_SLEEPABLE) BTF_ID_FLAGS(func, bpf_kfunc_st_ops_inc10, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_kfunc_multi_st_ops_test_1, KF_TRUSTED_ARGS) BTF_KFUNCS_END(bpf_testmod_check_kfunc_ids) static int bpf_testmod_ops_init(struct btf *btf) @@ -1528,6 +1531,114 @@ static struct bpf_struct_ops testmod_st_ops = { .owner = THIS_MODULE, }; +struct hlist_head multi_st_ops_list; +static DEFINE_SPINLOCK(multi_st_ops_lock); + +static int multi_st_ops_init(struct btf *btf) +{ + spin_lock_init(&multi_st_ops_lock); + INIT_HLIST_HEAD(&multi_st_ops_list); + + return 0; +} + +static int multi_st_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + +static struct bpf_testmod_multi_st_ops *multi_st_ops_find_nolock(u32 id) +{ + struct bpf_testmod_multi_st_ops *st_ops; + + hlist_for_each_entry(st_ops, &multi_st_ops_list, node) { + if (st_ops->id == id) + return st_ops; + } + + return NULL; +} + +int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) +{ + struct bpf_testmod_multi_st_ops *st_ops; + unsigned long flags; + int ret = -1; + + spin_lock_irqsave(&multi_st_ops_lock, flags); + st_ops = multi_st_ops_find_nolock(id); + if (st_ops) + ret = st_ops->test_1(args); + spin_unlock_irqrestore(&multi_st_ops_lock, flags); + + return ret; +} + +static int multi_st_ops_reg(void *kdata, struct bpf_link *link) +{ + struct bpf_testmod_multi_st_ops *st_ops = + (struct bpf_testmod_multi_st_ops *)kdata; + unsigned long flags; + int err = 0; + u32 id; + + if (!st_ops->test_1) + return -EINVAL; + + id = bpf_struct_ops_id(kdata); + + spin_lock_irqsave(&multi_st_ops_lock, flags); + if (multi_st_ops_find_nolock(id)) { + pr_err("multi_st_ops(id:%d) has already been registered\n", id); + err = -EEXIST; + goto unlock; + } + + st_ops->id = id; + hlist_add_head(&st_ops->node, &multi_st_ops_list); +unlock: + spin_unlock_irqrestore(&multi_st_ops_lock, flags); + + return err; +} + +static void multi_st_ops_unreg(void *kdata, struct bpf_link *link) +{ + struct bpf_testmod_multi_st_ops *st_ops; + unsigned long flags; + u32 id; + + id = bpf_struct_ops_id(kdata); + + spin_lock_irqsave(&multi_st_ops_lock, flags); + st_ops = multi_st_ops_find_nolock(id); + if (st_ops) + hlist_del(&st_ops->node); + spin_unlock_irqrestore(&multi_st_ops_lock, flags); +} + +static int bpf_testmod_multi_st_ops__test_1(struct st_ops_args *args) +{ + return 0; +} + +static struct bpf_testmod_multi_st_ops multi_st_ops_cfi_stubs = { + .test_1 = bpf_testmod_multi_st_ops__test_1, +}; + +struct bpf_struct_ops testmod_multi_st_ops = { + .verifier_ops = &bpf_testmod_verifier_ops, + .init = multi_st_ops_init, + .init_member = multi_st_ops_init_member, + .reg = multi_st_ops_reg, + .unreg = multi_st_ops_unreg, + .cfi_stubs = &multi_st_ops_cfi_stubs, + .name = "bpf_testmod_multi_st_ops", + .owner = THIS_MODULE, +}; + extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) @@ -1550,6 +1661,7 @@ static int bpf_testmod_init(void) ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops2, bpf_testmod_ops2); ret = ret ?: register_bpf_struct_ops(&bpf_testmod_ops3, bpf_testmod_ops3); ret = ret ?: register_bpf_struct_ops(&testmod_st_ops, bpf_testmod_st_ops); + ret = ret ?: register_bpf_struct_ops(&testmod_multi_st_ops, bpf_testmod_multi_st_ops); ret = ret ?: register_btf_id_dtor_kfuncs(bpf_testmod_dtors, ARRAY_SIZE(bpf_testmod_dtors), THIS_MODULE); diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h index c9fab51f16e2..f6e492f9d042 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.h @@ -116,4 +116,10 @@ struct bpf_testmod_st_ops { struct module *owner; }; +struct bpf_testmod_multi_st_ops { + int (*test_1)(struct st_ops_args *args); + struct hlist_node node; + int id; +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h index b58817938deb..286e7faa4754 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod_kfunc.h @@ -159,4 +159,6 @@ void bpf_kfunc_trusted_task_test(struct task_struct *ptr) __ksym; void bpf_kfunc_trusted_num_test(int *ptr) __ksym; void bpf_kfunc_rcu_task_test(struct task_struct *ptr) __ksym; +int bpf_kfunc_multi_st_ops_test_1(struct st_ops_args *args, u32 id) __ksym; + #endif /* _BPF_TESTMOD_KFUNC_H */ -- cgit v1.2.3 From ba7000f1c360f34286f48bd5e670cefbab77ce8f Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Wed, 6 Aug 2025 09:25:40 -0700 Subject: selftests/bpf: Test multi_st_ops and calling kfuncs from different programs Test multi_st_ops and demonstrate how different bpf programs can call a kfuncs that refers to the struct_ops instance in the same source file by id. The id is defined as a global vairable and initialized before attaching the skeleton. Kfuncs that take the id can hide the argument with a macro to make it almost transparent to bpf program developers. The test involves two struct_ops returning different values from .test_1. In syscall and tracing programs, check if the correct value is returned by a kfunc that calls .test_1. Signed-off-by: Amery Hung Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250806162540.681679-4-ameryhung@gmail.com --- .../prog_tests/test_struct_ops_id_ops_mapping.c | 74 ++++++++++++++++++++++ .../bpf/progs/struct_ops_id_ops_mapping1.c | 59 +++++++++++++++++ .../bpf/progs/struct_ops_id_ops_mapping2.c | 59 +++++++++++++++++ 3 files changed, 192 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c create mode 100644 tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c new file mode 100644 index 000000000000..fd8762ba4b67 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_id_ops_mapping.c @@ -0,0 +1,74 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include "struct_ops_id_ops_mapping1.skel.h" +#include "struct_ops_id_ops_mapping2.skel.h" + +static void test_st_ops_id_ops_mapping(void) +{ + struct struct_ops_id_ops_mapping1 *skel1 = NULL; + struct struct_ops_id_ops_mapping2 *skel2 = NULL; + struct bpf_map_info info = {}; + __u32 len = sizeof(info); + int err, pid, prog1_fd, prog2_fd; + + skel1 = struct_ops_id_ops_mapping1__open_and_load(); + if (!ASSERT_OK_PTR(skel1, "struct_ops_id_ops_mapping1__open")) + goto out; + + skel2 = struct_ops_id_ops_mapping2__open_and_load(); + if (!ASSERT_OK_PTR(skel2, "struct_ops_id_ops_mapping2__open")) + goto out; + + err = bpf_map_get_info_by_fd(bpf_map__fd(skel1->maps.st_ops_map), + &info, &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto out; + + skel1->bss->st_ops_id = info.id; + + err = bpf_map_get_info_by_fd(bpf_map__fd(skel2->maps.st_ops_map), + &info, &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto out; + + skel2->bss->st_ops_id = info.id; + + err = struct_ops_id_ops_mapping1__attach(skel1); + if (!ASSERT_OK(err, "struct_ops_id_ops_mapping1__attach")) + goto out; + + err = struct_ops_id_ops_mapping2__attach(skel2); + if (!ASSERT_OK(err, "struct_ops_id_ops_mapping2__attach")) + goto out; + + /* run tracing prog that calls .test_1 and checks return */ + pid = getpid(); + skel1->bss->test_pid = pid; + skel2->bss->test_pid = pid; + sys_gettid(); + skel1->bss->test_pid = 0; + skel2->bss->test_pid = 0; + + /* run syscall_prog that calls .test_1 and checks return */ + prog1_fd = bpf_program__fd(skel1->progs.syscall_prog); + err = bpf_prog_test_run_opts(prog1_fd, NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + prog2_fd = bpf_program__fd(skel2->progs.syscall_prog); + err = bpf_prog_test_run_opts(prog2_fd, NULL); + ASSERT_OK(err, "bpf_prog_test_run_opts"); + + ASSERT_EQ(skel1->bss->test_err, 0, "skel1->bss->test_err"); + ASSERT_EQ(skel2->bss->test_err, 0, "skel2->bss->test_err"); + +out: + struct_ops_id_ops_mapping1__destroy(skel1); + struct_ops_id_ops_mapping2__destroy(skel2); +} + +void test_struct_ops_id_ops_mapping(void) +{ + if (test__start_subtest("st_ops_id_ops_mapping")) + test_st_ops_id_ops_mapping(); +} diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c new file mode 100644 index 000000000000..ad8bb546c9bf --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping1.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id) +int st_ops_id; + +int test_pid; +int test_err; + +#define MAP1_MAGIC 1234 + +SEC("struct_ops") +int BPF_PROG(test_1, struct st_ops_args *args) +{ + return MAP1_MAGIC; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(sys_enter, struct pt_regs *regs, long id) +{ + struct st_ops_args args = {}; + struct task_struct *task; + int ret; + + task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP1_MAGIC) + test_err++; + + return 0; +} + +SEC("syscall") +int syscall_prog(void *ctx) +{ + struct st_ops_args args = {}; + int ret; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP1_MAGIC) + test_err++; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_multi_st_ops st_ops_map = { + .test_1 = (void *)test_1, +}; diff --git a/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c new file mode 100644 index 000000000000..cea1a2f4b62f --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_id_ops_mapping2.c @@ -0,0 +1,59 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include "bpf_misc.h" +#include "../test_kmods/bpf_testmod.h" +#include "../test_kmods/bpf_testmod_kfunc.h" + +char _license[] SEC("license") = "GPL"; + +#define bpf_kfunc_multi_st_ops_test_1(args) bpf_kfunc_multi_st_ops_test_1(args, st_ops_id) +int st_ops_id; + +int test_pid; +int test_err; + +#define MAP2_MAGIC 4567 + +SEC("struct_ops") +int BPF_PROG(test_1, struct st_ops_args *args) +{ + return MAP2_MAGIC; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(sys_enter, struct pt_regs *regs, long id) +{ + struct st_ops_args args = {}; + struct task_struct *task; + int ret; + + task = bpf_get_current_task_btf(); + if (!test_pid || task->pid != test_pid) + return 0; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP2_MAGIC) + test_err++; + + return 0; +} + +SEC("syscall") +int syscall_prog(void *ctx) +{ + struct st_ops_args args = {}; + int ret; + + ret = bpf_kfunc_multi_st_ops_test_1(&args); + if (ret != MAP2_MAGIC) + test_err++; + + return 0; +} + +SEC(".struct_ops.link") +struct bpf_testmod_multi_st_ops st_ops_map = { + .test_1 = (void *)test_1, +}; -- cgit v1.2.3 From 85acc29f90e0183997dea27277057c9aec2769aa Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 6 Aug 2025 18:13:41 +0100 Subject: KVM: arm64: selftest: Add standalone test checking for KVM's own UUID Tinkering with UUIDs is a perilious task, and the KVM UUID gets broken at times. In order to spot this early enough, add a selftest that will shout if the expected value isn't found. Signed-off-by: Marc Zyngier Reviewed-by: Sebastian Ott Link: https://lore.kernel.org/r/20250721130558.50823-1-jackabt.amazon@gmail.com Link: https://lore.kernel.org/r/20250806171341.1521210-1-maz@kernel.org Signed-off-by: Oliver Upton --- tools/testing/selftests/kvm/Makefile.kvm | 1 + tools/testing/selftests/kvm/arm64/kvm-uuid.c | 70 ++++++++++++++++++++++++++++ 2 files changed, 71 insertions(+) create mode 100644 tools/testing/selftests/kvm/arm64/kvm-uuid.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/Makefile.kvm b/tools/testing/selftests/kvm/Makefile.kvm index ce817a975e50..e1eb1ba238a2 100644 --- a/tools/testing/selftests/kvm/Makefile.kvm +++ b/tools/testing/selftests/kvm/Makefile.kvm @@ -167,6 +167,7 @@ TEST_GEN_PROGS_arm64 += arm64/vgic_irq TEST_GEN_PROGS_arm64 += arm64/vgic_lpi_stress TEST_GEN_PROGS_arm64 += arm64/vpmu_counter_access TEST_GEN_PROGS_arm64 += arm64/no-vgic-v3 +TEST_GEN_PROGS_arm64 += arm64/kvm-uuid TEST_GEN_PROGS_arm64 += access_tracking_perf_test TEST_GEN_PROGS_arm64 += arch_timer TEST_GEN_PROGS_arm64 += coalesced_io_test diff --git a/tools/testing/selftests/kvm/arm64/kvm-uuid.c b/tools/testing/selftests/kvm/arm64/kvm-uuid.c new file mode 100644 index 000000000000..af9581b860f1 --- /dev/null +++ b/tools/testing/selftests/kvm/arm64/kvm-uuid.c @@ -0,0 +1,70 @@ +// SPDX-License-Identifier: GPL-2.0 + +// Check that nobody has tampered with KVM's UID + +#include +#include +#include +#include + +#include "processor.h" + +/* + * Do NOT redefine these constants, or try to replace them with some + * "common" version. They are hardcoded here to detect any potential + * breakage happening in the rest of the kernel. + * + * KVM UID value: 28b46fb6-2ec5-11e9-a9ca-4b564d003a74 + */ +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 0xb66fb428U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 0xe911c52eU +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 0x564bcaa9U +#define ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3 0x743a004dU + +static void guest_code(void) +{ + struct arm_smccc_res res = {}; + + smccc_hvc(ARM_SMCCC_VENDOR_HYP_CALL_UID_FUNC_ID, 0, 0, 0, 0, 0, 0, 0, &res); + + __GUEST_ASSERT(res.a0 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_0 && + res.a1 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_1 && + res.a2 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_2 && + res.a3 == ARM_SMCCC_VENDOR_HYP_UID_KVM_REG_3, + "Unexpected KVM-specific UID %lx %lx %lx %lx\n", res.a0, res.a1, res.a2, res.a3); + GUEST_DONE(); +} + +int main (int argc, char *argv[]) +{ + struct kvm_vcpu *vcpu; + struct kvm_vm *vm; + struct ucall uc; + bool guest_done = false; + + vm = vm_create_with_one_vcpu(&vcpu, guest_code); + + while (!guest_done) { + vcpu_run(vcpu); + + switch (get_ucall(vcpu, &uc)) { + case UCALL_SYNC: + break; + case UCALL_DONE: + guest_done = true; + break; + case UCALL_ABORT: + REPORT_GUEST_ASSERT(uc); + break; + case UCALL_PRINTF: + printf("%s", uc.buffer); + break; + default: + TEST_FAIL("Unexpected guest exit"); + } + } + + kvm_vm_free(vm); + + return 0; +} -- cgit v1.2.3 From 81e4b9cf365df4cde30157a85cc9f3d673946118 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Fri, 8 Aug 2025 03:55:06 +1000 Subject: selftests/mount_setattr: add smoke tests for open_tree_attr(2) bug There appear to be no other open_tree_attr(2) tests at the moment, but as a minimal solution just add some additional checks in the existing MOUNT_ATTR_IDMAP tests to make sure that open_tree_attr(2) cannot be used to bypass the tested restrictions that apply to mount_setattr(2). Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250808-open_tree_attr-bugfix-idmap-v1-2-0ec7bc05646c@cyphar.com Signed-off-by: Christian Brauner --- .../selftests/mount_setattr/mount_setattr_test.c | 77 ++++++++++++++++++---- 1 file changed, 64 insertions(+), 13 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/mount_setattr/mount_setattr_test.c b/tools/testing/selftests/mount_setattr/mount_setattr_test.c index b1e4618399be..a688871a98eb 100644 --- a/tools/testing/selftests/mount_setattr/mount_setattr_test.c +++ b/tools/testing/selftests/mount_setattr/mount_setattr_test.c @@ -107,6 +107,26 @@ #endif #endif +#ifndef __NR_open_tree_attr + #if defined __alpha__ + #define __NR_open_tree_attr 577 + #elif defined _MIPS_SIM + #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */ + #define __NR_open_tree_attr (467 + 4000) + #endif + #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */ + #define __NR_open_tree_attr (467 + 6000) + #endif + #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */ + #define __NR_open_tree_attr (467 + 5000) + #endif + #elif defined __ia64__ + #define __NR_open_tree_attr (467 + 1024) + #else + #define __NR_open_tree_attr 467 + #endif +#endif + #ifndef MOUNT_ATTR_IDMAP #define MOUNT_ATTR_IDMAP 0x00100000 #endif @@ -121,6 +141,12 @@ static inline int sys_mount_setattr(int dfd, const char *path, unsigned int flag return syscall(__NR_mount_setattr, dfd, path, flags, attr, size); } +static inline int sys_open_tree_attr(int dfd, const char *path, unsigned int flags, + struct mount_attr *attr, size_t size) +{ + return syscall(__NR_open_tree_attr, dfd, path, flags, attr, size); +} + static ssize_t write_nointr(int fd, const void *buf, size_t count) { ssize_t ret; @@ -1222,6 +1248,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_inside_current_mount_namespace) attr.userns_fd = get_userns_fd(0, 10000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1255,6 +1287,12 @@ TEST_F(mount_setattr_idmapped, attached_mount_outside_current_mount_namespace) ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + ASSERT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } @@ -1321,6 +1359,19 @@ TEST_F(mount_setattr_idmapped, detached_mount_outside_current_mount_namespace) ASSERT_EQ(close(open_tree_fd), 0); } +static bool expected_uid_gid(int dfd, const char *path, int flags, + uid_t expected_uid, gid_t expected_gid) +{ + int ret; + struct stat st; + + ret = fstatat(dfd, path, &st, flags); + if (ret < 0) + return false; + + return st.st_uid == expected_uid && st.st_gid == expected_gid; +} + /** * Validate that currently changing the idmapping of an idmapped mount fails. */ @@ -1331,6 +1382,8 @@ TEST_F(mount_setattr_idmapped, change_idmapping) .attr_set = MOUNT_ATTR_IDMAP, }; + ASSERT_TRUE(expected_uid_gid(-EBADF, "/mnt/D", 0, 0, 0)); + if (!mount_setattr_supported()) SKIP(return, "mount_setattr syscall not supported"); @@ -1348,27 +1401,25 @@ TEST_F(mount_setattr_idmapped, change_idmapping) AT_EMPTY_PATH, &attr, sizeof(attr)), 0); ASSERT_EQ(close(attr.userns_fd), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 0, 0)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + /* Change idmapping on a detached mount that is already idmapped. */ attr.userns_fd = get_userns_fd(0, 20000, 10000); ASSERT_GE(attr.userns_fd, 0); ASSERT_NE(sys_mount_setattr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + /* + * Make sure that open_tree_attr() without OPEN_TREE_CLONE is not a way + * to bypass this mount_setattr() restriction. + */ + EXPECT_LT(sys_open_tree_attr(open_tree_fd, "", AT_EMPTY_PATH, &attr, sizeof(attr)), 0); + EXPECT_FALSE(expected_uid_gid(open_tree_fd, ".", 0, 20000, 20000)); + EXPECT_TRUE(expected_uid_gid(open_tree_fd, ".", 0, 10000, 10000)); + ASSERT_EQ(close(attr.userns_fd), 0); ASSERT_EQ(close(open_tree_fd), 0); } -static bool expected_uid_gid(int dfd, const char *path, int flags, - uid_t expected_uid, gid_t expected_gid) -{ - int ret; - struct stat st; - - ret = fstatat(dfd, path, &st, flags); - if (ret < 0) - return false; - - return st.st_uid == expected_uid && st.st_gid == expected_gid; -} - TEST_F(mount_setattr_idmapped, idmap_mount_tree_invalid) { int open_tree_fd = -EBADF; -- cgit v1.2.3 From df579e471111b9f8691d75c980f59cc085fa97a3 Mon Sep 17 00:00:00 2001 From: Aleksa Sarai Date: Thu, 7 Aug 2025 03:55:24 +1000 Subject: selftests/filesystems: add basic fscontext log tests Signed-off-by: Aleksa Sarai Link: https://lore.kernel.org/20250807-fscontext-log-cleanups-v3-2-8d91d6242dc3@cyphar.com Signed-off-by: Christian Brauner --- tools/testing/selftests/filesystems/.gitignore | 1 + tools/testing/selftests/filesystems/Makefile | 2 +- tools/testing/selftests/filesystems/fclog.c | 130 +++++++++++++++++++++++++ 3 files changed, 132 insertions(+), 1 deletion(-) create mode 100644 tools/testing/selftests/filesystems/fclog.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/filesystems/.gitignore b/tools/testing/selftests/filesystems/.gitignore index fcbdb1297e24..64ac0dfa46b7 100644 --- a/tools/testing/selftests/filesystems/.gitignore +++ b/tools/testing/selftests/filesystems/.gitignore @@ -1,6 +1,7 @@ # SPDX-License-Identifier: GPL-2.0-only dnotify_test devpts_pts +fclog file_stressor anon_inode_test kernfs_test diff --git a/tools/testing/selftests/filesystems/Makefile b/tools/testing/selftests/filesystems/Makefile index 73d4650af1a5..85427d7f19b9 100644 --- a/tools/testing/selftests/filesystems/Makefile +++ b/tools/testing/selftests/filesystems/Makefile @@ -1,7 +1,7 @@ # SPDX-License-Identifier: GPL-2.0 CFLAGS += $(KHDR_INCLUDES) -TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test +TEST_GEN_PROGS := devpts_pts file_stressor anon_inode_test kernfs_test fclog TEST_GEN_PROGS_EXTENDED := dnotify_test include ../lib.mk diff --git a/tools/testing/selftests/filesystems/fclog.c b/tools/testing/selftests/filesystems/fclog.c new file mode 100644 index 000000000000..912a8b755c3b --- /dev/null +++ b/tools/testing/selftests/filesystems/fclog.c @@ -0,0 +1,130 @@ +// SPDX-License-Identifier: GPL-2.0-or-later +/* + * Author: Aleksa Sarai + * Copyright (C) 2025 SUSE LLC. + */ + +#include +#include +#include +#include +#include +#include +#include +#include + +#include "../kselftest_harness.h" + +#define ASSERT_ERRNO(expected, _t, seen) \ + __EXPECT(expected, #expected, \ + ({__typeof__(seen) _tmp_seen = (seen); \ + _tmp_seen >= 0 ? _tmp_seen : -errno; }), #seen, _t, 1) + +#define ASSERT_ERRNO_EQ(expected, seen) \ + ASSERT_ERRNO(expected, ==, seen) + +#define ASSERT_SUCCESS(seen) \ + ASSERT_ERRNO(0, <=, seen) + +FIXTURE(ns) +{ + int host_mntns; +}; + +FIXTURE_SETUP(ns) +{ + /* Stash the old mntns. */ + self->host_mntns = open("/proc/self/ns/mnt", O_RDONLY|O_CLOEXEC); + ASSERT_SUCCESS(self->host_mntns); + + /* Create a new mount namespace and make it private. */ + ASSERT_SUCCESS(unshare(CLONE_NEWNS)); + ASSERT_SUCCESS(mount(NULL, "/", NULL, MS_PRIVATE|MS_REC, NULL)); +} + +FIXTURE_TEARDOWN(ns) +{ + ASSERT_SUCCESS(setns(self->host_mntns, CLONE_NEWNS)); + ASSERT_SUCCESS(close(self->host_mntns)); +} + +TEST_F(ns, fscontext_log_enodata) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + /* A brand new fscontext has no log entries. */ + char buf[128] = {}; + for (int i = 0; i < 16; i++) + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_errorfc) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + char buf[128] = {}; + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_errorfc_after_fsmount) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + ASSERT_SUCCESS(fsconfig(fsfd, FSCONFIG_CMD_CREATE, NULL, NULL, 0)); + int mfd = fsmount(fsfd, FSMOUNT_CLOEXEC, MOUNT_ATTR_NOEXEC | MOUNT_ATTR_NOSUID); + ASSERT_SUCCESS(mfd); + ASSERT_SUCCESS(move_mount(mfd, "", AT_FDCWD, "/tmp", MOVE_MOUNT_F_EMPTY_PATH)); + + /* + * The fscontext log should still contain data even after + * FSCONFIG_CMD_CREATE and fsmount(). + */ + char buf[128] = {}; + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_F(ns, fscontext_log_emsgsize) +{ + int fsfd = fsopen("tmpfs", FSOPEN_CLOEXEC); + ASSERT_SUCCESS(fsfd); + + ASSERT_ERRNO_EQ(-EINVAL, fsconfig(fsfd, FSCONFIG_SET_STRING, "invalid-arg", "123", 0)); + + char buf[128] = {}; + /* + * Attempting to read a message with too small a buffer should not + * result in the message getting consumed. + */ + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 0)); + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 1)); + for (int i = 0; i < 16; i++) + ASSERT_ERRNO_EQ(-EMSGSIZE, read(fsfd, buf, 16)); + + ASSERT_SUCCESS(read(fsfd, buf, sizeof(buf))); + EXPECT_STREQ("e tmpfs: Unknown parameter 'invalid-arg'\n", buf); + + /* The message has been consumed. */ + ASSERT_ERRNO_EQ(-ENODATA, read(fsfd, buf, sizeof(buf))); + ASSERT_SUCCESS(close(fsfd)); +} + +TEST_HARNESS_MAIN -- cgit v1.2.3 From 2319f9d0aa644eb9666c7be903078f50ecc2eb5b Mon Sep 17 00:00:00 2001 From: Nam Cao Date: Mon, 11 Aug 2025 09:49:57 +0200 Subject: selftests/coredump: Remove the read() that fails the test Resolve a conflict between commit 6a68d28066b6 ("selftests/coredump: Fix "socket_detect_userspace_client" test failure") and commit 994dc26302ed ("selftests/coredump: fix build") The first commit adds a read() to wait for write() from another thread to finish. But the second commit removes the write(). Now that the two commits are in the same tree, the read() now gets EOF and the test fails. Remove this read() so that the test passes. Signed-off-by: Nam Cao Link: https://lore.kernel.org/20250811074957.4079616-1-namcao@linutronix.de Signed-off-by: Christian Brauner --- tools/testing/selftests/coredump/stackdump_test.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c index 5a5a7a5f7e1d..a4ac80bb1003 100644 --- a/tools/testing/selftests/coredump/stackdump_test.c +++ b/tools/testing/selftests/coredump/stackdump_test.c @@ -446,9 +446,6 @@ TEST_F(coredump, socket_detect_userspace_client) if (info.coredump_mask & PIDFD_COREDUMPED) goto out; - if (read(fd_coredump, &c, 1) < 1) - goto out; - exit_code = EXIT_SUCCESS; out: if (fd_peer_pidfd >= 0) -- cgit v1.2.3 From e69980bd16f264581c3f606bae987e54f0ba8c4a Mon Sep 17 00:00:00 2001 From: Jiapeng Chong Date: Fri, 25 Jul 2025 17:04:12 +0800 Subject: selftests/sched_ext: Remove duplicate sched.h header ./tools/testing/selftests/sched_ext/hotplug.c: sched.h is included more than once. Reported-by: Abaci Robot Closes: https://bugzilla.openanolis.cn/show_bug.cgi?id=22941 Signed-off-by: Jiapeng Chong Acked-by: Andrea Righi Signed-off-by: Tejun Heo --- tools/testing/selftests/sched_ext/hotplug.c | 1 - 1 file changed, 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/sched_ext/hotplug.c b/tools/testing/selftests/sched_ext/hotplug.c index 1c9ceb661c43..0cfbb111a2d0 100644 --- a/tools/testing/selftests/sched_ext/hotplug.c +++ b/tools/testing/selftests/sched_ext/hotplug.c @@ -6,7 +6,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From c36748e8733ef9c5f4cd1d7c4327994e5b88b8df Mon Sep 17 00:00:00 2001 From: Wake Liu Date: Thu, 7 Aug 2025 16:09:32 +0800 Subject: selftests/net: Replace non-standard __WORDSIZE with sizeof(long) * 8 The `__WORDSIZE` macro, defined in the non-standard `` header, is a GNU extension and not universally available with all toolchains, such as Clang when used with musl libc. This can lead to build failures in environments where this header is missing. The intention of the code is to determine the bit width of a C `long`. Replace the non-portable `__WORDSIZE` with the standard and portable `sizeof(long) * 8` expression to achieve the same result. This change also removes the inclusion of the now-unused `` header. Signed-off-by: Wake Liu Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/psock_tpacket.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 221270cee3ea..0dd909e325d9 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -33,7 +33,6 @@ #include #include #include -#include #include #include #include @@ -785,7 +784,7 @@ static int test_kernel_bit_width(void) static int test_user_bit_width(void) { - return __WORDSIZE; + return sizeof(long) * 8; } static const char *tpacket_str[] = { -- cgit v1.2.3 From bc4c0a48bdad7f225740b8e750fdc1da6d85e1eb Mon Sep 17 00:00:00 2001 From: Wake Liu Date: Sat, 9 Aug 2025 14:20:13 +0800 Subject: selftests/net: Ensure assert() triggers in psock_tpacket.c The get_next_frame() function in psock_tpacket.c was missing a return statement in its default switch case, leading to a compiler warning. This was caused by a `bug_on(1)` call, which is defined as an `assert()`, being compiled out because NDEBUG is defined during the build. Instead of adding a `return NULL;` which would silently hide the error and could lead to crashes later, this change restores the original author's intent. By adding `#undef NDEBUG` before including , we ensure the assertion is active and will cause the test to abort if this unreachable code is ever executed. Signed-off-by: Wake Liu Link: https://patch.msgid.link/20250809062013.2407822-1-wakel@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/psock_tpacket.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/psock_tpacket.c b/tools/testing/selftests/net/psock_tpacket.c index 0dd909e325d9..2938045c5cf9 100644 --- a/tools/testing/selftests/net/psock_tpacket.c +++ b/tools/testing/selftests/net/psock_tpacket.c @@ -22,6 +22,7 @@ * - TPACKET_V3: RX_RING */ +#undef NDEBUG #include #include #include -- cgit v1.2.3 From 942224e6baca0d1bdd1801e935f3544f2df37baf Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Aug 2025 21:53:05 +0000 Subject: selftest: af_unix: Silence -Wflex-array-member-not-at-end warning for scm_inq.c. scm_inq.c has no problem in functionality, but when compiled with -Wflex-array-member-not-at-end, it shows this warning: scm_inq.c:15:24: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 15 | struct cmsghdr cmsghdr; | ^~~~~~~ Let's silence it. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250811215432.3379570-3-kuniyu@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/af_unix/scm_inq.c | 26 ++++++++++++-------------- 1 file changed, 12 insertions(+), 14 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/af_unix/scm_inq.c b/tools/testing/selftests/net/af_unix/scm_inq.c index 9d22561e7b8f..fc467714387e 100644 --- a/tools/testing/selftests/net/af_unix/scm_inq.c +++ b/tools/testing/selftests/net/af_unix/scm_inq.c @@ -11,11 +11,6 @@ #define NR_CHUNKS 100 #define MSG_LEN 256 -struct scm_inq { - struct cmsghdr cmsghdr; - int inq; -}; - FIXTURE(scm_inq) { int fd[2]; @@ -70,35 +65,38 @@ static void send_chunks(struct __test_metadata *_metadata, static void recv_chunks(struct __test_metadata *_metadata, FIXTURE_DATA(scm_inq) *self) { + char cmsg_buf[CMSG_SPACE(sizeof(int))]; struct msghdr msg = {}; struct iovec iov = {}; - struct scm_inq cmsg; + struct cmsghdr *cmsg; char buf[MSG_LEN]; int i, ret; int inq; msg.msg_iov = &iov; msg.msg_iovlen = 1; - msg.msg_control = &cmsg; - msg.msg_controllen = CMSG_SPACE(sizeof(cmsg.inq)); + msg.msg_control = cmsg_buf; + msg.msg_controllen = sizeof(cmsg_buf); iov.iov_base = buf; iov.iov_len = sizeof(buf); for (i = 0; i < NR_CHUNKS; i++) { memset(buf, 0, sizeof(buf)); - memset(&cmsg, 0, sizeof(cmsg)); + memset(cmsg_buf, 0, sizeof(cmsg_buf)); ret = recvmsg(self->fd[1], &msg, 0); ASSERT_EQ(MSG_LEN, ret); - ASSERT_NE(NULL, CMSG_FIRSTHDR(&msg)); - ASSERT_EQ(CMSG_LEN(sizeof(cmsg.inq)), cmsg.cmsghdr.cmsg_len); - ASSERT_EQ(SOL_SOCKET, cmsg.cmsghdr.cmsg_level); - ASSERT_EQ(SCM_INQ, cmsg.cmsghdr.cmsg_type); + + cmsg = CMSG_FIRSTHDR(&msg); + ASSERT_NE(NULL, cmsg); + ASSERT_EQ(CMSG_LEN(sizeof(int)), cmsg->cmsg_len); + ASSERT_EQ(SOL_SOCKET, cmsg->cmsg_level); + ASSERT_EQ(SCM_INQ, cmsg->cmsg_type); ret = ioctl(self->fd[1], SIOCINQ, &inq); ASSERT_EQ(0, ret); - ASSERT_EQ(cmsg.inq, inq); + ASSERT_EQ(*(int *)CMSG_DATA(cmsg), inq); } } -- cgit v1.2.3 From 9a58d8e68252af07138a2530a22f081ef0b070c1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Aug 2025 21:53:06 +0000 Subject: selftest: af_unix: Silence -Wflex-array-member-not-at-end warning for scm_rights.c. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit scm_rights.c has no problem in functionality, but when compiled with -Wflex-array-member-not-at-end, it shows this warning: scm_rights.c: In function ‘__send_fd’: scm_rights.c:275:32: warning: structure containing a flexible array member is not at the end of another structure [-Wflex-array-member-not-at-end] 275 | struct cmsghdr cmsghdr; | ^~~~~~~ Let's silence it. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250811215432.3379570-4-kuniyu@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/af_unix/scm_rights.c | 28 +++++++++++------------- 1 file changed, 13 insertions(+), 15 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c index 8b015f16c03d..914f99d153ce 100644 --- a/tools/testing/selftests/net/af_unix/scm_rights.c +++ b/tools/testing/selftests/net/af_unix/scm_rights.c @@ -271,20 +271,11 @@ void __send_fd(struct __test_metadata *_metadata, { #define MSG "x" #define MSGLEN 1 - struct { - struct cmsghdr cmsghdr; - int fd[2]; - } cmsg = { - .cmsghdr = { - .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)), - .cmsg_level = SOL_SOCKET, - .cmsg_type = SCM_RIGHTS, - }, - .fd = { - self->fd[inflight * 2], - self->fd[inflight * 2], - }, + int fds[2] = { + self->fd[inflight * 2], + self->fd[inflight * 2], }; + char cmsg_buf[CMSG_SPACE(sizeof(fds))]; struct iovec iov = { .iov_base = MSG, .iov_len = MSGLEN, @@ -294,11 +285,18 @@ void __send_fd(struct __test_metadata *_metadata, .msg_namelen = 0, .msg_iov = &iov, .msg_iovlen = 1, - .msg_control = &cmsg, - .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)), + .msg_control = cmsg_buf, + .msg_controllen = sizeof(cmsg_buf), }; + struct cmsghdr *cmsg; int ret; + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags); if (variant->disabled) { -- cgit v1.2.3 From fd9faac372cc5f95ca537be5e3daf044c87ffae1 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Aug 2025 21:53:07 +0000 Subject: selftest: af_unix: Silence -Wall warning for scm_pid.c. MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit -Wall found 2 unused variables in scm_pid.c: scm_pidfd.c: In function ‘parse_cmsg’: scm_pidfd.c:140:13: warning: unused variable ‘data’ [-Wunused-variable] 140 | int data = 0; | ^~~~ scm_pidfd.c: In function ‘cmsg_check_dead’: scm_pidfd.c:246:15: warning: unused variable ‘client_pid’ [-Wunused-variable] 246 | pid_t client_pid; | ^~~~~~~~~~ Let's remove these variables. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250811215432.3379570-5-kuniyu@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/af_unix/scm_pidfd.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/af_unix/scm_pidfd.c b/tools/testing/selftests/net/af_unix/scm_pidfd.c index 37e034874034..ef2921988e5f 100644 --- a/tools/testing/selftests/net/af_unix/scm_pidfd.c +++ b/tools/testing/selftests/net/af_unix/scm_pidfd.c @@ -137,7 +137,6 @@ struct cmsg_data { static int parse_cmsg(struct msghdr *msg, struct cmsg_data *res) { struct cmsghdr *cmsg; - int data = 0; if (msg->msg_flags & (MSG_TRUNC | MSG_CTRUNC)) { log_err("recvmsg: truncated"); @@ -243,7 +242,6 @@ static int cmsg_check_dead(int fd, int expected_pid) int data = 0; char control[CMSG_SPACE(sizeof(struct ucred)) + CMSG_SPACE(sizeof(int))] = { 0 }; - pid_t client_pid; struct pidfd_info info = { .mask = PIDFD_INFO_EXIT, }; -- cgit v1.2.3 From 1838731f1072ce859ae3350547de4792f365760c Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Mon, 11 Aug 2025 21:53:04 +0000 Subject: selftest: af_unix: Add -Wall and -Wflex-array-member-not-at-end to CFLAGS. -Wall and -Wflex-array-member-not-at-end caught some warnings that will be fixed in later patches. Signed-off-by: Kuniyuki Iwashima Link: https://patch.msgid.link/20250811215432.3379570-2-kuniyu@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/af_unix/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile index a4b61c6d0290..0a20c98bbcfd 100644 --- a/tools/testing/selftests/net/af_unix/Makefile +++ b/tools/testing/selftests/net/af_unix/Makefile @@ -1,4 +1,4 @@ -CFLAGS += $(KHDR_INCLUDES) +CFLAGS += $(KHDR_INCLUDES) -Wall -Wflex-array-member-not-at-end TEST_GEN_PROGS := diag_uid msg_oob scm_inq scm_pidfd scm_rights unix_connect include ../../lib.mk -- cgit v1.2.3 From 27e5b560a86e479bef247a1327182d155ed0bad6 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 Aug 2025 16:13:30 -0700 Subject: selftests: drv-net: add configs for zerocopy Rx Looks like neither IO_URING nor UDMABUF are enabled even tho iou-zcrx.py and devmem.py (respectively) need those. IO_URING gets enabled by default but UDMABUF is missing. Reviewed-by: Joe Damato Reviewed-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250811231334.561137-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/config | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/hw/config b/tools/testing/selftests/drivers/net/hw/config index 88ae719e6f8f..e8a06aa1471c 100644 --- a/tools/testing/selftests/drivers/net/hw/config +++ b/tools/testing/selftests/drivers/net/hw/config @@ -1,5 +1,7 @@ +CONFIG_IO_URING=y CONFIG_IPV6=y CONFIG_IPV6_GRE=y CONFIG_NET_IPGRE=y CONFIG_NET_IPGRE_DEMUX=y +CONFIG_UDMABUF=y CONFIG_VXLAN=y -- cgit v1.2.3 From a94e9cf79ceee04c3e2d042fdf417b8c235c6117 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 Aug 2025 16:13:31 -0700 Subject: selftests: drv-net: devmem: remove sudo from system() calls The general expectations for network HW selftests is that they will be run as root. sudo doesn't seem to work on NIPA VMs. While it's probably something solvable in the setup I think we should remove the sudos. devmem is the only networking test using sudo. Reviewed-by: Joe Damato Reviewed-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250811231334.561137-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/ncdevmem.c | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index 72f828021f83..be937542b4c0 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -287,10 +287,10 @@ static int reset_flow_steering(void) * the exit status. */ - run_command("sudo ethtool -K %s ntuple off >&2", ifname); - run_command("sudo ethtool -K %s ntuple on >&2", ifname); + run_command("ethtool -K %s ntuple off >&2", ifname); + run_command("ethtool -K %s ntuple on >&2", ifname); run_command( - "sudo ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2", + "ethtool -n %s | grep 'Filter:' | awk '{print $2}' | xargs -n1 ethtool -N %s delete >&2", ifname, ifname); return 0; } @@ -351,12 +351,12 @@ static int configure_headersplit(bool on) static int configure_rss(void) { - return run_command("sudo ethtool -X %s equal %d >&2", ifname, start_queue); + return run_command("ethtool -X %s equal %d >&2", ifname, start_queue); } static int configure_channels(unsigned int rx, unsigned int tx) { - return run_command("sudo ethtool -L %s rx %u tx %u", ifname, rx, tx); + return run_command("ethtool -L %s rx %u tx %u", ifname, rx, tx); } static int configure_flow_steering(struct sockaddr_in6 *server_sin) @@ -374,7 +374,7 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) } /* Try configure 5-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", + if (run_command("ethtool -N %s flow-type %s %s %s dst-ip %s %s %s dst-port %s queue %d >&2", ifname, type, client_ip ? "src-ip" : "", @@ -384,7 +384,7 @@ static int configure_flow_steering(struct sockaddr_in6 *server_sin) client_ip ? port : "", port, start_queue)) /* If that fails, try configure 3-tuple */ - if (run_command("sudo ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2", + if (run_command("ethtool -N %s flow-type %s dst-ip %s dst-port %s queue %d >&2", ifname, type, server_addr, -- cgit v1.2.3 From 424e96de30230aac2061f941961be645cf0070d5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 Aug 2025 16:13:32 -0700 Subject: selftests: drv-net: devmem: add / correct the IPv6 support We need to use bracketed IPv6 addresses for socat. Reviewed-by: Joe Damato Reviewed-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250811231334.561137-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/devmem.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index baa2f24240ba..0a2533a3d6d6 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -24,7 +24,7 @@ def check_rx(cfg) -> None: require_devmem(cfg) port = rand_port() - socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.addr}:{port},bind={cfg.remote_addr}:{port}" + socat = f"socat -u - TCP{cfg.addr_ipver}:{cfg.baddr}:{port},bind={cfg.remote_baddr}:{port}" listen_cmd = f"{cfg.bin_local} -l -f {cfg.ifname} -s {cfg.addr} -p {port} -c {cfg.remote_addr} -v 7" with bkg(listen_cmd, exit_wait=True) as ncdevmem: -- cgit v1.2.3 From 6e9a12f85a7567bb9a41d5230468886bd6a27b20 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 Aug 2025 16:13:33 -0700 Subject: selftests: net: terminate bkg() commands on exception There is a number of: with bkg("socat ..LISTEN..", exit_wait=True) uses in the tests. If whatever is supposed to send the traffic fails we will get stuck in the bkg(). Try to kill the process in case of exception, to avoid the long wait. A specific example where this happens is the devmem Tx tests. Reviewed-by: Joe Damato Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250811231334.561137-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index f395c90fb0f1..4ac9249c85ab 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -117,6 +117,7 @@ class bkg(cmd): shell=shell, fail=fail, ns=ns, host=host, ksft_wait=ksft_wait) self.terminate = not exit_wait and not ksft_wait + self._exit_wait = exit_wait self.check_fail = fail if shell and self.terminate: @@ -127,7 +128,9 @@ class bkg(cmd): return self def __exit__(self, ex_type, ex_value, ex_tb): - return self.process(terminate=self.terminate, fail=self.check_fail) + # Force termination on exception + terminate = self.terminate or (self._exit_wait and ex_type) + return self.process(terminate=terminate, fail=self.check_fail) global_defer_queue = [] -- cgit v1.2.3 From c378c497f3fe8dc8f08b487fce49c3d96e4cada8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 Aug 2025 16:13:34 -0700 Subject: selftests: drv-net: devmem: flip the direction of Tx tests The Device Under Test should always be the local system. While the Rx test gets this right the Tx test is sending from remote to local. So Tx of DMABUF memory happens on remote. These tests never run in NIPA since we don't have a compatible device so we haven't caught this. Reviewed-by: Joe Damato Reviewed-by: Mina Almasry Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/20250811231334.561137-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/devmem.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/hw/devmem.py b/tools/testing/selftests/drivers/net/hw/devmem.py index 0a2533a3d6d6..45c2d49d55b6 100755 --- a/tools/testing/selftests/drivers/net/hw/devmem.py +++ b/tools/testing/selftests/drivers/net/hw/devmem.py @@ -42,9 +42,9 @@ def check_tx(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port}", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port}", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") @@ -56,9 +56,9 @@ def check_tx_chunks(cfg) -> None: port = rand_port() listen_cmd = f"socat -U - TCP{cfg.addr_ipver}-LISTEN:{port}" - with bkg(listen_cmd, exit_wait=True) as socat: - wait_port_listen(port) - cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_remote} -f {cfg.ifname} -s {cfg.addr} -p {port} -z 3", host=cfg.remote, shell=True) + with bkg(listen_cmd, host=cfg.remote, exit_wait=True) as socat: + wait_port_listen(port, host=cfg.remote) + cmd(f"echo -e \"hello\\nworld\"| {cfg.bin_local} -f {cfg.ifname} -s {cfg.remote_addr} -p {port} -z 3", shell=True) ksft_eq(socat.stdout.strip(), "hello\nworld") -- cgit v1.2.3 From 07866544e410e4c895a729971e4164861b41fad5 Mon Sep 17 00:00:00 2001 From: Amery Hung Date: Tue, 12 Aug 2025 10:50:39 -0700 Subject: selftests/bpf: Copy test_kmods when installing selftest Commit d6212d82bf26 ("selftests/bpf: Consolidate kernel modules into common directory") consolidated the Makefile of test_kmods. However, since it removed test_kmods from TEST_GEN_PROGS_EXTENDED, the kernel modules required by bpf selftests are now missing from kselftest_install when "make install". Fix it by adding test_kmod to TEST_GEN_FILES. Fixes: d6212d82bf26 ("selftests/bpf: Consolidate kernel modules into common directory") Signed-off-by: Amery Hung Signed-off-by: Andrii Nakryiko Link: https://lore.kernel.org/bpf/20250812175039.2323570-1-ameryhung@gmail.com --- tools/testing/selftests/bpf/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile index 4863106034df..77794efc020e 100644 --- a/tools/testing/selftests/bpf/Makefile +++ b/tools/testing/selftests/bpf/Makefile @@ -137,7 +137,7 @@ TEST_GEN_PROGS_EXTENDED = \ xdping \ xskxceiver -TEST_GEN_FILES += liburandom_read.so urandom_read sign-file uprobe_multi +TEST_GEN_FILES += $(TEST_KMODS) liburandom_read.so urandom_read sign-file uprobe_multi ifneq ($(V),1) submake_extras := feature_display=0 -- cgit v1.2.3 From 5e88777a382480d0b1f7eafb6d0fb680ec7a40bb Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 12 Aug 2025 10:18:10 +0300 Subject: selftests: forwarding: Add a test for FDB activity notification control Test various aspects of FDB activity notification control: * Transitioning of an FDB entry from inactive to active state. * Transitioning of an FDB entry from active to inactive state. * Avoiding the resetting of an FDB entry's last activity time (i.e., "updated" time) using the "norefresh" keyword. Reviewed-by: Petr Machata Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250812071810.312346-1-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/Makefile | 4 +- .../net/forwarding/bridge_activity_notify.sh | 173 +++++++++++++++++++++ 2 files changed, 176 insertions(+), 1 deletion(-) create mode 100755 tools/testing/selftests/net/forwarding/bridge_activity_notify.sh (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index d7bb2e80e88c..0a0d4c2a85f7 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -1,6 +1,8 @@ # SPDX-License-Identifier: GPL-2.0+ OR MIT -TEST_PROGS = bridge_fdb_learning_limit.sh \ +TEST_PROGS = \ + bridge_activity_notify.sh \ + bridge_fdb_learning_limit.sh \ bridge_igmp.sh \ bridge_locked_port.sh \ bridge_mdb.sh \ diff --git a/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh new file mode 100755 index 000000000000..a20ef4bd310b --- /dev/null +++ b/tools/testing/selftests/net/forwarding/bridge_activity_notify.sh @@ -0,0 +1,173 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# +-----------------------+ +------------------------+ +# | H1 (vrf) | | H2 (vrf) | +# | 192.0.2.1/28 | | 192.0.2.2/28 | +# | + $h1 | | + $h2 | +# +----|------------------+ +----|-------------------+ +# | | +# +----|--------------------------------------------------|-------------------+ +# | SW | | | +# | +--|--------------------------------------------------|-----------------+ | +# | | + $swp1 BR1 (802.1d) + $swp2 | | +# | | | | +# | +-----------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + new_inactive_test + existing_active_test + norefresh_test +" + +NUM_NETIFS=4 +source lib.sh + +h1_create() +{ + simple_if_init "$h1" 192.0.2.1/28 + defer simple_if_fini "$h1" 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init "$h2" 192.0.2.2/28 + defer simple_if_fini "$h2" 192.0.2.2/28 +} + +switch_create() +{ + ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0 \ + ageing_time "$LOW_AGEING_TIME" + ip_link_set_up br1 + + ip_link_set_master "$swp1" br1 + ip_link_set_up "$swp1" + + ip_link_set_master "$swp2" br1 + ip_link_set_up "$swp2" +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + vrf_prepare + defer vrf_cleanup + + h1_create + h2_create + switch_create +} + +fdb_active_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q -v "inactive" +} + +fdb_inactive_wait() +{ + local mac=$1; shift + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" +} + +new_inactive_test() +{ + local mac="00:11:22:33:44:55" + + # Add a new FDB entry as static and inactive and check that it + # becomes active upon traffic. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static activity_notify inactive + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_err $? "FDB entry not present as \"inactive\" when should" + + $MZ "$h1" -c 1 -p 64 -a "$mac" -b bcast -t ip -q + + busywait "$BUSYWAIT_TIMEOUT" fdb_active_wait "$mac" + check_err $? "FDB entry present as \"inactive\" when should not" + + log_test "Transition from inactive to active" + + bridge fdb del "$mac" dev "$swp1" master +} + +existing_active_test() +{ + local mac="00:11:22:33:44:55" + local ageing_time + + # Enable activity notifications on an existing dynamic FDB entry and + # check that it becomes inactive after the ageing time passed. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master dynamic + bridge fdb replace "$mac" dev "$swp1" master static activity_notify norefresh + + bridge -d fdb get "$mac" br br1 | grep -q "activity_notify" + check_err $? "FDB entry not present as \"activity_notify\" when should" + + bridge -d fdb get "$mac" br br1 | grep -q "inactive" + check_fail $? "FDB entry present as \"inactive\" when should not" + + ageing_time=$(bridge_ageing_time_get br1) + slowwait $((ageing_time * 2)) fdb_inactive_wait "$mac" + check_err $? "FDB entry not present as \"inactive\" when should" + + log_test "Transition from active to inactive" + + bridge fdb del "$mac" dev "$swp1" master +} + +norefresh_test() +{ + local mac="00:11:22:33:44:55" + local updated_time + + # Check that the "updated" time is reset when replacing an FDB entry + # without the "norefresh" keyword and that it is not reset when + # replacing with the "norefresh" keyword. + RET=0 + + bridge fdb add "$mac" dev "$swp1" master static + sleep 1 + + bridge fdb replace "$mac" dev "$swp1" master static activity_notify + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -ne 0 ]]; then + check_err 1 "\"updated\" time was not reset when should" + fi + + sleep 1 + bridge fdb replace "$mac" dev "$swp1" master static norefresh + updated_time=$(bridge -d -s -j fdb get "$mac" br br1 | jq '.[]["updated"]') + if [[ $updated_time -eq 0 ]]; then + check_err 1 "\"updated\" time was reset when should not" + fi + + log_test "Resetting of \"updated\" time" + + bridge fdb del "$mac" dev "$swp1" master +} + +if ! bridge fdb help 2>&1 | grep -q "activity_notify"; then + echo "SKIP: iproute2 too old, missing bridge FDB activity notification control" + exit "$ksft_skip" +fi + +trap cleanup EXIT + +setup_prepare +setup_wait +tests_run + +exit "$EXIT_STATUS" -- cgit v1.2.3 From acfea9361073134e828fddbd5f8201d428d7a7b1 Mon Sep 17 00:00:00 2001 From: Andre Carvalho Date: Tue, 12 Aug 2025 20:38:23 +0100 Subject: selftests: netconsole: Validate interface selection by MAC address Extend the existing netconsole cmdline selftest to also validate that interface selection can be performed via MAC address. The test now validates that netconsole works with both interface name and MAC address, improving test coverage. Suggested-by: Breno Leitao Reviewed-by: Breno Leitao Signed-off-by: Andre Carvalho Link: https://patch.msgid.link/20250812-netcons-cmdline-selftest-v2-1-8099fb7afa9e@gmail.com Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/sh/lib_netcons.sh | 10 +++- .../selftests/drivers/net/netcons_cmdline.sh | 55 +++++++++++++--------- 2 files changed, 43 insertions(+), 22 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index b6071e80ebbb..8e1085e89647 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -148,12 +148,20 @@ function create_dynamic_target() { # Generate the command line argument for netconsole following: # netconsole=[+][src-port]@[src-ip]/[],[tgt-port]@/[tgt-macaddr] function create_cmdline_str() { + local BINDMODE=${1:-"ifname"} + if [ "${BINDMODE}" == "ifname" ] + then + SRCDEV=${SRCIF} + else + SRCDEV=$(mac_get "${SRCIF}") + fi + DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') SRCPORT="1514" TGTPORT="6666" - echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCIF},${TGTPORT}@${DSTIP}/${DSTMAC}\"" + echo "netconsole=\"+${SRCPORT}@${SRCIP}/${SRCDEV},${TGTPORT}@${DSTIP}/${DSTMAC}\"" } # Do not append the release to the header of the message diff --git a/tools/testing/selftests/drivers/net/netcons_cmdline.sh b/tools/testing/selftests/drivers/net/netcons_cmdline.sh index ad2fb8b1c463..d1d23dc67f99 100755 --- a/tools/testing/selftests/drivers/net/netcons_cmdline.sh +++ b/tools/testing/selftests/drivers/net/netcons_cmdline.sh @@ -19,9 +19,6 @@ check_netconsole_module modprobe netdevsim 2> /dev/null || true rmmod netconsole 2> /dev/null || true -# The content of kmsg will be save to the following file -OUTPUT_FILE="/tmp/${TARGET}" - # Check for basic system dependency and exit if not found # check_for_dependencies # Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) @@ -30,23 +27,39 @@ echo "6 5" > /proc/sys/kernel/printk trap do_cleanup EXIT # Create one namespace and two interfaces set_network -# Create the command line for netconsole, with the configuration from the -# function above -CMDLINE="$(create_cmdline_str)" - -# Load the module, with the cmdline set -modprobe netconsole "${CMDLINE}" - -# Listed for netconsole port inside the namespace and destination interface -listen_port_and_save_to "${OUTPUT_FILE}" & -# Wait for socat to start and listen to the port. -wait_local_port_listen "${NAMESPACE}" "${PORT}" udp -# Send the message -echo "${MSG}: ${TARGET}" > /dev/kmsg -# Wait until socat saves the file to disk -busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" -# Make sure the message was received in the dst part -# and exit -validate_msg "${OUTPUT_FILE}" + +# Run the test twice, with different cmdline parameters +for BINDMODE in "ifname" "mac" +do + echo "Running with bind mode: ${BINDMODE}" >&2 + # Create the command line for netconsole, with the configuration from + # the function above + CMDLINE=$(create_cmdline_str "${BINDMODE}") + + # The content of kmsg will be save to the following file + OUTPUT_FILE="/tmp/${TARGET}-${BINDMODE}" + + # Load the module, with the cmdline set + modprobe netconsole "${CMDLINE}" + + # Listed for netconsole port inside the namespace and destination + # interface + listen_port_and_save_to "${OUTPUT_FILE}" & + # Wait for socat to start and listen to the port. + wait_local_port_listen "${NAMESPACE}" "${PORT}" udp + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_msg "${OUTPUT_FILE}" + + # kill socat in case it is still running + pkill_socat + # Unload the module + rmmod netconsole + echo "${BINDMODE} : Test passed" >&2 +done exit "${ksft_pass}" -- cgit v1.2.3 From 26dbe030ff08930bb243af3c66dd3b7e7bb8406d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 11 Aug 2025 16:42:12 -0700 Subject: selftests: drv-net: add test for RSS on flow label Add a simple test for checking that RSS on flow label works, and that its rejected for IPv4 flows. # ./tools/testing/selftests/drivers/net/hw/rss_flow_label.py TAP version 13 1..2 ok 1 rss_flow_label.test_rss_flow_label ok 2 rss_flow_label.test_rss_flow_label_6only # Totals: pass:2 fail:0 xfail:0 xpass:0 skip:0 error:0 Reviewed-by: Willem de Bruijn Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250811234212.580748-5-kuba@kernel.org Signed-off-by: Paolo Abeni --- tools/testing/selftests/drivers/net/hw/Makefile | 1 + .../selftests/drivers/net/hw/rss_flow_label.py | 167 +++++++++++++++++++++ 2 files changed, 168 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hw/rss_flow_label.py (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index fdc97355588c..5159fd34cb33 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -18,6 +18,7 @@ TEST_PROGS = \ pp_alloc_fail.py \ rss_api.py \ rss_ctx.py \ + rss_flow_label.py \ rss_input_xfrm.py \ tso.py \ xsk_reconfig.py \ diff --git a/tools/testing/selftests/drivers/net/hw/rss_flow_label.py b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py new file mode 100755 index 000000000000..6fa95fe27c47 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/rss_flow_label.py @@ -0,0 +1,167 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +""" +Tests for RSS hashing on IPv6 Flow Label. +""" + +import glob +import os +import socket +from lib.py import CmdExitFailure +from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_ge, ksft_in, \ + ksft_not_in, ksft_raises, KsftSkipEx +from lib.py import bkg, cmd, defer, fd_read_timeout, rand_port +from lib.py import NetDrvEpEnv + + +def _check_system(cfg): + if not hasattr(socket, "SO_INCOMING_CPU"): + raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11") + + qcnt = len(glob.glob(f"/sys/class/net/{cfg.ifname}/queues/rx-*")) + if qcnt < 2: + raise KsftSkipEx(f"Local has only {qcnt} queues") + + for f in [f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_flow_cnt", + f"/sys/class/net/{cfg.ifname}/queues/rx-0/rps_cpus"]: + try: + with open(f, 'r') as fp: + setting = fp.read().strip() + # CPU mask will be zeros and commas + if setting.replace("0", "").replace(",", ""): + raise KsftSkipEx(f"RPS/RFS is configured: {f}: {setting}") + except FileNotFoundError: + pass + + # 1 is the default, if someone changed it we probably shouldn"t mess with it + af = cmd("cat /proc/sys/net/ipv6/auto_flowlabels", host=cfg.remote).stdout + if af.strip() != "1": + raise KsftSkipEx("Remote does not have auto_flowlabels enabled") + + +def _ethtool_get_cfg(cfg, fl_type): + descr = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + + converter = { + "IP SA": "s", + "IP DA": "d", + "L3 proto": "t", + "L4 bytes 0 & 1 [TCP/UDP src port]": "f", + "L4 bytes 2 & 3 [TCP/UDP dst port]": "n", + "IPv6 Flow Label": "l", + } + + ret = "" + for line in descr.split("\n")[1:-2]: + # if this raises we probably need to add more keys to converter above + ret += converter[line] + return ret + + +def _traffic(cfg, one_sock, one_cpu): + local_port = rand_port(socket.SOCK_DGRAM) + remote_port = rand_port(socket.SOCK_DGRAM) + + sock = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) + sock.bind(("", local_port)) + sock.connect((cfg.remote_addr_v["6"], 0)) + if one_sock: + send = f"exec 5<>/dev/udp/{cfg.addr_v['6']}/{local_port}; " \ + "for i in `seq 20`; do echo a >&5; sleep 0.02; done; exec 5>&-" + else: + send = "for i in `seq 20`; do echo a | socat -t0.02 - UDP6:" \ + f"[{cfg.addr_v['6']}]:{local_port},sourceport={remote_port}; done" + + cpus = set() + with bkg(send, shell=True, host=cfg.remote, exit_wait=True): + for _ in range(20): + fd_read_timeout(sock.fileno(), 1) + cpu = sock.getsockopt(socket.SOL_SOCKET, socket.SO_INCOMING_CPU) + cpus.add(cpu) + + if one_cpu: + ksft_eq(len(cpus), 1, + f"{one_sock=} - expected one CPU, got traffic on: {cpus=}") + else: + ksft_ge(len(cpus), 2, + f"{one_sock=} - expected many CPUs, got traffic on: {cpus=}") + + +def test_rss_flow_label(cfg): + """ + Test hashing on IPv6 flow label. Send traffic over a single socket + and over multiple sockets. Depend on the remote having auto-label + enabled so that it randomizes the label per socket. + """ + + cfg.require_ipver("6") + cfg.require_cmd("socat", remote=True) + _check_system(cfg) + + # Enable flow label hashing for UDP6 + initial = _ethtool_get_cfg(cfg, "udp6") + no_lbl = initial.replace("l", "") + if "l" not in initial: + try: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 l{no_lbl}") + except CmdExitFailure as exc: + raise KsftSkipEx("Device doesn't support Flow Label for UDP6") from exc + + defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _traffic(cfg, one_sock=True, one_cpu=True) + _traffic(cfg, one_sock=False, one_cpu=False) + + # Disable it, we should see no hashing (reset was already defer()ed) + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {no_lbl}") + + _traffic(cfg, one_sock=False, one_cpu=True) + + +def _check_v4_flow_types(cfg): + for fl_type in ["tcp4", "udp4", "ah4", "esp4", "sctp4"]: + try: + cur = cmd(f"ethtool -n {cfg.ifname} rx-flow-hash {fl_type}").stdout + ksft_not_in("Flow Label", cur, + comment=f"{fl_type=} has Flow Label:" + cur) + except CmdExitFailure: + # Probably does not support this flow type + pass + + +def test_rss_flow_label_6only(cfg): + """ + Test interactions with IPv4 flow types. It should not be possible to set + IPv6 Flow Label hashing for an IPv4 flow type. The Flow Label should also + not appear in the IPv4 "current config". + """ + + with ksft_raises(CmdExitFailure) as cm: + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash tcp4 sdfnl") + ksft_in("Invalid argument", cm.exception.cmd.stderr) + + _check_v4_flow_types(cfg) + + # Try to enable Flow Labels and check again, in case it leaks thru + initial = _ethtool_get_cfg(cfg, "udp6") + changed = initial.replace("l", "") if "l" in initial else initial + "l" + + cmd(f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {changed}") + restore = defer(cmd, f"ethtool -N {cfg.ifname} rx-flow-hash udp6 {initial}") + + _check_v4_flow_types(cfg) + restore.exec() + _check_v4_flow_types(cfg) + + +def main() -> None: + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + ksft_run([test_rss_flow_label, + test_rss_flow_label_6only], + args=(cfg, )) + ksft_exit() + + +if __name__ == "__main__": + main() -- cgit v1.2.3 From e95f6ccdbc71663b9f4b24a166e85bd8e6720318 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Jul 2025 16:41:53 -0700 Subject: rcutorture: Fix jitter.sh spin time An embarrassing syntax error in jitter.sh makes for fixed spin time. This commit therefore makes it be variable, as intended, albeit with very coarse-grained adjustment. Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/jitter.sh | 27 +++++++++++++++++++++--- 1 file changed, 24 insertions(+), 3 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/rcutorture/bin/jitter.sh b/tools/testing/selftests/rcutorture/bin/jitter.sh index fd1ffaa5a135..3c1e5d3f8805 100755 --- a/tools/testing/selftests/rcutorture/bin/jitter.sh +++ b/tools/testing/selftests/rcutorture/bin/jitter.sh @@ -39,6 +39,22 @@ do fi done +# Uses global variables startsecs, startns, endsecs, endns, and limit. +# Exit code is success for time not yet elapsed and failure otherwise. +function timecheck { + local done=`awk -v limit=$limit \ + -v startsecs=$startsecs \ + -v startns=$startns \ + -v endsecs=$endsecs \ + -v endns=$endns < /dev/null ' + BEGIN { + delta = (endsecs - startsecs) * 1000 * 1000; + delta += int((endns - startns) / 1000); + print delta >= limit; + }'` + return $done +} + while : do # Check for done. @@ -85,15 +101,20 @@ do n=$(($n+1)) sleep .$sleeptime - # Spin a random duration + # Spin a random duration, but with rather coarse granularity. limit=`awk -v me=$me -v n=$n -v spinmax=$spinmax 'BEGIN { srand(n + me + systime()); printf("%06d", int(rand() * spinmax)); }' < /dev/null` n=$(($n+1)) - for i in {1..$limit} + startsecs=`date +%s` + startns=`date +%N` + endsecs=$startns + endns=$endns + while timecheck do - echo > /dev/null + endsecs=`date +%s` + endns=`date +%N` done done -- cgit v1.2.3 From bd89367e05e8cf55eacf8627b0b3ed55b938d6fd Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 16 Jul 2025 09:10:35 -0700 Subject: torture: Add --do-normal parameter to torture.sh help text The --do-normal parameter was missing from the torture.sh script's help text, so this commit adds it. Hopefully better late than never! Signed-off-by: Paul E. McKenney --- tools/testing/selftests/rcutorture/bin/torture.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/rcutorture/bin/torture.sh b/tools/testing/selftests/rcutorture/bin/torture.sh index 611bc03a8dc7..a33ba109ef0b 100755 --- a/tools/testing/selftests/rcutorture/bin/torture.sh +++ b/tools/testing/selftests/rcutorture/bin/torture.sh @@ -94,6 +94,7 @@ usage () { echo " --do-kvfree / --do-no-kvfree / --no-kvfree" echo " --do-locktorture / --do-no-locktorture / --no-locktorture" echo " --do-none" + echo " --do-normal / --do-no-normal / --no-normal" echo " --do-rcuscale / --do-no-rcuscale / --no-rcuscale" echo " --do-rcutasksflavors / --do-no-rcutasksflavors / --no-rcutasksflavors" echo " --do-rcutorture / --do-no-rcutorture / --no-rcutorture" -- cgit v1.2.3 From f09fc24dd9a5ec989dfdde7090624924ede6ddc7 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Tue, 12 Aug 2025 07:20:54 -0700 Subject: selftests: drv-net: wait for carrier On fast machines the tests run in quick succession so even when tests clean up after themselves the carrier may need some time to come back. Specifically in NIPA when ping.py runs right after netpoll_basic.py the first ping command fails. Since the context manager callbacks are now common NetDrvEpEnv gets an ip link up call as well. Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250812142054.750282-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/py/__init__.py | 2 +- tools/testing/selftests/drivers/net/lib/py/env.py | 41 +++++++++++----------- tools/testing/selftests/net/lib/py/utils.py | 18 ++++++++++ 3 files changed, 39 insertions(+), 22 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 8711c67ad658..a07b56a75c8a 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -15,7 +15,7 @@ try: NlError, RtnlFamily, DevlinkFamily from net.lib.py import CmdExitFailure from net.lib.py import bkg, cmd, bpftool, bpftrace, defer, ethtool, \ - fd_read_timeout, ip, rand_port, tool, wait_port_listen + fd_read_timeout, ip, rand_port, tool, wait_port_listen, wait_file from net.lib.py import fd_read_timeout from net.lib.py import KsftSkipEx, KsftFailEx, KsftXfailEx from net.lib.py import ksft_disruptive, ksft_exit, ksft_pr, ksft_run, \ diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index 1b8bd648048f..c1f3b608c6d8 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -4,7 +4,7 @@ import os import time from pathlib import Path from lib.py import KsftSkipEx, KsftXfailEx -from lib.py import ksft_setup +from lib.py import ksft_setup, wait_file from lib.py import cmd, ethtool, ip, CmdExitFailure from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -25,6 +25,9 @@ class NetDrvEnvBase: self.env = self._load_env_file() + # Following attrs must be set be inheriting classes + self.dev = None + def _load_env_file(self): env = os.environ.copy() @@ -48,6 +51,22 @@ class NetDrvEnvBase: env[pair[0]] = pair[1] return ksft_setup(env) + def __del__(self): + pass + + def __enter__(self): + ip(f"link set dev {self.dev['ifname']} up") + wait_file(f"/sys/class/net/{self.dev['ifname']}/carrier", + lambda x: x.strip() == "1") + + return self + + def __exit__(self, ex_type, ex_value, ex_tb): + """ + __exit__ gets called at the end of a "with" block. + """ + self.__del__() + class NetDrvEnv(NetDrvEnvBase): """ @@ -72,17 +91,6 @@ class NetDrvEnv(NetDrvEnvBase): self.ifname = self.dev['ifname'] self.ifindex = self.dev['ifindex'] - def __enter__(self): - ip(f"link set dev {self.dev['ifname']} up") - - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() @@ -219,15 +227,6 @@ class NetDrvEpEnv(NetDrvEnvBase): raise Exception("Can't resolve remote interface name, multiple interfaces match") return v6[0]["ifname"] if v6 else v4[0]["ifname"] - def __enter__(self): - return self - - def __exit__(self, ex_type, ex_value, ex_tb): - """ - __exit__ gets called at the end of a "with" block. - """ - self.__del__() - def __del__(self): if self._ns: self._ns.remove() diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 4ac9249c85ab..b188cac49738 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -252,3 +252,21 @@ def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadlin if time.monotonic() > end: raise Exception("Waiting for port listen timed out") time.sleep(sleep) + + +def wait_file(fname, test_fn, sleep=0.005, deadline=5, encoding='utf-8'): + """ + Wait for file contents on the local system to satisfy a condition. + test_fn() should take one argument (file contents) and return whether + condition is met. + """ + end = time.monotonic() + deadline + + with open(fname, "r", encoding=encoding) as fp: + while True: + if test_fn(fp.read()): + break + fp.seek(0) + if time.monotonic() > end: + raise TimeoutError("Wait for file contents failed", fname) + time.sleep(sleep) -- cgit v1.2.3 From 51ca1e67f416000bfd9a2a49c868538e744a317e Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Tue, 12 Aug 2025 11:02:13 +0300 Subject: selftests: net: Test bridge backup port when port is administratively down Test that packets are redirected to the backup port when the primary port is administratively down. With the previous patch: # ./test_bridge_backup_port.sh [...] TEST: swp1 administratively down [ OK ] TEST: No forwarding out of swp1 [ OK ] TEST: Forwarding out of vx0 [ OK ] TEST: swp1 administratively up [ OK ] TEST: Forwarding out of swp1 [ OK ] TEST: No forwarding out of vx0 [ OK ] [...] Tests passed: 89 Tests failed: 0 Without the previous patch: # ./test_bridge_backup_port.sh [...] TEST: swp1 administratively down [ OK ] TEST: No forwarding out of swp1 [ OK ] TEST: Forwarding out of vx0 [FAIL] TEST: swp1 administratively up [ OK ] TEST: Forwarding out of swp1 [ OK ] [...] Tests passed: 85 Tests failed: 4 Signed-off-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Link: https://patch.msgid.link/20250812080213.325298-3-idosch@nvidia.com Signed-off-by: Jakub Kicinski --- .../selftests/net/test_bridge_backup_port.sh | 31 +++++++++++++++++++--- 1 file changed, 27 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh index 1b3f89e2b86e..2a7224fe74f2 100755 --- a/tools/testing/selftests/net/test_bridge_backup_port.sh +++ b/tools/testing/selftests/net/test_bridge_backup_port.sh @@ -315,6 +315,29 @@ backup_port() tc_check_packets $sw1 "dev vx0 egress" 101 1 log_test $? 0 "No forwarding out of vx0" + # Check that packets are forwarded out of vx0 when swp1 is + # administratively down and out of swp1 when it is administratively up + # again. + run_cmd "ip -n $sw1 link set dev swp1 down" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled + log_test $? 0 "swp1 administratively down" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 3 + log_test $? 0 "No forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "Forwarding out of vx0" + + run_cmd "ip -n $sw1 link set dev swp1 up" + busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding + log_test $? 0 "swp1 administratively up" + + run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" + tc_check_packets $sw1 "dev swp1 egress" 101 4 + log_test $? 0 "Forwarding out of swp1" + tc_check_packets $sw1 "dev vx0 egress" 101 2 + log_test $? 0 "No forwarding out of vx0" + # Remove vx0 as the backup port of swp1 and check that packets are no # longer forwarded out of vx0 when swp1 does not have a carrier. run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port" @@ -322,9 +345,9 @@ backup_port() log_test $? 1 "vx0 not configured as backup port of swp1" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "Forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" run_cmd "ip -n $sw1 link set dev swp1 carrier off" @@ -332,9 +355,9 @@ backup_port() log_test $? 0 "swp1 carrier off" run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1" - tc_check_packets $sw1 "dev swp1 egress" 101 4 + tc_check_packets $sw1 "dev swp1 egress" 101 5 log_test $? 0 "No forwarding out of swp1" - tc_check_packets $sw1 "dev vx0 egress" 101 1 + tc_check_packets $sw1 "dev vx0 egress" 101 2 log_test $? 0 "No forwarding out of vx0" } -- cgit v1.2.3 From 8c06cbdcbaea34d7b96d76df4d6669275c1d291a Mon Sep 17 00:00:00 2001 From: William Liu Date: Tue, 12 Aug 2025 23:58:26 +0000 Subject: selftests/tc-testing: Check backlog stats in gso_skb case Add tests to ensure proper backlog accounting in hhf, codel, pie, fq, fq_pie, and fq_codel qdiscs. We check for the bug pattern originally found in fq, fq_pie, and fq_codel, which was an underflow in the tbf parent backlog stats upon child qdisc removal. Signed-off-by: William Liu Reviewed-by: Savino Dicanosa Link: https://patch.msgid.link/20250812235808.45281-1-will@willsroot.io Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 198 +++++++++++++++++++++ 1 file changed, 198 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 23a61e5b99d0..998e5a2f4579 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -185,6 +185,204 @@ "$IP addr del 10.10.10.10/24 dev $DUMMY || true" ] }, + { + "id": "34c0", + "name": "Test TBF with HHF Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "hhf" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 hhf limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 hhf limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "fd68", + "name": "Test TBF with CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "514e", + "name": "Test TBF with PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "6c97", + "name": "Test TBF with FQ Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "5d0b", + "name": "Test TBF with FQ_CODEL Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_codel" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_codel limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_codel limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, + { + "id": "21c3", + "name": "Test TBF with FQ_PIE Backlog Accounting in gso_skb case against underflow", + "category": [ + "qdisc", + "tbf", + "fq_pie" + ], + "plugins": { + "requires": [ + "nsPlugin" + ] + }, + "setup": [ + "$IP link set dev $DUMMY up || true", + "$IP addr add 10.10.11.10/24 dev $DUMMY || true", + "$TC qdisc add dev $DUMMY root handle 1: tbf rate 8bit burst 100b latency 100ms", + "$TC qdisc replace dev $DUMMY handle 2: parent 1:1 fq_pie limit 1000", + [ + "ping -I $DUMMY -c2 10.10.11.11", + 1 + ], + "$TC qdisc change dev $DUMMY handle 2: parent 1:1 fq_pie limit 1" + ], + "cmdUnderTest": "$TC qdisc del dev $DUMMY handle 2: parent 1:1", + "expExitCode": "0", + "verifyCmd": "$TC -s qdisc show dev $DUMMY", + "matchPattern": "backlog 0b 0p", + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY handle 1: root" + ] + }, { "id": "a4bb", "name": "Test FQ_CODEL with HTB parent - force packet drop with empty queue", -- cgit v1.2.3 From dc0fe956144d9ba906bc6b2fe35b3cd54e1bfdb0 Mon Sep 17 00:00:00 2001 From: Pu Lehui Date: Sat, 19 Jul 2025 09:17:30 +0000 Subject: selftests/bpf: Enable arena atomics tests for RV64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable arena atomics tests for RV64. Signed-off-by: Pu Lehui Signed-off-by: Daniel Borkmann Tested-by: Björn Töpel Reviewed-by: Björn Töpel Acked-by: Björn Töpel Link: https://lore.kernel.org/bpf/20250719091730.2660197-11-pulehui@huaweicloud.com --- tools/testing/selftests/bpf/progs/arena_atomics.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/progs/arena_atomics.c b/tools/testing/selftests/bpf/progs/arena_atomics.c index a52feff98112..d1841aac94a2 100644 --- a/tools/testing/selftests/bpf/progs/arena_atomics.c +++ b/tools/testing/selftests/bpf/progs/arena_atomics.c @@ -28,7 +28,8 @@ bool skip_all_tests = true; #if defined(ENABLE_ATOMICS_TESTS) && \ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) bool skip_lacq_srel_tests __attribute((__section__(".data"))) = false; #else bool skip_lacq_srel_tests = true; @@ -314,7 +315,8 @@ int load_acquire(const void *ctx) { #if defined(ENABLE_ATOMICS_TESTS) && \ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) #define LOAD_ACQUIRE_ARENA(SIZEOP, SIZE, SRC, DST) \ { asm volatile ( \ @@ -365,7 +367,8 @@ int store_release(const void *ctx) { #if defined(ENABLE_ATOMICS_TESTS) && \ defined(__BPF_FEATURE_ADDR_SPACE_CAST) && \ - (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86)) + (defined(__TARGET_ARCH_arm64) || defined(__TARGET_ARCH_x86) || \ + (defined(__TARGET_ARCH_riscv) && __riscv_xlen == 64)) #define STORE_RELEASE_ARENA(SIZEOP, DST, VAL) \ { asm volatile ( \ -- cgit v1.2.3 From bd80c4d6e4b0f3060b00d9d2337c595c6bb1c88f Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Wed, 13 Aug 2025 08:32:13 +0200 Subject: kunit: tool: Parse skipped tests from kselftest.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Skipped tests reported by kselftest.h use a different format than KTAP, there is no explicit test name. Normally the test name is part of the free-form string after the SKIP keyword: ok 3 # SKIP test: some reason Extend the parser to handle those correctly. Use the free-form string as test name instead. Link: https://lore.kernel.org/r/20250813-kunit-kselftesth-skip-v1-1-57ae3de4f109@linutronix.de Signed-off-by: Thomas Weißschuh Reviewed-by: David Gow Signed-off-by: Shuah Khan --- tools/testing/kunit/kunit_parser.py | 8 +++++--- tools/testing/kunit/test_data/test_is_test_passed-kselftest.log | 3 ++- 2 files changed, 7 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/kunit/kunit_parser.py b/tools/testing/kunit/kunit_parser.py index c176487356e6..333cd3a4a56b 100644 --- a/tools/testing/kunit/kunit_parser.py +++ b/tools/testing/kunit/kunit_parser.py @@ -352,9 +352,9 @@ def parse_test_plan(lines: LineStream, test: Test) -> bool: lines.pop() return True -TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?([^#]*)( # .*)?$') +TEST_RESULT = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?([^#]*)( # .*)?$') -TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) (- )?(.*) # SKIP(.*)$') +TEST_RESULT_SKIP = re.compile(r'^\s*(ok|not ok) ([0-9]+) ?(- )?(.*) # SKIP ?(.*)$') def peek_test_name_match(lines: LineStream, test: Test) -> bool: """ @@ -379,6 +379,8 @@ def peek_test_name_match(lines: LineStream, test: Test) -> bool: if not match: return False name = match.group(4) + if not name: + return False return name == test.name def parse_test_result(lines: LineStream, test: Test, @@ -416,7 +418,7 @@ def parse_test_result(lines: LineStream, test: Test, # Set name of test object if skip_match: - test.name = skip_match.group(4) + test.name = skip_match.group(4) or skip_match.group(5) else: test.name = match.group(4) diff --git a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log index 65d3f27feaf2..30d9ef18bcec 100644 --- a/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log +++ b/tools/testing/kunit/test_data/test_is_test_passed-kselftest.log @@ -1,5 +1,5 @@ TAP version 13 -1..2 +1..3 # selftests: membarrier: membarrier_test_single_thread # TAP version 13 # 1..2 @@ -12,3 +12,4 @@ ok 1 selftests: membarrier: membarrier_test_single_thread # ok 1 sys_membarrier available # ok 2 sys membarrier invalid command test: command = -1, flags = 0, errno = 22. Failed as expected ok 2 selftests: membarrier: membarrier_test_multi_thread +ok 3 # SKIP selftests: membarrier: membarrier_test_multi_thread -- cgit v1.2.3 From 5e0b2177bdba99c2487480e9864825f742b684ee Mon Sep 17 00:00:00 2001 From: Ido Schimmel Date: Thu, 14 Aug 2025 15:06:41 +0200 Subject: selftest: forwarding: router: Add a test case for IPv4 link-local source IP Add a test case which checks that packets with an IPv4 link-local source IP are forwarded and not dropped. Signed-off-by: Ido Schimmel Signed-off-by: Petr Machata Link: https://patch.msgid.link/3c2e0b17d99530f57bef5ddff9af284fa0c9b667.1755174341.git.petrm@nvidia.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/router.sh | 29 ++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh index b98ea9449b8b..dfb6646cb97b 100755 --- a/tools/testing/selftests/net/forwarding/router.sh +++ b/tools/testing/selftests/net/forwarding/router.sh @@ -18,6 +18,8 @@ # | 2001:db8:1::1/64 2001:db8:2::1/64 | # | | # +-----------------------------------------------------------------+ +# +#shellcheck disable=SC2034 # SC doesn't see our uses of global variables ALL_TESTS=" ping_ipv4 @@ -27,6 +29,7 @@ ALL_TESTS=" ipv4_sip_equal_dip ipv6_sip_equal_dip ipv4_dip_link_local + ipv4_sip_link_local " NUM_NETIFS=4 @@ -330,6 +333,32 @@ ipv4_dip_link_local() tc filter del dev $rp2 egress protocol ip pref 1 handle 101 flower } +ipv4_sip_link_local() +{ + local sip=169.254.1.1 + + RET=0 + + # Disable rpfilter to prevent packets to be dropped because of it. + sysctl_set net.ipv4.conf.all.rp_filter 0 + sysctl_set net.ipv4.conf."$rp1".rp_filter 0 + + tc filter add dev "$rp2" egress protocol ip pref 1 handle 101 \ + flower src_ip "$sip" action pass + + $MZ "$h1" -t udp "sp=54321,dp=12345" -c 5 -d 1msec -b "$rp1mac" \ + -A "$sip" -B 198.51.100.2 -q + + tc_check_packets "dev $rp2 egress" 101 5 + check_err $? "Packets were dropped" + + log_test "IPv4 source IP is link-local" + + tc filter del dev "$rp2" egress protocol ip pref 1 handle 101 flower + sysctl_restore net.ipv4.conf."$rp1".rp_filter + sysctl_restore net.ipv4.conf.all.rp_filter +} + trap cleanup EXIT setup_prepare -- cgit v1.2.3 From c80d79720647ed77ebc0198abd5a0807efdaff0b Mon Sep 17 00:00:00 2001 From: Matt Bobrowski Date: Fri, 15 Aug 2025 12:12:14 +0000 Subject: bpf/selftests: Fix test_tcpnotify_user Based on a bisect, it appears that commit 7ee988770326 ("timers: Implement the hierarchical pull model") has somehow inadvertently broken BPF selftest test_tcpnotify_user. The error that is being generated by this test is as follows: FAILED: Wrong stats Expected 10 calls, got 8 It looks like the change allows timer functions to be run on CPUs different from the one they are armed on. The test had pinned itself to CPU 0, and in the past the retransmit attempts also occurred on CPU 0. The test had set the max_entries attribute for BPF_MAP_TYPE_PERF_EVENT_ARRAY to 2 and was calling bpf_perf_event_output() with BPF_F_CURRENT_CPU, so the entry was likely to be in range. With the change to allow timers to run on other CPUs, the current CPU tasked with performing the retransmit might be bumped and in turn fall out of range, as the event will be filtered out via __bpf_perf_event_output() using: if (unlikely(index >= array->map.max_entries)) return -E2BIG; A possible change would be to explicitly set the max_entries attribute for perf_event_map in test_tcpnotify_kern.c to a value that's at least as large as the number of CPUs. As it turns out however, if the field is left unset, then the libbpf will determine the number of CPUs available on the underlying system and update the max_entries attribute accordingly in map_set_def_max_entries(). A further problem with the test is that it has a thread that continues running up until the program exits. The main thread cleans up some LIBBPF data structures, while the other thread continues to use them, which inevitably will trigger a SIGSEGV. This can be dealt with by telling the thread to run for as long as necessary and doing a pthread_join on it before exiting the program. Finally, I don't think binding the process to CPU 0 is meaningful for this test any more, so get rid of that. Fixes: 435f90a338ae ("selftests/bpf: add a test case for sock_ops perf-event notification") Signed-off-by: Matt Bobrowski Signed-off-by: Martin KaFai Lau Acked-by: Stanislav Fomichev Link: https://patch.msgid.link/aJ8kHhwgATmA3rLf@google.com --- .../selftests/bpf/progs/test_tcpnotify_kern.c | 1 - tools/testing/selftests/bpf/test_tcpnotify_user.c | 20 ++++++++++---------- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c index 540181c115a8..ef00d38b0a8d 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpnotify_kern.c @@ -23,7 +23,6 @@ struct { struct { __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); - __uint(max_entries, 2); __type(key, int); __type(value, __u32); } perf_event_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/test_tcpnotify_user.c b/tools/testing/selftests/bpf/test_tcpnotify_user.c index 595194453ff8..35b4893ccdf8 100644 --- a/tools/testing/selftests/bpf/test_tcpnotify_user.c +++ b/tools/testing/selftests/bpf/test_tcpnotify_user.c @@ -15,20 +15,18 @@ #include #include #include -#include #include -#include -#include "bpf_util.h" #include "cgroup_helpers.h" #include "test_tcpnotify.h" -#include "trace_helpers.h" #include "testing_helpers.h" #define SOCKET_BUFFER_SIZE (getpagesize() < 8192L ? getpagesize() : 8192L) pthread_t tid; +static bool exit_thread; + int rx_callbacks; static void dummyfn(void *ctx, int cpu, void *data, __u32 size) @@ -45,7 +43,7 @@ void tcp_notifier_poller(struct perf_buffer *pb) { int err; - while (1) { + while (!exit_thread) { err = perf_buffer__poll(pb, 100); if (err < 0 && err != -EINTR) { printf("failed perf_buffer__poll: %d\n", err); @@ -78,15 +76,10 @@ int main(int argc, char **argv) int error = EXIT_FAILURE; struct bpf_object *obj; char test_script[80]; - cpu_set_t cpuset; __u32 key = 0; libbpf_set_strict_mode(LIBBPF_STRICT_ALL); - CPU_ZERO(&cpuset); - CPU_SET(0, &cpuset); - pthread_setaffinity_np(pthread_self(), sizeof(cpu_set_t), &cpuset); - cg_fd = cgroup_setup_and_join(cg_path); if (cg_fd < 0) goto err; @@ -151,6 +144,13 @@ int main(int argc, char **argv) sleep(10); + exit_thread = true; + int ret = pthread_join(tid, NULL); + if (ret) { + printf("FAILED: pthread_join\n"); + goto err; + } + if (verify_result(&g)) { printf("FAILED: Wrong stats Expected %d calls, got %d\n", g.ncalls, rx_callbacks); -- cgit v1.2.3 From 7f8fa9d370c11ba2fb296598267e14d3bfe4ea11 Mon Sep 17 00:00:00 2001 From: Yureka Lilian Date: Thu, 14 Aug 2025 20:01:13 +0200 Subject: selftests/bpf: Add test for DEVMAP reuse The test covers basic re-use of a pinned DEVMAP map, with both matching and mismatching parameters. Signed-off-by: Yureka Lilian Signed-off-by: Andrii Nakryiko Acked-by: Eduard Zingerman Link: https://lore.kernel.org/bpf/20250814180113.1245565-4-yuka@yuka.dev --- .../bpf/prog_tests/pinning_devmap_reuse.c | 50 ++++++++++++++++++++++ .../selftests/bpf/progs/test_pinning_devmap.c | 20 +++++++++ 2 files changed, 70 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c create mode 100644 tools/testing/selftests/bpf/progs/test_pinning_devmap.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c new file mode 100644 index 000000000000..9ae49b587f3e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/pinning_devmap_reuse.c @@ -0,0 +1,50 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include +#include +#include + + +#include "test_pinning_devmap.skel.h" + +void test_pinning_devmap_reuse(void) +{ + const char *pinpath1 = "/sys/fs/bpf/pinmap1"; + const char *pinpath2 = "/sys/fs/bpf/pinmap2"; + struct test_pinning_devmap *skel1 = NULL, *skel2 = NULL; + int err; + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + + /* load the object a first time */ + skel1 = test_pinning_devmap__open_and_load(); + if (!ASSERT_OK_PTR(skel1, "skel_load1")) + goto out; + + /* load the object a second time, re-using the pinned map */ + skel2 = test_pinning_devmap__open_and_load(); + if (!ASSERT_OK_PTR(skel2, "skel_load2")) + goto out; + + /* we can close the reference safely without + * the map's refcount falling to 0 + */ + test_pinning_devmap__destroy(skel1); + skel1 = NULL; + + /* now, swap the pins */ + err = renameat2(0, pinpath1, 0, pinpath2, RENAME_EXCHANGE); + if (!ASSERT_OK(err, "swap pins")) + goto out; + + /* load the object again, this time the re-use should fail */ + skel1 = test_pinning_devmap__open_and_load(); + if (!ASSERT_ERR_PTR(skel1, "skel_load3")) + goto out; + +out: + unlink(pinpath1); + unlink(pinpath2); + test_pinning_devmap__destroy(skel1); + test_pinning_devmap__destroy(skel2); +} diff --git a/tools/testing/selftests/bpf/progs/test_pinning_devmap.c b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c new file mode 100644 index 000000000000..c855f8f87eff --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_pinning_devmap.c @@ -0,0 +1,20 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include +#include + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, 1); + __type(key, __u32); + __type(value, __u32); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} pinmap1 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_DEVMAP); + __uint(max_entries, 2); + __type(key, __u32); + __type(value, __u32); + __uint(pinning, LIBBPF_PIN_BY_NAME); +} pinmap2 SEC(".maps"); -- cgit v1.2.3 From 715c7a36d59f54162a26fac1d1ed8dc087a24cf1 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Aug 2025 12:43:23 -0700 Subject: selftests: tls: make the new data_steal test less flaky The CI has hit a couple of cases of: RUN global.data_steal ... tls.c:2762:data_steal:Expected recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT) (20000) == -1 (-1) data_steal: Test terminated by timeout FAIL global.data_steal Looks like the 2msec sleep is not long enough. Make the sleep longer, and then instead of second sleep wait for the thieving process to exit. That way we can be sure it called recv() before us. While at it also avoid trying to steal more than a record, this seems to be causing issues in manual testing as well. Fixes: d7e82594a45c ("selftests: tls: test TCP stealing data from under the TLS socket") Link: https://patch.msgid.link/20250814194323.2014650-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/tls.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c index d8cfcf9bb825..2b8387a83bc7 100644 --- a/tools/testing/selftests/net/tls.c +++ b/tools/testing/selftests/net/tls.c @@ -2748,17 +2748,18 @@ TEST(data_steal) { pid = fork(); ASSERT_GE(pid, 0); if (!pid) { - EXPECT_EQ(recv(cfd, buf, sizeof(buf), MSG_WAITALL), - sizeof(buf)); + EXPECT_EQ(recv(cfd, buf, sizeof(buf) / 2, MSG_WAITALL), + sizeof(buf) / 2); exit(!__test_passed(_metadata)); } - usleep(2000); + usleep(10000); ASSERT_EQ(setsockopt(fd, SOL_TLS, TLS_TX, &tls, tls.len), 0); ASSERT_EQ(setsockopt(cfd, SOL_TLS, TLS_RX, &tls, tls.len), 0); EXPECT_EQ(send(fd, buf, sizeof(buf), 0), sizeof(buf)); - usleep(2000); + EXPECT_EQ(wait(&status), pid); + EXPECT_EQ(status, 0); EXPECT_EQ(recv(cfd, buf2, sizeof(buf2), MSG_DONTWAIT), -1); /* Don't check errno, the error will be different depending * on what random bytes TLS interpreted as the record length. @@ -2766,9 +2767,6 @@ TEST(data_steal) { close(fd); close(cfd); - - EXPECT_EQ(wait(&status), pid); - EXPECT_EQ(status, 0); } static void __attribute__((constructor)) fips_check(void) { -- cgit v1.2.3 From 1201f6fb5bfdbd10985ac3c8f49ef8f4f88b5c94 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Thu, 31 Jul 2025 10:00:31 +0200 Subject: tools/nolibc: fix error return value of clock_nanosleep() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit clock_nanosleep() returns a positive error value. Unlike other libc functions it *does not* return -1 nor set errno. Fix the return value and also adapt nanosleep(). Fixes: 7c02bc4088af ("tools/nolibc: add support for clock_nanosleep() and nanosleep()") Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250731-nolibc-clock_nanosleep-ret-v1-1-9e4af7855e61@linutronix.de Signed-off-by: Thomas Weißschuh --- tools/include/nolibc/time.h | 5 +++-- tools/testing/selftests/nolibc/nolibc-test.c | 1 + 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/include/nolibc/time.h b/tools/include/nolibc/time.h index d02bc44d2643..e9c1b976791a 100644 --- a/tools/include/nolibc/time.h +++ b/tools/include/nolibc/time.h @@ -133,7 +133,8 @@ static __attribute__((unused)) int clock_nanosleep(clockid_t clockid, int flags, const struct timespec *rqtp, struct timespec *rmtp) { - return __sysret(sys_clock_nanosleep(clockid, flags, rqtp, rmtp)); + /* Directly return a positive error number */ + return -sys_clock_nanosleep(clockid, flags, rqtp, rmtp); } static __inline__ @@ -145,7 +146,7 @@ double difftime(time_t time1, time_t time2) static __inline__ int nanosleep(const struct timespec *rqtp, struct timespec *rmtp) { - return clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp); + return __sysret(sys_clock_nanosleep(CLOCK_REALTIME, 0, rqtp, rmtp)); } diff --git a/tools/testing/selftests/nolibc/nolibc-test.c b/tools/testing/selftests/nolibc/nolibc-test.c index a297ee0d6d07..cc4d730ac465 100644 --- a/tools/testing/selftests/nolibc/nolibc-test.c +++ b/tools/testing/selftests/nolibc/nolibc-test.c @@ -1334,6 +1334,7 @@ int run_syscall(int min, int max) CASE_TEST(chroot_root); EXPECT_SYSZR(euid0, chroot("/")); break; CASE_TEST(chroot_blah); EXPECT_SYSER(1, chroot("/proc/self/blah"), -1, ENOENT); break; CASE_TEST(chroot_exe); EXPECT_SYSER(1, chroot(argv0), -1, ENOTDIR); break; + CASE_TEST(clock_nanosleep); ts.tv_nsec = -1; EXPECT_EQ(1, EINVAL, clock_nanosleep(CLOCK_REALTIME, 0, &ts, NULL)); break; CASE_TEST(close_m1); EXPECT_SYSER(1, close(-1), -1, EBADF); break; CASE_TEST(close_dup); EXPECT_SYSZR(1, close(dup(0))); break; CASE_TEST(dup_0); tmp = dup(0); EXPECT_SYSNE(1, tmp, -1); close(tmp); break; -- cgit v1.2.3 From 0227af355b50c526bf83ca52d67aef5d102e9b07 Mon Sep 17 00:00:00 2001 From: Akhilesh Patil Date: Sun, 17 Aug 2025 15:06:05 +0530 Subject: selftests: ublk: Use ARRAY_SIZE() macro to improve code Use ARRAY_SIZE() macro while calculating size of an array to improve code readability and reduce potential sizing errors. Implement this suggestion given by spatch tool by running coccinelle script - scripts/coccinelle/misc/array_size.cocci Follow ARRAY_SIZE() macro usage pattern in ublk.c introduced by, commit ec120093180b9 ("selftests: ublk: fix ublk_find_tgt()") wherever appropriate to maintain consistency. Signed-off-by: Akhilesh Patil Reviewed-by: Ming Lei Link: https://lore.kernel.org/r/aKGihYui6/Pcijbk@bhairav-test.ee.iitb.ac.in Signed-off-by: Jens Axboe --- tools/testing/selftests/ublk/kublk.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c index 95188065b2e9..6512dfbdbce3 100644 --- a/tools/testing/selftests/ublk/kublk.c +++ b/tools/testing/selftests/ublk/kublk.c @@ -1400,7 +1400,7 @@ static int cmd_dev_get_features(void) if (!((1ULL << i) & features)) continue; - if (i < sizeof(feat_map) / sizeof(feat_map[0])) + if (i < ARRAY_SIZE(feat_map)) feat = feat_map[i]; else feat = "unknown"; @@ -1477,7 +1477,7 @@ static void __cmd_create_help(char *exe, bool recovery) printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n"); printf("\tdefault: nthreads=nr_queues"); - for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) { + for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++) { const struct ublk_tgt_ops *ops = tgt_ops_list[i]; if (ops->usage) -- cgit v1.2.3 From 12741630350c6ba250ffd480dbb0bbab7e8ac80a Mon Sep 17 00:00:00 2001 From: Ilya Leoshkevich Date: Wed, 13 Aug 2025 14:06:31 +0200 Subject: selftests/bpf: Clobber a lot of registers in tailcall_bpf2bpf_hierarchy tests Clobbering a lot of registers and stack slots helps exposing tail call counter overwrite bugs in JITs. Signed-off-by: Ilya Leoshkevich Signed-off-by: Daniel Borkmann Link: https://lore.kernel.org/bpf/20250813121016.163375-5-iii@linux.ibm.com --- tools/testing/selftests/bpf/progs/bpf_test_utils.h | 18 ++++++++++++++++++ .../selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c | 3 +++ .../selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c | 3 +++ .../selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c | 3 +++ .../bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c | 3 +++ 5 files changed, 30 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/bpf_test_utils.h (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/progs/bpf_test_utils.h b/tools/testing/selftests/bpf/progs/bpf_test_utils.h new file mode 100644 index 000000000000..f4e67b492dd2 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/bpf_test_utils.h @@ -0,0 +1,18 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __BPF_TEST_UTILS_H__ +#define __BPF_TEST_UTILS_H__ + +#include +#include "bpf_misc.h" + +/* Clobber as many native registers and stack slots as possible. */ +static __always_inline void clobber_regs_stack(void) +{ + char tmp_str[] = "123456789"; + unsigned long tmp; + + bpf_strtoul(tmp_str, sizeof(tmp_str), 0, &tmp); + __sink(tmp); +} + +#endif diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c index 327ca395e860..d556b19413d7 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy1.c @@ -2,6 +2,7 @@ #include #include #include "bpf_legacy.h" +#include "bpf_test_utils.h" struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); @@ -24,6 +25,8 @@ int entry(struct __sk_buff *skb) { int ret = 1; + clobber_regs_stack(); + count++; subprog_tail(skb); subprog_tail(skb); diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c index 72fd0d577506..ae94c9c70ab7 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy2.c @@ -2,6 +2,7 @@ #include #include #include "bpf_misc.h" +#include "bpf_test_utils.h" int classifier_0(struct __sk_buff *skb); int classifier_1(struct __sk_buff *skb); @@ -60,6 +61,8 @@ int tailcall_bpf2bpf_hierarchy_2(struct __sk_buff *skb) { int ret = 0; + clobber_regs_stack(); + subprog_tail0(skb); subprog_tail1(skb); diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c index a7fb91cb05b7..56b6b0099840 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy3.c @@ -2,6 +2,7 @@ #include #include #include "bpf_misc.h" +#include "bpf_test_utils.h" int classifier_0(struct __sk_buff *skb); @@ -53,6 +54,8 @@ int tailcall_bpf2bpf_hierarchy_3(struct __sk_buff *skb) { int ret = 0; + clobber_regs_stack(); + bpf_tail_call_static(skb, &jmp_table0, 0); __sink(ret); diff --git a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c index c87f9ca982d3..5261395713cd 100644 --- a/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c +++ b/tools/testing/selftests/bpf/progs/tailcall_bpf2bpf_hierarchy_fentry.c @@ -4,6 +4,7 @@ #include "vmlinux.h" #include #include +#include "bpf_test_utils.h" struct { __uint(type, BPF_MAP_TYPE_PROG_ARRAY); @@ -24,6 +25,8 @@ int subprog_tail(void *ctx) SEC("fentry/dummy") int BPF_PROG(fentry, struct sk_buff *skb) { + clobber_regs_stack(); + count++; subprog_tail(ctx); subprog_tail(ctx); -- cgit v1.2.3 From 2be3fd903a6775ce21b2a138950aab09d0998427 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Mon, 18 Aug 2025 16:02:04 +0200 Subject: selftests/nolibc: be more specific about variables affecting nolibc-test MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Only one of these variables is used. $CC is preferred over $CROSS_COMPILE. Make this clear in the help message. Suggested-by: Willy Tarreau Link: https://lore.kernel.org/lkml/20250817093905.GA14213@1wt.eu/ Signed-off-by: Thomas Weißschuh --- tools/testing/selftests/nolibc/Makefile.nolibc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index 0fb759ba992e..b013a5d724c9 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -267,7 +267,7 @@ help: @echo " all call the \"run\" target below" @echo " help this help" @echo " sysroot create the nolibc sysroot here (uses \$$ARCH)" - @echo " nolibc-test build the executable (uses \$$CC and \$$CROSS_COMPILE)" + @echo " nolibc-test build the executable (uses \$$CC or \$$CROSS_COMPILE)" @echo " libc-test build an executable using the compiler's default libc instead" @echo " run-user runs the executable under QEMU (uses \$$XARCH, \$$TEST)" @echo " initramfs.cpio prepare the initramfs archive with nolibc-test" -- cgit v1.2.3 From 32042f638cb839b4eb4a76b5dfc7c0a412327a6d Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 19 Jul 2025 17:38:27 +0200 Subject: selftests/nolibc: deduplicate invocations of toplevel Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Various targets of the testsuite call back into the toplevel kernel Makefile. These calls use various parameters and are quite long. Introduce a common variable to make future changes smaller and the lines shorter. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250719-nolibc-llvm-system-v1-1-1730216ce171@weissschuh.net --- tools/testing/selftests/nolibc/Makefile.nolibc | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index b013a5d724c9..d8dbb63fe4f7 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -262,6 +262,9 @@ REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++ if (f || !p || !done) printf("failure\n"); else if (s) printf("warning\n"); else printf("success\n");; \ printf("\nSee all results in %s\n", ARGV[1]); }' +# Execute the toplevel kernel Makefile +KBUILD_MAKE = $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) + help: @echo "Supported targets under selftests/nolibc:" @echo " all call the \"run\" target below" @@ -340,17 +343,17 @@ initramfs: nolibc-test $(Q)cp nolibc-test initramfs/init defconfig: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(DEFCONFIG) + $(Q)$(KBUILD_MAKE) $(DEFCONFIG) $(Q)if [ -n "$(EXTRACONFIG)" ]; then \ $(srctree)/scripts/config --file $(objtree)/.config $(EXTRACONFIG); \ - $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) olddefconfig < /dev/null; \ + $(KBUILD_MAKE) olddefconfig < /dev/null; \ fi kernel: - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) < /dev/null + $(Q)$(KBUILD_MAKE) $(IMAGE_NAME) < /dev/null kernel-standalone: initramfs - $(Q)$(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null + $(Q)$(KBUILD_MAKE) $(IMAGE_NAME) CONFIG_INITRAMFS_SOURCE=$(CURDIR)/initramfs < /dev/null # run the tests after building the kernel run: kernel initramfs.cpio -- cgit v1.2.3 From 1a5b40317dcbcd5e498cc8d2b3e5f48755bab322 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 19 Jul 2025 17:38:28 +0200 Subject: selftests/nolibc: don't pass CC to toplevel Makefile MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The toplevel Makefile is capable of calculating CC from CROSS_COMPILE and/or ARCH. Stop passing the unnecessary variable. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250719-nolibc-llvm-system-v1-2-1730216ce171@weissschuh.net --- tools/testing/selftests/nolibc/Makefile.nolibc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index d8dbb63fe4f7..ebb097d228b9 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -263,7 +263,7 @@ REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++ printf("\nSee all results in %s\n", ARGV[1]); }' # Execute the toplevel kernel Makefile -KBUILD_MAKE = $(MAKE) -C $(srctree) ARCH=$(ARCH) CC=$(CC) CROSS_COMPILE=$(CROSS_COMPILE) +KBUILD_MAKE = $(MAKE) -C $(srctree) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) help: @echo "Supported targets under selftests/nolibc:" -- cgit v1.2.3 From 850047b19741490631855a475ccaa3ed29316039 Mon Sep 17 00:00:00 2001 From: Thomas Weißschuh Date: Sat, 19 Jul 2025 17:38:29 +0200 Subject: selftests/nolibc: always compile the kernel with GCC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LLVM/clang can not build the kernel for all architectures supported by nolibc. The current setup uses the same compiler to build the kernel as is used for nolibc-test. This prevents using the full qemu-system tests for LLVM builds. Instead always build the kernel with GCC. For the nolibc testsuite the kernel does not need to be built with LLVM. Signed-off-by: Thomas Weißschuh Acked-by: Willy Tarreau Link: https://lore.kernel.org/r/20250719-nolibc-llvm-system-v1-3-1730216ce171@weissschuh.net --- tools/testing/selftests/nolibc/Makefile.nolibc | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/nolibc/Makefile.nolibc b/tools/testing/selftests/nolibc/Makefile.nolibc index ebb097d228b9..330e000baeb1 100644 --- a/tools/testing/selftests/nolibc/Makefile.nolibc +++ b/tools/testing/selftests/nolibc/Makefile.nolibc @@ -263,7 +263,7 @@ REPORT ?= awk '/\[OK\][\r]*$$/{p++} /\[FAIL\][\r]*$$/{if (!f) printf("\n"); f++ printf("\nSee all results in %s\n", ARGV[1]); }' # Execute the toplevel kernel Makefile -KBUILD_MAKE = $(MAKE) -C $(srctree) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) +KBUILD_MAKE = $(MAKE) -C $(srctree) ARCH=$(ARCH) CROSS_COMPILE=$(CROSS_COMPILE) LLVM= help: @echo "Supported targets under selftests/nolibc:" @@ -276,8 +276,8 @@ help: @echo " initramfs.cpio prepare the initramfs archive with nolibc-test" @echo " initramfs prepare the initramfs tree with nolibc-test" @echo " defconfig create a fresh new default config (uses \$$XARCH)" - @echo " kernel (re)build the kernel (uses \$$XARCH)" - @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH)" + @echo " kernel (re)build the kernel (uses \$$XARCH, \$$CROSS_COMPILE)" + @echo " kernel-standalone (re)build the kernel with the initramfs (uses \$$XARCH, \$$CROSS_COMPILE)" @echo " run runs the kernel in QEMU after building it (uses \$$XARCH, \$$TEST)" @echo " rerun runs a previously prebuilt kernel in QEMU (uses \$$XARCH, \$$TEST)" @echo " clean clean the sysroot, initramfs, build and output files" -- cgit v1.2.3 From 0e74eb4d57f00e6103ac23ce2312766c25ad88f6 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:29 +0200 Subject: selftests/bpf: Cover verifier checks for skb_meta dynptr type dynptr for skb metadata behaves the same way as the dynptr for skb data with one exception - writes to skb_meta dynptr don't invalidate existing skb and skb_meta slices. Duplicate those the skb dynptr tests which we can, since bpf_dynptr_from_skb_meta kfunc can be called only from TC BPF, to cover the skb_meta dynptr verifier checks. Also add a couple of new tests (skb_data_valid_*) to ensure we don't invalidate the slices in the mentioned case, which are specific to skb_meta dynptr. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Reviewed-by: Jesse Brandeburg Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-3-8a39e636e0fb@cloudflare.com --- tools/testing/selftests/bpf/prog_tests/dynptr.c | 2 + tools/testing/selftests/bpf/progs/dynptr_fail.c | 258 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/dynptr_success.c | 55 +++++ 3 files changed, 315 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/dynptr.c b/tools/testing/selftests/bpf/prog_tests/dynptr.c index 9b2d9ceda210..b9f86cb91e81 100644 --- a/tools/testing/selftests/bpf/prog_tests/dynptr.c +++ b/tools/testing/selftests/bpf/prog_tests/dynptr.c @@ -32,6 +32,8 @@ static struct { {"test_ringbuf", SETUP_SYSCALL_SLEEP}, {"test_skb_readonly", SETUP_SKB_PROG}, {"test_dynptr_skb_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_data", SETUP_SKB_PROG}, + {"test_dynptr_skb_meta_flags", SETUP_SKB_PROG}, {"test_adjust", SETUP_SYSCALL_SLEEP}, {"test_adjust_err", SETUP_SYSCALL_SLEEP}, {"test_zero_size_dynptr", SETUP_SYSCALL_SLEEP}, diff --git a/tools/testing/selftests/bpf/progs/dynptr_fail.c b/tools/testing/selftests/bpf/progs/dynptr_fail.c index bd8f15229f5c..dda6a8dada82 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_fail.c +++ b/tools/testing/selftests/bpf/progs/dynptr_fail.c @@ -269,6 +269,26 @@ int data_slice_out_of_bounds_skb(struct __sk_buff *skb) return SK_PASS; } +/* A metadata slice can't be accessed out of bounds */ +SEC("?tc") +__failure __msg("value is outside of the allowed memory range") +int data_slice_out_of_bounds_skb_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *(md + 1) = 42; + + return SK_PASS; +} + SEC("?raw_tp") __failure __msg("value is outside of the allowed memory range") int data_slice_out_of_bounds_map_value(void *ctx) @@ -1089,6 +1109,26 @@ int skb_invalid_slice_write(struct __sk_buff *skb) return SK_PASS; } +/* bpf_dynptr_slice()s are read-only and cannot be written to */ +SEC("?tc") +__failure __msg("R{{[0-9]+}} cannot write into rdonly_mem") +int skb_meta_invalid_slice_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?tc") __failure __msg("invalid mem access 'scalar'") @@ -1192,6 +1232,188 @@ int skb_invalid_data_slice4(struct __sk_buff *skb) return SK_PASS; } +/* Read-only skb data slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + val = *d; + + return SK_PASS; +} + +/* Read-write skb data slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *d; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + d = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*d)); + if (!d) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + *d = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated on write to skb data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated on write to skb data slice */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_payload_write(struct __sk_buff *skb) +{ + struct bpf_dynptr data, meta; + __u8 *md; + + bpf_dynptr_from_skb(skb, 0, &data); + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&data, 0, "x", 1, 0); + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated whenever a helper changes packet data */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_payload_helper(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + +/* Read-only skb metadata slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int ro_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + val = *md; + + return SK_PASS; +} + +/* Read-write skb metadata slice is invalidated on write to skb metadata */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int rw_skb_meta_slice_invalid_after_metadata_write(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + bpf_dynptr_write(&meta, 0, "x", 1, 0); + + /* this should fail */ + *md = 42; + + return SK_PASS; +} + /* The read-only data slice is invalidated whenever a helper changes packet data */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") @@ -1255,6 +1477,19 @@ int skb_invalid_ctx(void *ctx) return 0; } +/* Only supported prog type can create skb_meta-type dynptrs */ +SEC("?raw_tp") +__failure __msg("calling kernel function bpf_dynptr_from_skb_meta is not allowed") +int skb_meta_invalid_ctx(void *ctx) +{ + struct bpf_dynptr meta; + + /* this should fail */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + + return 0; +} + SEC("fentry/skb_tx_error") __failure __msg("must be referenced or trusted") int BPF_PROG(skb_invalid_ctx_fentry, void *skb) @@ -1665,6 +1900,29 @@ int clone_skb_packet_data(struct __sk_buff *skb) return 0; } +/* A skb clone's metadata slice becomes invalid anytime packet data changes */ +SEC("?tc") +__failure __msg("invalid mem access 'scalar'") +int clone_skb_packet_meta(struct __sk_buff *skb) +{ + struct bpf_dynptr clone, meta; + __u8 *md; + + bpf_dynptr_from_skb_meta(skb, 0, &meta); + bpf_dynptr_clone(&meta, &clone); + md = bpf_dynptr_slice_rdwr(&clone, 0, NULL, sizeof(*md)); + if (!md) + return SK_DROP; + + if (bpf_skb_pull_data(skb, skb->len)) + return SK_DROP; + + /* this should fail */ + *md = 42; + + return 0; +} + /* A xdp clone's data slices should be invalid anytime packet data changes */ SEC("?xdp") __failure __msg("invalid mem access 'scalar'") diff --git a/tools/testing/selftests/bpf/progs/dynptr_success.c b/tools/testing/selftests/bpf/progs/dynptr_success.c index 8315273cb900..127dea342e5a 100644 --- a/tools/testing/selftests/bpf/progs/dynptr_success.c +++ b/tools/testing/selftests/bpf/progs/dynptr_success.c @@ -211,6 +211,61 @@ int test_dynptr_skb_data(struct __sk_buff *skb) return 1; } +SEC("?tc") +int test_dynptr_skb_meta_data(struct __sk_buff *skb) +{ + struct bpf_dynptr meta; + __u8 *md; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + /* This should return NULL. Must use bpf_dynptr_slice API */ + err = 2; + md = bpf_dynptr_data(&meta, 0, sizeof(*md)); + if (md) + return 1; + + err = 0; + return 1; +} + +/* Check that skb metadata dynptr ops don't accept any flags. */ +SEC("?tc") +int test_dynptr_skb_meta_flags(struct __sk_buff *skb) +{ + const __u64 INVALID_FLAGS = ~0ULL; + struct bpf_dynptr meta; + __u8 buf; + int ret; + + err = 1; + ret = bpf_dynptr_from_skb_meta(skb, INVALID_FLAGS, &meta); + if (ret != -EINVAL) + return 1; + + err = 2; + ret = bpf_dynptr_from_skb_meta(skb, 0, &meta); + if (ret) + return 1; + + err = 3; + ret = bpf_dynptr_read(&buf, 0, &meta, 0, INVALID_FLAGS); + if (ret != -EINVAL) + return 1; + + err = 4; + ret = bpf_dynptr_write(&meta, 0, &buf, 0, INVALID_FLAGS); + if (ret != -EINVAL) + return 1; + + err = 0; + return 1; +} + SEC("tp/syscalls/sys_enter_nanosleep") int test_adjust(void *ctx) { -- cgit v1.2.3 From 6dfd5e01e1a7728e162f721cd8adf5ecd24fbc80 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:30 +0200 Subject: selftests/bpf: Pass just bpf_map to xdp_context_test helper Prepare for parametrizing the xdp_context tests. The assert_test_result helper doesn't need the whole skeleton. Pass just what it needs. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Reviewed-by: Jesse Brandeburg Acked-by: Eduard Zingerman Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-4-8a39e636e0fb@cloudflare.com --- tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index b9d9f0a502ce..0134651d94ab 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -156,15 +156,14 @@ err: return -1; } -static void assert_test_result(struct test_xdp_meta *skel) +static void assert_test_result(const struct bpf_map *result_map) { int err; __u32 map_key = 0; __u8 map_value[TEST_PAYLOAD_LEN]; - err = bpf_map__lookup_elem(skel->maps.test_result, &map_key, - sizeof(map_key), &map_value, - TEST_PAYLOAD_LEN, BPF_ANY); + err = bpf_map__lookup_elem(result_map, &map_key, sizeof(map_key), + &map_value, TEST_PAYLOAD_LEN, BPF_ANY); if (!ASSERT_OK(err, "lookup test_result")) return; @@ -248,7 +247,7 @@ void test_xdp_context_veth(void) if (!ASSERT_OK(ret, "send_test_packet")) goto close; - assert_test_result(skel); + assert_test_result(skel->maps.test_result); close: close_netns(nstoken); @@ -313,7 +312,7 @@ void test_xdp_context_tuntap(void) if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) goto close; - assert_test_result(skel); + assert_test_result(skel->maps.test_result); close: if (tap_fd >= 0) -- cgit v1.2.3 From dd9f6cfb4ef4394c2afd1e2b564af25aff151bc8 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:31 +0200 Subject: selftests/bpf: Parametrize test_xdp_context_tuntap We want to add more test cases to cover different ways to access the metadata area. Prepare for it. Pull up the skeleton management. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Reviewed-by: Jesse Brandeburg Acked-by: Eduard Zingerman Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-5-8a39e636e0fb@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 31 +++++++++++++++------- 1 file changed, 21 insertions(+), 10 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 0134651d94ab..6c66e27e5bc7 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -256,12 +256,13 @@ close: netns_free(tx_ns); } -void test_xdp_context_tuntap(void) +static void test_tuntap(struct bpf_program *xdp_prog, + struct bpf_program *tc_prog, + struct bpf_map *result_map) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); struct netns_obj *ns = NULL; - struct test_xdp_meta *skel = NULL; __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int tap_fd = -1; int tap_ifindex; @@ -277,10 +278,6 @@ void test_xdp_context_tuntap(void) SYS(close, "ip link set dev " TAP_NAME " up"); - skel = test_xdp_meta__open_and_load(); - if (!ASSERT_OK_PTR(skel, "open and load skeleton")) - goto close; - tap_ifindex = if_nametoindex(TAP_NAME); if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) goto close; @@ -290,12 +287,12 @@ void test_xdp_context_tuntap(void) if (!ASSERT_OK(ret, "bpf_tc_hook_create")) goto close; - tc_opts.prog_fd = bpf_program__fd(skel->progs.ing_cls); + tc_opts.prog_fd = bpf_program__fd(tc_prog); ret = bpf_tc_attach(&tc_hook, &tc_opts); if (!ASSERT_OK(ret, "bpf_tc_attach")) goto close; - ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(skel->progs.ing_xdp), + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) goto close; @@ -312,11 +309,25 @@ void test_xdp_context_tuntap(void) if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) goto close; - assert_test_result(skel->maps.test_result); + assert_test_result(result_map); close: if (tap_fd >= 0) close(tap_fd); - test_xdp_meta__destroy(skel); netns_free(ns); } + +void test_xdp_context_tuntap(void) +{ + struct test_xdp_meta *skel = NULL; + + skel = test_xdp_meta__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open and load skeleton")) + return; + + if (test__start_subtest("data_meta")) + test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, + skel->maps.test_result); + + test_xdp_meta__destroy(skel); +} -- cgit v1.2.3 From 153f6bfd489076309227413e9221960712336369 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:32 +0200 Subject: selftests/bpf: Cover read access to skb metadata via dynptr Exercise reading from SKB metadata area in two new ways: 1. indirectly, with bpf_dynptr_read(), and 2. directly, with bpf_dynptr_slice(). Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Reviewed-by: Jesse Brandeburg Acked-by: Eduard Zingerman Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-6-8a39e636e0fb@cloudflare.com --- tools/testing/selftests/bpf/bpf_kfuncs.h | 3 ++ .../bpf/prog_tests/xdp_context_test_run.c | 21 +++++++++++ tools/testing/selftests/bpf/progs/test_xdp_meta.c | 42 ++++++++++++++++++++++ 3 files changed, 66 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 9386dfe8b884..794d44d19c88 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -19,6 +19,9 @@ extern int bpf_dynptr_from_skb(struct __sk_buff *skb, __u64 flags, extern int bpf_dynptr_from_xdp(struct xdp_md *xdp, __u64 flags, struct bpf_dynptr *ptr__uninit) __ksym __weak; +extern int bpf_dynptr_from_skb_meta(struct __sk_buff *skb, __u64 flags, + struct bpf_dynptr *ptr__uninit) __ksym __weak; + /* Description * Obtain a read-only pointer to the dynptr's data * Returns diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 6c66e27e5bc7..7e4526461a4c 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -171,6 +171,18 @@ static void assert_test_result(const struct bpf_map *result_map) "test_result map contains test payload"); } +static bool clear_test_result(struct bpf_map *result_map) +{ + const __u8 v[sizeof(test_payload)] = {}; + const __u32 k = 0; + int err; + + err = bpf_map__update_elem(result_map, &k, sizeof(k), v, sizeof(v), BPF_ANY); + ASSERT_OK(err, "update test_result"); + + return err == 0; +} + void test_xdp_context_veth(void) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -268,6 +280,9 @@ static void test_tuntap(struct bpf_program *xdp_prog, int tap_ifindex; int ret; + if (!clear_test_result(result_map)) + return; + ns = netns_new(TAP_NETNS, true); if (!ASSERT_OK_PTR(ns, "create and open ns")) return; @@ -328,6 +343,12 @@ void test_xdp_context_tuntap(void) if (test__start_subtest("data_meta")) test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, skel->maps.test_result); + if (test__start_subtest("dynptr_read")) + test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_read, + skel->maps.test_result); + if (test__start_subtest("dynptr_slice")) + test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_slice, + skel->maps.test_result); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index fcf6ca14f2ea..0ba647fb1b1d 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -1,8 +1,10 @@ +#include #include #include #include #include +#include "bpf_kfuncs.h" #define META_SIZE 32 @@ -40,6 +42,46 @@ int ing_cls(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Read from metadata using bpf_dynptr_read helper */ +SEC("tc") +int ing_cls_dynptr_read(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, META_SIZE, &meta, 0, 0); + + return TC_ACT_SHOT; +} + +/* Read from metadata using read-only dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + src = bpf_dynptr_slice(&meta, 0, NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_SHOT; +} + SEC("xdp") int ing_xdp(struct xdp_md *ctx) { -- cgit v1.2.3 From ed93360807801e7f69b74efec98a1bd674ba035e Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:33 +0200 Subject: selftests/bpf: Cover write access to skb metadata via dynptr Add tests what exercise writes to skb metadata in two ways: 1. indirectly, using bpf_dynptr_write helper, 2. directly, using a read-write dynptr slice. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Reviewed-by: Jesse Brandeburg Acked-by: Eduard Zingerman Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-7-8a39e636e0fb@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 36 ++++++++++-- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 67 ++++++++++++++++++++++ 2 files changed, 98 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 7e4526461a4c..79c4c58276e6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -269,7 +269,8 @@ close: } static void test_tuntap(struct bpf_program *xdp_prog, - struct bpf_program *tc_prog, + struct bpf_program *tc_prio_1_prog, + struct bpf_program *tc_prio_2_prog, struct bpf_map *result_map) { LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); @@ -302,11 +303,20 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_OK(ret, "bpf_tc_hook_create")) goto close; - tc_opts.prog_fd = bpf_program__fd(tc_prog); + tc_opts.prog_fd = bpf_program__fd(tc_prio_1_prog); ret = bpf_tc_attach(&tc_hook, &tc_opts); if (!ASSERT_OK(ret, "bpf_tc_attach")) goto close; + if (tc_prio_2_prog) { + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 2, + .prog_fd = bpf_program__fd(tc_prio_2_prog)); + + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + } + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) @@ -341,13 +351,29 @@ void test_xdp_context_tuntap(void) return; if (test__start_subtest("data_meta")) - test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls, + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls, + NULL, /* tc prio 2 */ skel->maps.test_result); if (test__start_subtest("dynptr_read")) - test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_read, + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_read, + NULL, /* tc prio 2 */ skel->maps.test_result); if (test__start_subtest("dynptr_slice")) - test_tuntap(skel->progs.ing_xdp, skel->progs.ing_cls_dynptr_slice, + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_slice, + NULL, /* tc prio 2 */ + skel->maps.test_result); + if (test__start_subtest("dynptr_write")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_write, + skel->progs.ing_cls_dynptr_read, + skel->maps.test_result); + if (test__start_subtest("dynptr_slice_rdwr")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_slice_rdwr, + skel->progs.ing_cls_dynptr_slice, skel->maps.test_result); test_xdp_meta__destroy(skel); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index 0ba647fb1b1d..e7879860f403 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -60,6 +60,24 @@ int ing_cls_dynptr_read(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Write to metadata using bpf_dynptr_write helper */ +SEC("tc") +int ing_cls_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + __u8 *src; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, META_SIZE, 0); + + return TC_ACT_UNSPEC; /* pass */ +} + /* Read from metadata using read-only dynptr slice */ SEC("tc") int ing_cls_dynptr_slice(struct __sk_buff *ctx) @@ -82,6 +100,55 @@ int ing_cls_dynptr_slice(struct __sk_buff *ctx) return TC_ACT_SHOT; } +/* Write to metadata using writeable dynptr slice */ +SEC("tc") +int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + __u8 *src, *dst; + + bpf_dynptr_from_skb(ctx, 0, &data); + src = bpf_dynptr_slice(&data, sizeof(struct ethhdr), NULL, META_SIZE); + if (!src) + return TC_ACT_SHOT; + + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + dst = bpf_dynptr_slice_rdwr(&meta, 0, NULL, META_SIZE); + if (!dst) + return TC_ACT_SHOT; + + __builtin_memcpy(dst, src, META_SIZE); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Reserve and clear space for metadata but don't populate it */ +SEC("xdp") +int ing_xdp_zalloc_meta(struct xdp_md *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + __u8 *meta; + int ret; + + /* Drop any non-test packets */ + if (eth + 1 > ctx_ptr(ctx, data_end)) + return XDP_DROP; + if (eth->h_proto != 0) + return XDP_DROP; + + ret = bpf_xdp_adjust_meta(ctx, -META_SIZE); + if (ret < 0) + return XDP_DROP; + + meta = ctx_ptr(ctx, data_meta); + if (meta + META_SIZE > ctx_ptr(ctx, data)) + return XDP_DROP; + + __builtin_memset(meta, 0, META_SIZE); + + return XDP_PASS; +} + SEC("xdp") int ing_xdp(struct xdp_md *ctx) { -- cgit v1.2.3 From bd1b51b319788cbc5769a44f0081a1cb012f8ae4 Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:34 +0200 Subject: selftests/bpf: Cover read/write to skb metadata at an offset Exercise r/w access to skb metadata through an offset-adjusted dynptr, read/write helper with an offset argument, and a slice starting at an offset. Also check for the expected errors when the offset is out of bounds. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Reviewed-by: Jesse Brandeburg Acked-by: Eduard Zingerman Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-8-8a39e636e0fb@cloudflare.com --- .../bpf/prog_tests/xdp_context_test_run.c | 10 ++ tools/testing/selftests/bpf/progs/test_xdp_meta.c | 119 +++++++++++++++++++++ 2 files changed, 129 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 79c4c58276e6..24a7b4b7fdb6 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -375,6 +375,16 @@ void test_xdp_context_tuntap(void) skel->progs.ing_cls_dynptr_slice_rdwr, skel->progs.ing_cls_dynptr_slice, skel->maps.test_result); + if (test__start_subtest("dynptr_offset")) + test_tuntap(skel->progs.ing_xdp_zalloc_meta, + skel->progs.ing_cls_dynptr_offset_wr, + skel->progs.ing_cls_dynptr_offset_rd, + skel->maps.test_result); + if (test__start_subtest("dynptr_offset_oob")) + test_tuntap(skel->progs.ing_xdp, + skel->progs.ing_cls_dynptr_offset_oob, + skel->progs.ing_cls, + skel->maps.test_result); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index e7879860f403..ee3d8adf5e9c 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -1,5 +1,6 @@ #include #include +#include #include #include @@ -122,6 +123,124 @@ int ing_cls_dynptr_slice_rdwr(struct __sk_buff *ctx) return TC_ACT_UNSPEC; /* pass */ } +/* Read skb metadata in chunks from various offsets in different ways. */ +SEC("tc") +int ing_cls_dynptr_offset_rd(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + const __u32 chunk_len = META_SIZE / 4; + const __u32 zero = 0; + __u8 *dst, *src; + + dst = bpf_map_lookup_elem(&test_result, &zero); + if (!dst) + return TC_ACT_SHOT; + + /* 1. Regular read */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 2. Read from an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_read(dst, chunk_len, &meta, 0, 0); + dst += chunk_len; + + /* 3. Read at an offset */ + bpf_dynptr_read(dst, chunk_len, &meta, chunk_len, 0); + dst += chunk_len; + + /* 4. Read from a slice starting at an offset */ + src = bpf_dynptr_slice(&meta, 2 * chunk_len, NULL, chunk_len); + if (!src) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_SHOT; +} + +/* Write skb metadata in chunks at various offsets in different ways. */ +SEC("tc") +int ing_cls_dynptr_offset_wr(struct __sk_buff *ctx) +{ + const __u32 chunk_len = META_SIZE / 4; + __u8 payload[META_SIZE]; + struct bpf_dynptr meta; + __u8 *dst, *src; + + bpf_skb_load_bytes(ctx, sizeof(struct ethhdr), payload, sizeof(payload)); + src = payload; + + /* 1. Regular write */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 2. Write to an offset-adjusted dynptr */ + bpf_dynptr_adjust(&meta, chunk_len, bpf_dynptr_size(&meta)); + bpf_dynptr_write(&meta, 0, src, chunk_len, 0); + src += chunk_len; + + /* 3. Write at an offset */ + bpf_dynptr_write(&meta, chunk_len, src, chunk_len, 0); + src += chunk_len; + + /* 4. Write to a slice starting at an offset */ + dst = bpf_dynptr_slice_rdwr(&meta, 2 * chunk_len, NULL, chunk_len); + if (!dst) + return TC_ACT_SHOT; + __builtin_memcpy(dst, src, chunk_len); + + return TC_ACT_UNSPEC; /* pass */ +} + +/* Pass an OOB offset to dynptr read, write, adjust, slice. */ +SEC("tc") +int ing_cls_dynptr_offset_oob(struct __sk_buff *ctx) +{ + struct bpf_dynptr meta; + __u8 md, *p; + int err; + + err = bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (err) + goto fail; + + /* read offset OOB */ + err = bpf_dynptr_read(&md, sizeof(md), &meta, META_SIZE, 0); + if (err != -E2BIG) + goto fail; + + /* write offset OOB */ + err = bpf_dynptr_write(&meta, META_SIZE, &md, sizeof(md), 0); + if (err != -E2BIG) + goto fail; + + /* adjust end offset OOB */ + err = bpf_dynptr_adjust(&meta, 0, META_SIZE + 1); + if (err != -ERANGE) + goto fail; + + /* adjust start offset OOB */ + err = bpf_dynptr_adjust(&meta, META_SIZE + 1, META_SIZE + 1); + if (err != -ERANGE) + goto fail; + + /* slice offset OOB */ + p = bpf_dynptr_slice(&meta, META_SIZE, NULL, sizeof(*p)); + if (p) + goto fail; + + /* slice rdwr offset OOB */ + p = bpf_dynptr_slice_rdwr(&meta, META_SIZE, NULL, sizeof(*p)); + if (p) + goto fail; + + return TC_ACT_UNSPEC; +fail: + return TC_ACT_SHOT; +} + /* Reserve and clear space for metadata but don't populate it */ SEC("xdp") int ing_xdp_zalloc_meta(struct xdp_md *ctx) -- cgit v1.2.3 From 403fae59781fddc699af761f38ed024d3245096b Mon Sep 17 00:00:00 2001 From: Jakub Sitnicki Date: Thu, 14 Aug 2025 11:59:35 +0200 Subject: selftests/bpf: Cover metadata access from a modified skb clone Demonstrate that, when processing an skb clone, the metadata gets truncated if the program contains a direct write to either the payload or the metadata, due to an implicit unclone in the prologue, and otherwise the dynptr to the metadata is limited to being read-only. Signed-off-by: Jakub Sitnicki Signed-off-by: Martin KaFai Lau Link: https://patch.msgid.link/20250814-skb-metadata-thru-dynptr-v7-9-8a39e636e0fb@cloudflare.com --- tools/testing/selftests/bpf/config | 1 + .../bpf/prog_tests/xdp_context_test_run.c | 123 +++++++++++-- tools/testing/selftests/bpf/progs/test_xdp_meta.c | 191 +++++++++++++++++++++ 3 files changed, 305 insertions(+), 10 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 8916ab814a3e..70b28c1e653e 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -61,6 +61,7 @@ CONFIG_MPLS_IPTUNNEL=y CONFIG_MPLS_ROUTING=y CONFIG_MPTCP=y CONFIG_NET_ACT_GACT=y +CONFIG_NET_ACT_MIRRED=y CONFIG_NET_ACT_SKBMOD=y CONFIG_NET_CLS=y CONFIG_NET_CLS_ACT=y diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index 24a7b4b7fdb6..46e0730174ed 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -9,6 +9,7 @@ #define TX_NETNS "xdp_context_tx" #define RX_NETNS "xdp_context_rx" #define TAP_NAME "tap0" +#define DUMMY_NAME "dum0" #define TAP_NETNS "xdp_context_tuntap" #define TEST_PAYLOAD_LEN 32 @@ -156,6 +157,22 @@ err: return -1; } +static int write_test_packet(int tap_fd) +{ + __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; + int n; + + /* The ethernet header doesn't need to be valid for this test */ + memset(packet, 0, sizeof(struct ethhdr)); + memcpy(packet + sizeof(struct ethhdr), test_payload, TEST_PAYLOAD_LEN); + + n = write(tap_fd, packet, sizeof(packet)); + if (!ASSERT_EQ(n, sizeof(packet), "write packet")) + return -1; + + return 0; +} + static void assert_test_result(const struct bpf_map *result_map) { int err; @@ -276,7 +293,6 @@ static void test_tuntap(struct bpf_program *xdp_prog, LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); struct netns_obj *ns = NULL; - __u8 packet[sizeof(struct ethhdr) + TEST_PAYLOAD_LEN]; int tap_fd = -1; int tap_ifindex; int ret; @@ -322,19 +338,82 @@ static void test_tuntap(struct bpf_program *xdp_prog, if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) goto close; - /* The ethernet header is not relevant for this test and doesn't need to - * be meaningful. - */ - struct ethhdr eth = { 0 }; + ret = write_test_packet(tap_fd); + if (!ASSERT_OK(ret, "write_test_packet")) + goto close; - memcpy(packet, ð, sizeof(eth)); - memcpy(packet + sizeof(eth), test_payload, TEST_PAYLOAD_LEN); + assert_test_result(result_map); + +close: + if (tap_fd >= 0) + close(tap_fd); + netns_free(ns); +} + +/* Write a packet to a tap dev and copy it to ingress of a dummy dev */ +static void test_tuntap_mirred(struct bpf_program *xdp_prog, + struct bpf_program *tc_prog, + bool *test_pass) +{ + LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1); + struct netns_obj *ns = NULL; + int dummy_ifindex; + int tap_fd = -1; + int tap_ifindex; + int ret; - ret = write(tap_fd, packet, sizeof(packet)); - if (!ASSERT_EQ(ret, sizeof(packet), "write packet")) + *test_pass = false; + + ns = netns_new(TAP_NETNS, true); + if (!ASSERT_OK_PTR(ns, "netns_new")) + return; + + /* Setup dummy interface */ + SYS(close, "ip link add name " DUMMY_NAME " type dummy"); + SYS(close, "ip link set dev " DUMMY_NAME " up"); + + dummy_ifindex = if_nametoindex(DUMMY_NAME); + if (!ASSERT_GE(dummy_ifindex, 0, "if_nametoindex")) goto close; - assert_test_result(result_map); + tc_hook.ifindex = dummy_ifindex; + ret = bpf_tc_hook_create(&tc_hook); + if (!ASSERT_OK(ret, "bpf_tc_hook_create")) + goto close; + + tc_opts.prog_fd = bpf_program__fd(tc_prog); + ret = bpf_tc_attach(&tc_hook, &tc_opts); + if (!ASSERT_OK(ret, "bpf_tc_attach")) + goto close; + + /* Setup TAP interface */ + tap_fd = open_tuntap(TAP_NAME, true); + if (!ASSERT_GE(tap_fd, 0, "open_tuntap")) + goto close; + + SYS(close, "ip link set dev " TAP_NAME " up"); + + tap_ifindex = if_nametoindex(TAP_NAME); + if (!ASSERT_GE(tap_ifindex, 0, "if_nametoindex")) + goto close; + + ret = bpf_xdp_attach(tap_ifindex, bpf_program__fd(xdp_prog), 0, NULL); + if (!ASSERT_GE(ret, 0, "bpf_xdp_attach")) + goto close; + + /* Copy all packets received from TAP to dummy ingress */ + SYS(close, "tc qdisc add dev " TAP_NAME " clsact"); + SYS(close, "tc filter add dev " TAP_NAME " ingress " + "protocol all matchall " + "action mirred ingress mirror dev " DUMMY_NAME); + + /* Receive a packet on TAP */ + ret = write_test_packet(tap_fd); + if (!ASSERT_OK(ret, "write_test_packet")) + goto close; + + ASSERT_TRUE(*test_pass, "test_pass"); close: if (tap_fd >= 0) @@ -385,6 +464,30 @@ void test_xdp_context_tuntap(void) skel->progs.ing_cls_dynptr_offset_oob, skel->progs.ing_cls, skel->maps.test_result); + if (test__start_subtest("clone_data_meta_empty_on_data_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_empty_on_data_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_data_meta_empty_on_meta_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_data_meta_empty_on_meta_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_empty_on_data_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_empty_on_data_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_empty_on_meta_slice_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_empty_on_meta_slice_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_rdonly_before_data_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_rdonly_before_data_dynptr_write, + &skel->bss->test_pass); + if (test__start_subtest("clone_dynptr_rdonly_before_meta_dynptr_write")) + test_tuntap_mirred(skel->progs.ing_xdp, + skel->progs.clone_dynptr_rdonly_before_meta_dynptr_write, + &skel->bss->test_pass); test_xdp_meta__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c index ee3d8adf5e9c..d79cb74b571e 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c @@ -26,6 +26,8 @@ struct { __uint(value_size, META_SIZE); } test_result SEC(".maps"); +bool test_pass; + SEC("tc") int ing_cls(struct __sk_buff *ctx) { @@ -301,4 +303,193 @@ int ing_xdp(struct xdp_md *ctx) return XDP_PASS; } +/* + * Check that skb->data_meta..skb->data is empty if prog writes to packet + * _payload_ using packet pointers. Applies only to cloned skbs. + */ +SEC("tc") +int clone_data_meta_empty_on_data_write(struct __sk_buff *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + + if (eth + 1 > ctx_ptr(ctx, data_end)) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + if (ctx->data_meta != ctx->data) + goto out; + + /* Packet write to trigger unclone in prologue */ + eth->h_proto = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb->data_meta..skb->data is empty if prog writes to packet + * _metadata_ using packet pointers. Applies only to cloned skbs. + */ +SEC("tc") +int clone_data_meta_empty_on_meta_write(struct __sk_buff *ctx) +{ + struct ethhdr *eth = ctx_ptr(ctx, data); + __u8 *md = ctx_ptr(ctx, data_meta); + + if (eth + 1 > ctx_ptr(ctx, data_end)) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + if (md + 1 > ctx_ptr(ctx, data)) { + /* Expect no metadata */ + test_pass = true; + } else { + /* Metadata write to trigger unclone in prologue */ + *md = 42; + } +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is writable but empty if prog writes to packet + * _payload_ using a dynptr slice. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_empty_on_data_slice_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice_rdwr(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + goto out; + + /* Packet write to trigger unclone in prologue */ + eth->h_proto = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is writable but empty if prog writes to packet + * _metadata_ using a dynptr slice. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_empty_on_meta_slice_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + __u8 *md; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect no metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) > 0) + goto out; + + /* Metadata write to trigger unclone in prologue */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + md = bpf_dynptr_slice_rdwr(&meta, 0, NULL, sizeof(*md)); + if (md) + *md = 42; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is read-only before prog writes to packet payload + * using dynptr_write helper. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_rdonly_before_data_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect read-only metadata before unclone */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + goto out; + + /* Helper write to payload will unclone the packet */ + bpf_dynptr_write(&data, offsetof(struct ethhdr, h_proto), "x", 1, 0); + + /* Expect no metadata after unclone */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != 0) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + +/* + * Check that skb_meta dynptr is read-only if prog writes to packet + * metadata using dynptr_write helper. Applies only to cloned skbs. + */ +SEC("tc") +int clone_dynptr_rdonly_before_meta_dynptr_write(struct __sk_buff *ctx) +{ + struct bpf_dynptr data, meta; + const struct ethhdr *eth; + + bpf_dynptr_from_skb(ctx, 0, &data); + eth = bpf_dynptr_slice(&data, 0, NULL, sizeof(*eth)); + if (!eth) + goto out; + /* Ignore non-test packets */ + if (eth->h_proto != 0) + goto out; + + /* Expect read-only metadata */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (!bpf_dynptr_is_rdonly(&meta) || bpf_dynptr_size(&meta) != META_SIZE) + goto out; + + /* Metadata write. Expect failure. */ + bpf_dynptr_from_skb_meta(ctx, 0, &meta); + if (bpf_dynptr_write(&meta, 0, "x", 1, 0) != -EINVAL) + goto out; + + test_pass = true; +out: + return TC_ACT_SHOT; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 452690be7de2f91cc0de68cb9e95252875b33503 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:21 +0200 Subject: selftests: mptcp: pm: check flush doesn't reset limits This modification is linked to the parent commit where the received ADD_ADDR limit was accidentally reset when the endpoints were flushed. To validate that, the test is now flushing endpoints after having set new limits, and before checking them. The 'Fixes' tag here below is the same as the one from the previous commit: this patch here is not fixing anything wrong in the selftests, but it validates the previous fix for an issue introduced by this commit ID. Fixes: 01cacb00b35c ("mptcp: add netlink-based PM") Cc: stable@vger.kernel.org Reviewed-by: Mat Martineau Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-3-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/pm_netlink.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh index 2e6648a2b2c0..ac7ec6f94023 100755 --- a/tools/testing/selftests/net/mptcp/pm_netlink.sh +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -198,6 +198,7 @@ set_limits 1 9 2>/dev/null check "get_limits" "${default_limits}" "subflows above hard limit" set_limits 8 8 +flush_endpoint ## to make sure it doesn't affect the limits check "get_limits" "$(format_limits 8 8)" "set limits" flush_endpoint -- cgit v1.2.3 From f92199f551e617fae028c5c5905ddd63e3616e18 Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 15 Aug 2025 19:28:24 +0200 Subject: selftests: mptcp: disable add_addr retrans in endpoint_tests To prevent test instability in the "delete re-add signal" test caused by ADD_ADDR retransmissions, disable retransmissions for this test by setting net.mptcp.add_addr_timeout to 0. Suggested-by: Matthieu Baerts Signed-off-by: Geliang Tang Reviewed-by: Matthieu Baerts (NGI0) Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-6-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index b8af65373b3a..82cae37d9c20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3842,6 +3842,7 @@ endpoint_tests() # remove and re-add if reset_with_events "delete re-add signal" && mptcp_lib_kallsyms_has "subflow_rebuild_header$"; then + ip netns exec $ns1 sysctl -q net.mptcp.add_addr_timeout=0 pm_nl_set_limits $ns1 0 3 pm_nl_set_limits $ns2 3 3 pm_nl_add_endpoint $ns1 10.0.2.1 id 1 flags signal -- cgit v1.2.3 From 2eefbed30d46d5e68593baf6b52923e00e7678af Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:25 +0200 Subject: selftests: mptcp: connect: fix C23 extension warning GCC was complaining about the new label: mptcp_connect.c:187:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 187 | int err = getaddrinfo(node, service, hints, res); | ^ Simply declare 'err' before the label to avoid this warning. Fixes: a862771d1aa4 ("selftests: mptcp: use IPPROTO_MPTCP for getaddrinfo") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-7-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index ac1349c4b9e5..4f07ac9fa207 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -183,9 +183,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; -- cgit v1.2.3 From 3259889fd3c0cc165b7e9ee375c789875dd32326 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Fri, 15 Aug 2025 19:28:26 +0200 Subject: selftests: mptcp: sockopt: fix C23 extension warning GCC was complaining about the new label: mptcp_inq.c:79:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 79 | int err = getaddrinfo(node, service, hints, res); | ^ mptcp_sockopt.c:166:2: warning: label followed by a declaration is a C23 extension [-Wc23-extensions] 166 | int err = getaddrinfo(node, service, hints, res); | ^ Simply declare 'err' before the label to avoid this warning. Fixes: dd367e81b79a ("selftests: mptcp: sockopt: use IPPROTO_MPTCP for getaddrinfo") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20250815-net-mptcp-misc-fixes-6-17-rc2-v1-8-521fe9957892@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_inq.c | 5 +++-- tools/testing/selftests/net/mptcp/mptcp_sockopt.c | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c index 3cf1e2a612ce..f3bcaa48df8f 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_inq.c +++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c @@ -75,9 +75,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c index 9934a68df237..e934dd26a59d 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c +++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c @@ -162,9 +162,10 @@ static void xgetaddrinfo(const char *node, const char *service, struct addrinfo *hints, struct addrinfo **res) { -again: - int err = getaddrinfo(node, service, hints, res); + int err; +again: + err = getaddrinfo(node, service, hints, res); if (err) { const char *errstr; -- cgit v1.2.3 From f37b55ded8ed35424ebb91a4d012527071e1f601 Mon Sep 17 00:00:00 2001 From: Li Li Date: Sun, 27 Jul 2025 18:29:07 +0000 Subject: binder: add transaction_report feature entry Add "transaction_report" to the binderfs feature list, to help userspace determine if the "BINDER_CMD_REPORT" generic netlink api is supported by the binder driver. Signed-off-by: Li Li Signed-off-by: Carlos Llamas Link: https://lore.kernel.org/r/20250727182932.2499194-5-cmllamas@google.com Signed-off-by: Greg Kroah-Hartman --- drivers/android/binderfs.c | 8 ++++++++ tools/testing/selftests/filesystems/binderfs/binderfs_test.c | 1 + 2 files changed, 9 insertions(+) (limited to 'tools/testing') diff --git a/drivers/android/binderfs.c b/drivers/android/binderfs.c index 0d9d95a7fb60..be8e64eb39ec 100644 --- a/drivers/android/binderfs.c +++ b/drivers/android/binderfs.c @@ -59,6 +59,7 @@ struct binder_features { bool oneway_spam_detection; bool extended_error; bool freeze_notification; + bool transaction_report; }; static const struct constant_table binderfs_param_stats[] = { @@ -76,6 +77,7 @@ static struct binder_features binder_features = { .oneway_spam_detection = true, .extended_error = true, .freeze_notification = true, + .transaction_report = true, }; static inline struct binderfs_info *BINDERFS_SB(const struct super_block *sb) @@ -601,6 +603,12 @@ static int init_binder_features(struct super_block *sb) if (IS_ERR(dentry)) return PTR_ERR(dentry); + dentry = binderfs_create_file(dir, "transaction_report", + &binder_features_fops, + &binder_features.transaction_report); + if (IS_ERR(dentry)) + return PTR_ERR(dentry); + return 0; } diff --git a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c index 81db85a5cc16..39a68078a79b 100644 --- a/tools/testing/selftests/filesystems/binderfs/binderfs_test.c +++ b/tools/testing/selftests/filesystems/binderfs/binderfs_test.c @@ -65,6 +65,7 @@ static int __do_binderfs_test(struct __test_metadata *_metadata) "oneway_spam_detection", "extended_error", "freeze_notification", + "transaction_report", }; change_mountns(_metadata); -- cgit v1.2.3 From 0283b8f134e499a51bb972403eb47cda6b960f7c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Thu, 14 Aug 2025 18:33:14 -0700 Subject: selftests: drv-net: test the napi init state Test that threaded state (in the persistent NAPI config) gets updated even when NAPI with given ID is not allocated at the time. This test is validating commit ccba9f6baa90 ("net: update NAPI threaded config even for disabled NAPIs"). Signed-off-by: Jakub Kicinski Reviewed-by: Joe Damato Link: https://patch.msgid.link/20250815013314.2237512-1-kuba@kernel.org Signed-off-by: Paolo Abeni --- .../testing/selftests/drivers/net/napi_threaded.py | 31 +++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/napi_threaded.py b/tools/testing/selftests/drivers/net/napi_threaded.py index 9699a100a87d..ed66efa481b0 100755 --- a/tools/testing/selftests/drivers/net/napi_threaded.py +++ b/tools/testing/selftests/drivers/net/napi_threaded.py @@ -38,6 +38,34 @@ def _setup_deferred_cleanup(cfg) -> None: return combined +def napi_init(cfg, nl) -> None: + """ + Test that threaded state (in the persistent NAPI config) gets updated + even when NAPI with given ID is not allocated at the time. + """ + + qcnt = _setup_deferred_cleanup(cfg) + + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 0) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'disabled') + ksft_eq(napi.get('pid'), None) + + cmd(f"ethtool -L {cfg.ifname} combined 1") + _set_threaded_state(cfg, 1) + cmd(f"ethtool -L {cfg.ifname} combined {qcnt}") + + napis = nl.napi_get({'ifindex': cfg.ifindex}, dump=True) + for napi in napis: + ksft_eq(napi['threaded'], 'enabled') + ksft_ne(napi.get('pid'), None) + + def enable_dev_threaded_disable_napi_threaded(cfg, nl) -> None: """ Test that when napi threaded is enabled at device level and @@ -103,7 +131,8 @@ def main() -> None: """ Ksft boiler plate main """ with NetDrvEnv(__file__, queue_count=2) as cfg: - ksft_run([change_num_queues, + ksft_run([napi_init, + change_num_queues, enable_dev_threaded_disable_napi_threaded], args=(cfg, NetdevFamily())) ksft_exit() -- cgit v1.2.3 From a585b87614511e4272305bb5acf406f19f7dcade Mon Sep 17 00:00:00 2001 From: James Houghton Date: Fri, 11 Jul 2025 00:17:42 +0000 Subject: KVM: selftests: Fix signedness issue with vCPU mmap size check Check that the return value of KVM_GET_VCPU_MMAP_SIZE is non-negative before comparing with sizeof(kvm_run). If KVM_GET_VCPU_MMAP_SIZE fails, it will return -1, and `-1 > sizeof(kvm_run)` is true, so the ASSERT passes. There are no other locations in tools/testing/selftests/kvm that make the same mistake. Signed-off-by: James Houghton Link: https://lore.kernel.org/r/20250711001742.1965347-1-jthoughton@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/lib/kvm_util.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index c3f5142b0a54..40e18034ac23 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -24,7 +24,7 @@ uint32_t guest_random_seed; struct guest_random_state guest_rng; static uint32_t last_guest_seed; -static int vcpu_mmap_sz(void); +static size_t vcpu_mmap_sz(void); int __open_path_or_exit(const char *path, int flags, const char *enoent_help) { @@ -1321,14 +1321,14 @@ void vm_guest_mem_fallocate(struct kvm_vm *vm, uint64_t base, uint64_t size, } /* Returns the size of a vCPU's kvm_run structure. */ -static int vcpu_mmap_sz(void) +static size_t vcpu_mmap_sz(void) { int dev_fd, ret; dev_fd = open_kvm_dev_path_or_exit(); ret = ioctl(dev_fd, KVM_GET_VCPU_MMAP_SIZE, NULL); - TEST_ASSERT(ret >= sizeof(struct kvm_run), + TEST_ASSERT(ret >= 0 && ret >= sizeof(struct kvm_run), KVM_IOCTL_ERROR(KVM_GET_VCPU_MMAP_SIZE, ret)); close(dev_fd); @@ -1369,7 +1369,7 @@ struct kvm_vcpu *__vm_vcpu_add(struct kvm_vm *vm, uint32_t vcpu_id) TEST_ASSERT_VM_VCPU_IOCTL(vcpu->fd >= 0, KVM_CREATE_VCPU, vcpu->fd, vm); TEST_ASSERT(vcpu_mmap_sz() >= sizeof(*vcpu->run), "vcpu mmap size " - "smaller than expected, vcpu_mmap_sz: %i expected_min: %zi", + "smaller than expected, vcpu_mmap_sz: %zi expected_min: %zi", vcpu_mmap_sz(), sizeof(*vcpu->run)); vcpu->run = (struct kvm_run *) mmap(NULL, vcpu_mmap_sz(), PROT_READ | PROT_WRITE, MAP_SHARED, vcpu->fd, 0); -- cgit v1.2.3 From e2bcf62a2e78f8d7e95485c0347bccfba3c5e459 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Wed, 6 Aug 2025 15:51:59 -0700 Subject: KVM: selftests: Move Intel and AMD module param helpers to x86/processor.h Move the x86 specific helpers for getting kvm_{amd,intel} module params to x86 where they belong. Expose the module-agnostic helpers globally, there is nothing secret about the logic. Link: https://lore.kernel.org/r/20250806225159.1687326-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/include/kvm_util.h | 17 +++++++---- .../testing/selftests/kvm/include/x86/processor.h | 20 +++++++++++++ tools/testing/selftests/kvm/lib/kvm_util.c | 34 ++-------------------- 3 files changed, 33 insertions(+), 38 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/include/kvm_util.h b/tools/testing/selftests/kvm/include/kvm_util.h index 23a506d7eca3..652ac01e1adc 100644 --- a/tools/testing/selftests/kvm/include/kvm_util.h +++ b/tools/testing/selftests/kvm/include/kvm_util.h @@ -260,13 +260,18 @@ int __open_path_or_exit(const char *path, int flags, const char *enoent_help); int open_path_or_exit(const char *path, int flags); int open_kvm_dev_path_or_exit(void); -bool get_kvm_param_bool(const char *param); -bool get_kvm_intel_param_bool(const char *param); -bool get_kvm_amd_param_bool(const char *param); +int kvm_get_module_param_integer(const char *module_name, const char *param); +bool kvm_get_module_param_bool(const char *module_name, const char *param); -int get_kvm_param_integer(const char *param); -int get_kvm_intel_param_integer(const char *param); -int get_kvm_amd_param_integer(const char *param); +static inline bool get_kvm_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm", param); +} + +static inline int get_kvm_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm", param); +} unsigned int kvm_check_cap(long cap); diff --git a/tools/testing/selftests/kvm/include/x86/processor.h b/tools/testing/selftests/kvm/include/x86/processor.h index 2efb05c2f2fb..488d516c4f6f 100644 --- a/tools/testing/selftests/kvm/include/x86/processor.h +++ b/tools/testing/selftests/kvm/include/x86/processor.h @@ -1314,6 +1314,26 @@ static inline uint8_t xsetbv_safe(uint32_t index, uint64_t value) bool kvm_is_tdp_enabled(void); +static inline bool get_kvm_intel_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_intel", param); +} + +static inline bool get_kvm_amd_param_bool(const char *param) +{ + return kvm_get_module_param_bool("kvm_amd", param); +} + +static inline int get_kvm_intel_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_intel", param); +} + +static inline int get_kvm_amd_param_integer(const char *param) +{ + return kvm_get_module_param_integer("kvm_amd", param); +} + static inline bool kvm_is_pmu_enabled(void) { return get_kvm_param_bool("enable_pmu"); diff --git a/tools/testing/selftests/kvm/lib/kvm_util.c b/tools/testing/selftests/kvm/lib/kvm_util.c index 40e18034ac23..98cb8073c82b 100644 --- a/tools/testing/selftests/kvm/lib/kvm_util.c +++ b/tools/testing/selftests/kvm/lib/kvm_util.c @@ -95,7 +95,7 @@ static ssize_t get_module_param(const char *module_name, const char *param, return bytes_read; } -static int get_module_param_integer(const char *module_name, const char *param) +int kvm_get_module_param_integer(const char *module_name, const char *param) { /* * 16 bytes to hold a 64-bit value (1 byte per char), 1 byte for the @@ -119,7 +119,7 @@ static int get_module_param_integer(const char *module_name, const char *param) return atoi_paranoid(value); } -static bool get_module_param_bool(const char *module_name, const char *param) +bool kvm_get_module_param_bool(const char *module_name, const char *param) { char value; ssize_t r; @@ -135,36 +135,6 @@ static bool get_module_param_bool(const char *module_name, const char *param) TEST_FAIL("Unrecognized value '%c' for boolean module param", value); } -bool get_kvm_param_bool(const char *param) -{ - return get_module_param_bool("kvm", param); -} - -bool get_kvm_intel_param_bool(const char *param) -{ - return get_module_param_bool("kvm_intel", param); -} - -bool get_kvm_amd_param_bool(const char *param) -{ - return get_module_param_bool("kvm_amd", param); -} - -int get_kvm_param_integer(const char *param) -{ - return get_module_param_integer("kvm", param); -} - -int get_kvm_intel_param_integer(const char *param) -{ - return get_module_param_integer("kvm_intel", param); -} - -int get_kvm_amd_param_integer(const char *param) -{ - return get_module_param_integer("kvm_amd", param); -} - /* * Capability * -- cgit v1.2.3 From 05f297c3e39f62f579458a59ab1b1c8bf3fc1c51 Mon Sep 17 00:00:00 2001 From: Gopi Krishna Menon Date: Wed, 13 Aug 2025 21:17:51 +0530 Subject: KVM: selftests: fix minor typo in cpumodel_subfuncs Specifically, fix spelling of "available" in main function. Signed-off-by: Gopi Krishna Menon Link: https://lore.kernel.org/r/20250813154751.5725-1-krishnagopi487@gmail.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c index 27255880dabd..aded795d42be 100644 --- a/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c +++ b/tools/testing/selftests/kvm/s390/cpumodel_subfuncs_test.c @@ -291,7 +291,7 @@ int main(int argc, char *argv[]) ksft_test_result_pass("%s\n", testlist[idx].subfunc_name); free(array); } else { - ksft_test_result_skip("%s feature is not avaialable\n", + ksft_test_result_skip("%s feature is not available\n", testlist[idx].subfunc_name); } } -- cgit v1.2.3 From 9a6a6a3191574a01dcf7a7d9385246d7bc8736bc Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Mon, 11 Aug 2025 06:26:54 +0100 Subject: tools/testing: add linux/args.h header and fix radix, VMA tests Commit 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") accidentally broke the radix tree, VMA userland tests by including linux/args.h which is not present in the tools/include directory. This patch copies this over and adds an #ifdef block to avoid duplicate __CONCAT declaration in conflict with system headers when we ultimately include this. Link: https://lkml.kernel.org/r/20250811052654.33286-1-lorenzo.stoakes@oracle.com Fixes: 857d18f23ab1 ("cleanup: Introduce ACQUIRE() and ACQUIRE_ERR() for conditional locks") Signed-off-by: Lorenzo Stoakes Cc: Jann Horn Cc: John Hubbard Cc: Liam Howlett Cc: Sidhartha Kumar Cc: Vlastimil Babka Cc: Dan Williams Cc: Peter Zijlstra Signed-off-by: Andrew Morton --- tools/include/linux/args.h | 28 ++++++++++++++++++++++++++++ tools/testing/shared/linux/idr.h | 4 ++++ 2 files changed, 32 insertions(+) create mode 100644 tools/include/linux/args.h (limited to 'tools/testing') diff --git a/tools/include/linux/args.h b/tools/include/linux/args.h new file mode 100644 index 000000000000..2e8e65d975c7 --- /dev/null +++ b/tools/include/linux/args.h @@ -0,0 +1,28 @@ +/* SPDX-License-Identifier: GPL-2.0 */ + +#ifndef _LINUX_ARGS_H +#define _LINUX_ARGS_H + +/* + * How do these macros work? + * + * In __COUNT_ARGS() _0 to _12 are just placeholders from the start + * in order to make sure _n is positioned over the correct number + * from 12 to 0 (depending on X, which is a variadic argument list). + * They serve no purpose other than occupying a position. Since each + * macro parameter must have a distinct identifier, those identifiers + * are as good as any. + * + * In COUNT_ARGS() we use actual integers, so __COUNT_ARGS() returns + * that as _n. + */ + +/* This counts to 15. Any more, it will return 16th argument. */ +#define __COUNT_ARGS(_0, _1, _2, _3, _4, _5, _6, _7, _8, _9, _10, _11, _12, _13, _14, _15, _n, X...) _n +#define COUNT_ARGS(X...) __COUNT_ARGS(, ##X, 15, 14, 13, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) + +/* Concatenate two parameters, but allow them to be expanded beforehand. */ +#define __CONCAT(a, b) a ## b +#define CONCATENATE(a, b) __CONCAT(a, b) + +#endif /* _LINUX_ARGS_H */ diff --git a/tools/testing/shared/linux/idr.h b/tools/testing/shared/linux/idr.h index 4e342f2e37cf..676c5564e33f 100644 --- a/tools/testing/shared/linux/idr.h +++ b/tools/testing/shared/linux/idr.h @@ -1 +1,5 @@ +/* Avoid duplicate definitions due to system headers. */ +#ifdef __CONCAT +#undef __CONCAT +#endif #include "../../../../include/linux/idr.h" -- cgit v1.2.3 From 742d3663a5775cb7b957f4ca2ddb4ccd26badb94 Mon Sep 17 00:00:00 2001 From: Lorenzo Stoakes Date: Sun, 3 Aug 2025 12:11:23 +0100 Subject: selftests/mm: add test for invalid multi VMA operations We can use UFFD to easily assert invalid multi VMA moves, so do so, asserting expected behaviour when VMAs invalid for a multi VMA operation are encountered. We assert both that such operations are not permitted, and that we do not even attempt to move the first VMA under these circumstances. We also assert that we can still move a single VMA regardless. We then assert that a partial failure can occur if the invalid VMA appears later in the range of multiple VMAs, both at the very next VMA, and also at the end of the range. As part of this change, we are using the is_range_valid() helper more aggressively. Therefore, fix a bug where stale buffered data would hang around on success, causing subsequent calls to is_range_valid() to potentially give invalid results. We simply have to fflush() the stream on success to resolve this issue. Link: https://lkml.kernel.org/r/c4fb86dd5ba37610583ad5fc0e0c2306ddf318b9.1754218667.git.lorenzo.stoakes@oracle.com Signed-off-by: Lorenzo Stoakes Cc: David Hildenbrand Cc: Jann Horn Cc: Liam Howlett Cc: Michal Hocko Cc: Mike Rapoport Cc: Suren Baghdasaryan Cc: Vlastimil Babka Signed-off-by: Andrew Morton --- tools/testing/selftests/mm/mremap_test.c | 264 ++++++++++++++++++++++++++++++- 1 file changed, 261 insertions(+), 3 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/mm/mremap_test.c b/tools/testing/selftests/mm/mremap_test.c index fccf9e797a0c..5bd52a951cbd 100644 --- a/tools/testing/selftests/mm/mremap_test.c +++ b/tools/testing/selftests/mm/mremap_test.c @@ -5,10 +5,14 @@ #define _GNU_SOURCE #include +#include +#include #include #include #include +#include #include +#include #include #include @@ -168,6 +172,7 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, if (first_val <= start && second_val >= end) { success = true; + fflush(maps_fp); break; } } @@ -175,6 +180,15 @@ static bool is_range_mapped(FILE *maps_fp, unsigned long start, return success; } +/* Check if [ptr, ptr + size) mapped in /proc/self/maps. */ +static bool is_ptr_mapped(FILE *maps_fp, void *ptr, unsigned long size) +{ + unsigned long start = (unsigned long)ptr; + unsigned long end = start + size; + + return is_range_mapped(maps_fp, start, end); +} + /* * Returns the start address of the mapping on success, else returns * NULL on failure. @@ -733,6 +747,249 @@ out: dont_unmap ? " [dontunnmap]" : ""); } +#ifdef __NR_userfaultfd +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, + unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + const size_t size = 10 * page_size; + bool success = true; + char *ptr, *tgt_ptr; + int uffd, err, i; + void *res; + struct uffdio_api api = { + .api = UFFD_API, + .features = UFFD_EVENT_PAGEFAULT, + }; + + uffd = syscall(__NR_userfaultfd, O_NONBLOCK); + if (uffd == -1) { + err = errno; + perror("userfaultfd"); + if (err == EPERM) { + ksft_test_result_skip("%s - missing uffd", test_name); + return; + } + success = false; + goto out; + } + if (ioctl(uffd, UFFDIO_API, &api)) { + perror("ioctl UFFDIO_API"); + success = false; + goto out_close_uffd; + } + + ptr = mmap(NULL, size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON, -1, 0); + if (ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + + tgt_ptr = mmap(NULL, size, PROT_NONE, MAP_PRIVATE | MAP_ANON, -1, 0); + if (tgt_ptr == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_close_uffd; + } + if (munmap(tgt_ptr, size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + + /* + * Unmap so we end up with: + * + * 0 2 4 6 8 10 offset in buffer + * |*| |*| |*| |*| |*| + * |*| |*| |*| |*| |*| + * + * Additionally, register each with UFFD. + */ + for (i = 0; i < 10; i += 2) { + void *unmap_ptr = &ptr[(i + 1) * page_size]; + unsigned long start = (unsigned long)&ptr[i * page_size]; + struct uffdio_register reg = { + .range = { + .start = start, + .len = page_size, + }, + .mode = UFFDIO_REGISTER_MODE_MISSING, + }; + + if (ioctl(uffd, UFFDIO_REGISTER, ®) == -1) { + perror("ioctl UFFDIO_REGISTER"); + success = false; + goto out_unmap; + } + if (munmap(unmap_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range which is invalid for multi VMA move. + * + * This will fail, and no VMA should be moved, as we check this ahead of + * time. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, + "Invalid uffd-armed VMA at start of multi range moved\n"); + success = false; + goto out_unmap; + } + + /* + * Now try to move a single VMA, this should succeed as not multi VMA + * move. + */ + res = mremap(ptr, page_size, page_size, + MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + if (res == MAP_FAILED) { + perror("mremap single invalid-multi VMA"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and remap a non-uffd registered (therefore, multi VMA + * move valid) VMA at the start of ptr range. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + res = mmap(ptr, page_size, PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + + /* + * Now try to move the entire range, we should succeed in moving the + * first VMA, but no others, and report a failure. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + if (!is_ptr_mapped(maps_fp, tgt_ptr, page_size)) { + fprintf(stderr, "Valid VMA not moved\n"); + success = false; + goto out_unmap; + } + + /* + * Unmap the VMA, and map valid VMA at start of ptr range, and replace + * all existing multi-move invalid VMAs, except the last, with valid + * multi-move VMAs. + */ + if (munmap(tgt_ptr, page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + if (munmap(ptr, size - 2 * page_size)) { + perror("munmap"); + success = false; + goto out_unmap; + } + for (i = 0; i < 8; i += 2) { + res = mmap(&ptr[i * page_size], page_size, + PROT_READ | PROT_WRITE, + MAP_PRIVATE | MAP_ANON | MAP_FIXED, -1, 0); + if (res == MAP_FAILED) { + perror("mmap"); + success = false; + goto out_unmap; + } + } + + /* + * Now try to move the entire range, we should succeed in moving all but + * the last VMA, and report a failure. + */ + res = mremap(ptr, size, size, MREMAP_MAYMOVE | MREMAP_FIXED, tgt_ptr); + err = errno; + if (res != MAP_FAILED) { + fprintf(stderr, "mremap() succeeded for multi VMA uffd armed\n"); + success = false; + goto out_unmap; + } + if (err != EFAULT) { + errno = err; + perror("mrmeap() unexpected error"); + success = false; + goto out_unmap; + } + + for (i = 0; i < 10; i += 2) { + bool is_mapped = is_ptr_mapped(maps_fp, + &tgt_ptr[i * page_size], page_size); + + if (i < 8 && !is_mapped) { + fprintf(stderr, "Valid VMA not moved at %d\n", i); + success = false; + goto out_unmap; + } else if (i == 8 && is_mapped) { + fprintf(stderr, "Invalid VMA moved at %d\n", i); + success = false; + goto out_unmap; + } + } + +out_unmap: + if (munmap(tgt_ptr, size)) + perror("munmap tgt"); + if (munmap(ptr, size)) + perror("munmap src"); +out_close_uffd: + close(uffd); +out: + if (success) + ksft_test_result_pass("%s\n", test_name); + else + ksft_test_result_fail("%s\n", test_name); +} +#else +static void mremap_move_multi_invalid_vmas(FILE *maps_fp, unsigned long page_size) +{ + char *test_name = "mremap move multiple invalid vmas"; + + ksft_test_result_skip("%s - missing uffd", test_name); +} +#endif /* __NR_userfaultfd */ + /* Returns the time taken for the remap on success else returns -1. */ static long long remap_region(struct config c, unsigned int threshold_mb, char *rand_addr) @@ -1074,7 +1331,7 @@ int main(int argc, char **argv) char *rand_addr; size_t rand_size; int num_expand_tests = 2; - int num_misc_tests = 8; + int num_misc_tests = 9; struct test test_cases[MAX_TEST] = {}; struct test perf_test_cases[MAX_PERF_TEST]; int page_size; @@ -1197,8 +1454,6 @@ int main(int argc, char **argv) mremap_expand_merge(maps_fp, page_size); mremap_expand_merge_offset(maps_fp, page_size); - fclose(maps_fp); - mremap_move_within_range(pattern_seed, rand_addr); mremap_move_1mb_from_start(pattern_seed, rand_addr); mremap_shrink_multiple_vmas(page_size, /* inplace= */true); @@ -1207,6 +1462,9 @@ int main(int argc, char **argv) mremap_move_multiple_vmas(pattern_seed, page_size, /* dontunmap= */ true); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ false); mremap_move_multiple_vmas_split(pattern_seed, page_size, /* dontunmap= */ true); + mremap_move_multi_invalid_vmas(maps_fp, page_size); + + fclose(maps_fp); if (run_perf_tests) { ksft_print_msg("\n%s\n", -- cgit v1.2.3 From 0cc2a4880ced1486acc34898cd85edc6ee109c4c Mon Sep 17 00:00:00 2001 From: Sang-Heon Jeon Date: Tue, 12 Aug 2025 23:00:46 +0900 Subject: selftests/damon: fix selftests by installing drgn related script drgn_dump_damon_status is not installed during kselftest setup. It can break other tests which depend on drgn_dump_damon_status. Install drgn_dump_damon_status files to fix broken test. Link: https://lkml.kernel.org/r/20250812140046.660486-1-ekffu200098@gmail.com Fixes: f3e8e1e51362 ("selftests/damon: add drgn script for extracting damon status") Signed-off-by: Sang-Heon Jeon Reviewed-by: SeongJae Park Cc: Alexandre Ghiti Cc: Honggyu Kim Signed-off-by: Andrew Morton --- tools/testing/selftests/damon/Makefile | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/damon/Makefile b/tools/testing/selftests/damon/Makefile index 5b230deb19e8..9a3499827d4b 100644 --- a/tools/testing/selftests/damon/Makefile +++ b/tools/testing/selftests/damon/Makefile @@ -4,6 +4,7 @@ TEST_GEN_FILES += access_memory access_memory_even TEST_FILES = _damon_sysfs.py +TEST_FILES += drgn_dump_damon_status.py # functionality tests TEST_PROGS += sysfs.sh -- cgit v1.2.3 From eddc821f98afaa13ecd09b02b73ac5946387ffcc Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Aug 2025 15:41:00 -0700 Subject: selftests: drv-net: tso: increase the retransmit threshold We see quite a few flakes during the TSO test against virtualized devices in NIPA. There's often 10-30 retransmissions during the test. Sometimes as many as 100. Set the retransmission threshold at 1/4th of the wire frame target. Link: https://patch.msgid.link/20250815224100.363438-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/tso.py | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/hw/tso.py b/tools/testing/selftests/drivers/net/hw/tso.py index c13dd5efa27a..0998e68ebaf0 100755 --- a/tools/testing/selftests/drivers/net/hw/tso.py +++ b/tools/testing/selftests/drivers/net/hw/tso.py @@ -60,16 +60,17 @@ def run_one_stream(cfg, ipver, remote_v4, remote_v6, should_lso): sock_wait_drain(sock) qstat_new = cfg.netnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] - # No math behind the 10 here, but try to catch cases where - # TCP falls back to non-LSO. - ksft_lt(tcp_sock_get_retrans(sock), 10) - sock.close() - # Check that at least 90% of the data was sent as LSO packets. # System noise may cause false negatives. Also header overheads # will add up to 5% of extra packes... The check is best effort. total_lso_wire = len(buf) * 0.90 // cfg.dev["mtu"] total_lso_super = len(buf) * 0.90 // cfg.dev["tso_max_size"] + + # Make sure we have order of magnitude more LSO packets than + # retransmits, in case TCP retransmitted all the LSO packets. + ksft_lt(tcp_sock_get_retrans(sock), total_lso_wire / 4) + sock.close() + if should_lso: if cfg.have_stat_super_count: ksft_ge(qstat_new['tx-hw-gso-packets'] - -- cgit v1.2.3 From 51992f99f068fba966a680a9ac118b815f2fe08e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 15 Aug 2025 16:15:13 -0700 Subject: selftests: drv-net: ncdevmem: make configure_channels() support combined channels ncdevmem tests that the kernel correctly rejects attempts to deactivate queues with MPs bound. Make the configure_channels() test support combined channels. Currently it tries to set the queue counts to rx N tx N-1, which only makes sense for devices which have IRQs per ring type. Most modern devices used combined IRQs/channels with both Rx and Tx queues. Since the math is total Rx == combined+Rx setting Rx when combined is non-zero will be increasing the total queue count, not decreasing as the test intends. Note that the test would previously also try to set the Tx ring count to Rx - 1, for some reason. Which would be 0 if the device has only 2 queues configured. With this change (device with 2 queues): setting channel count rx:1 tx:1 YNL set channels: Kernel error: 'requested channel counts are too low for existing memory provider setting (2)' Reviewed-by: Mina Almasry Link: https://patch.msgid.link/20250815231513.381652-1-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/ncdevmem.c | 78 ++++++++++++++++++++++- 1 file changed, 76 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c index be937542b4c0..71961a7688e6 100644 --- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c +++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c @@ -356,7 +356,81 @@ static int configure_rss(void) static int configure_channels(unsigned int rx, unsigned int tx) { - return run_command("ethtool -L %s rx %u tx %u", ifname, rx, tx); + struct ethtool_channels_get_req *gchan; + struct ethtool_channels_set_req *schan; + struct ethtool_channels_get_rsp *chan; + struct ynl_error yerr; + struct ynl_sock *ys; + int ret; + + fprintf(stderr, "setting channel count rx:%u tx:%u\n", rx, tx); + + ys = ynl_sock_create(&ynl_ethtool_family, &yerr); + if (!ys) { + fprintf(stderr, "YNL: %s\n", yerr.msg); + return -1; + } + + gchan = ethtool_channels_get_req_alloc(); + if (!gchan) { + ret = -1; + goto exit_close_sock; + } + + ethtool_channels_get_req_set_header_dev_index(gchan, ifindex); + chan = ethtool_channels_get(ys, gchan); + ethtool_channels_get_req_free(gchan); + if (!chan) { + fprintf(stderr, "YNL get channels: %s\n", ys->err.msg); + ret = -1; + goto exit_close_sock; + } + + schan = ethtool_channels_set_req_alloc(); + if (!schan) { + ret = -1; + goto exit_free_chan; + } + + ethtool_channels_set_req_set_header_dev_index(schan, ifindex); + + if (chan->_present.combined_count) { + if (chan->_present.rx_count || chan->_present.tx_count) { + ethtool_channels_set_req_set_rx_count(schan, 0); + ethtool_channels_set_req_set_tx_count(schan, 0); + } + + if (rx == tx) { + ethtool_channels_set_req_set_combined_count(schan, rx); + } else if (rx > tx) { + ethtool_channels_set_req_set_combined_count(schan, tx); + ethtool_channels_set_req_set_rx_count(schan, rx - tx); + } else { + ethtool_channels_set_req_set_combined_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx - rx); + } + + ret = ethtool_channels_set(ys, schan); + if (ret) + fprintf(stderr, "YNL set channels: %s\n", ys->err.msg); + } else if (chan->_present.rx_count) { + ethtool_channels_set_req_set_rx_count(schan, rx); + ethtool_channels_set_req_set_tx_count(schan, tx); + + ret = ethtool_channels_set(ys, schan); + if (ret) + fprintf(stderr, "YNL set channels: %s\n", ys->err.msg); + } else { + fprintf(stderr, "Error: device has neither combined nor rx channels\n"); + ret = -1; + } + ethtool_channels_set_req_free(schan); +exit_free_chan: + ethtool_channels_get_rsp_free(chan); +exit_close_sock: + ynl_sock_destroy(ys); + + return ret; } static int configure_flow_steering(struct sockaddr_in6 *server_sin) @@ -752,7 +826,7 @@ void run_devmem_tests(void) error(1, 0, "Failed to bind\n"); /* Deactivating a bound queue should not be legal */ - if (!configure_channels(num_queues, num_queues - 1)) + if (!configure_channels(num_queues, num_queues)) error(1, 0, "Deactivating a bound queue should be illegal.\n"); /* Closing the netlink socket does an implicit unbind */ -- cgit v1.2.3 From 490a9591b5feb40b91052bbb0e6bc038ed8490ff Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 18 Aug 2025 13:54:22 -0700 Subject: selftests: net: Explicitly enable CONFIG_CRYPTO_SHA1 for IPsec xfrm_policy.sh, nft_flowtable.sh, and vrf-xfrm-tests.sh use 'ip xfrm' with SHA-1, either 'auth sha1' or 'auth-trunc hmac(sha1)'. That requires CONFIG_CRYPTO_SHA1, which CONFIG_INET_ESP intentionally doesn't select (as per its help text). Previously, the config for these tests relied on CONFIG_CRYPTO_SHA1 being selected by the unrelated option CONFIG_IP_SCTP. Since CONFIG_IP_SCTP is being changed to no longer do that, instead add CONFIG_CRYPTO_SHA1 to the configs explicitly. Reported-by: Paolo Abeni Closes: https://lore.kernel.org/r/766e4508-aaba-4cdc-92b4-e116e52ae13b@redhat.com Suggested-by: Florian Westphal Acked-by: Xin Long Signed-off-by: Eric Biggers Link: https://patch.msgid.link/20250818205426.30222-2-ebiggers@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/config | 1 + tools/testing/selftests/net/netfilter/config | 1 + 2 files changed, 2 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index c24417d0047b..d548611e2698 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -26,6 +26,7 @@ CONFIG_IFB=y CONFIG_INET_DIAG=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y +CONFIG_CRYPTO_SHA1=y CONFIG_NET_FOU=y CONFIG_NET_FOU_IP_TUNNELS=y CONFIG_NETFILTER=y diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config index 79d5b33966ba..305e46b819cb 100644 --- a/tools/testing/selftests/net/netfilter/config +++ b/tools/testing/selftests/net/netfilter/config @@ -13,6 +13,7 @@ CONFIG_BRIDGE_VLAN_FILTERING=y CONFIG_CGROUP_BPF=y CONFIG_DUMMY=m CONFIG_INET_ESP=m +CONFIG_CRYPTO_SHA1=m CONFIG_IP_NF_MATCH_RPFILTER=m CONFIG_IP6_NF_MATCH_RPFILTER=m CONFIG_IP_NF_IPTABLES=m -- cgit v1.2.3 From dce1b33ed7430c7189b8cc1567498f9e6bf12731 Mon Sep 17 00:00:00 2001 From: Sean Christopherson Date: Tue, 24 Jun 2025 16:19:30 -0700 Subject: selftests: harness: Rename is_signed_type() to avoid collision with overflow.h Rename is_signed_type() to is_signed_var() to avoid colliding with a macro of the same name defined by tools' linux/overflow.h. This fixes warnings (and presumably potential test failures) in tests that utilize the selftests harness and happen to (indirectly) include overflow.h. In file included from tools/include/linux/bits.h:34, from tools/include/linux/bitops.h:14, from tools/include/linux/hashtable.h:13, from include/kvm_util.h:11, from x86/userspace_msr_exit_test.c:11: tools/include/linux/overflow.h:31:9: error: "is_signed_type" redefined [-Werror] 31 | #define is_signed_type(type) (((type)(-1)) < (type)1) | ^~~~~~~~~~~~~~ In file included from include/kvm_test_harness.h:11, from x86/userspace_msr_exit_test.c:9: ../kselftest_harness.h:754:9: note: this is the location of the previous definition 754 | #define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) | ^~~~~~~~~~~~~~ Use a separate definition, at least for now, as many selftests build without tools/include in their include path. Fixes: fc92099902fb ("tools headers: Synchronize linux/bits.h with the kernel sources") Cc: Vincent Mailhol Cc: Arnaldo Carvalho de Melo Cc: Mark Brown Link: https://lore.kernel.org/r/20250624231930.583689-1-seanjc@google.com Signed-off-by: Sean Christopherson --- tools/testing/selftests/kselftest_harness.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h index 2925e47db995..8516e8434bc4 100644 --- a/tools/testing/selftests/kselftest_harness.h +++ b/tools/testing/selftests/kselftest_harness.h @@ -751,7 +751,7 @@ for (; _metadata->trigger; _metadata->trigger = \ __bail(_assert, _metadata)) -#define is_signed_type(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) +#define is_signed_var(var) (!!(((__typeof__(var))(-1)) < (__typeof__(var))1)) #define __EXPECT(_expected, _expected_str, _seen, _seen_str, _t, _assert) do { \ /* Avoid multiple evaluation of the cases */ \ @@ -759,7 +759,7 @@ __typeof__(_seen) __seen = (_seen); \ if (!(__exp _t __seen)) { \ /* Report with actual signedness to avoid weird output. */ \ - switch (is_signed_type(__exp) * 2 + is_signed_type(__seen)) { \ + switch (is_signed_var(__exp) * 2 + is_signed_var(__seen)) { \ case 0: { \ uintmax_t __exp_print = (uintmax_t)__exp; \ uintmax_t __seen_print = (uintmax_t)__seen; \ -- cgit v1.2.3 From 6b4b1d577e1fe8a4e436a250e098b5c247d978d9 Mon Sep 17 00:00:00 2001 From: Alex Tran Date: Mon, 18 Aug 2025 19:52:27 -0700 Subject: selftests/net/socket.c: removed warnings from unused returns MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit socket.c: In function ‘run_tests’: socket.c:59:25: warning: ignoring return value of ‘strerror_r’ \ declared with attribute ‘warn_unused_result’ [-Wunused-result] 59 | strerror_r(-s->expect, err_string1, ERR_STRING_SZ); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ socket.c:60:25: warning: ignoring return value of ‘strerror_r’ \ declared with attribute ‘warn_unused_result’ [-Wunused-result] 60 | strerror_r(errno, err_string2, ERR_STRING_SZ); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ socket.c:73:33: warning: ignoring return value of ‘strerror_r’ \ declared with attribute ‘warn_unused_result’ [-Wunused-result] 73 | strerror_r(errno, err_string1, ERR_STRING_SZ); | ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ changelog: v2 - const char* messages and fixed patch warnings of max 75 chars per line Signed-off-by: Alex Tran Link: https://patch.msgid.link/20250819025227.239885-1-alex.t.tran@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/socket.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/socket.c b/tools/testing/selftests/net/socket.c index db1aeb8c5d1e..be1080003c61 100644 --- a/tools/testing/selftests/net/socket.c +++ b/tools/testing/selftests/net/socket.c @@ -39,6 +39,7 @@ static int run_tests(void) { char err_string1[ERR_STRING_SZ]; char err_string2[ERR_STRING_SZ]; + const char *msg1, *msg2; int i, err; err = 0; @@ -56,13 +57,13 @@ static int run_tests(void) errno == -s->expect) continue; - strerror_r(-s->expect, err_string1, ERR_STRING_SZ); - strerror_r(errno, err_string2, ERR_STRING_SZ); + msg1 = strerror_r(-s->expect, err_string1, ERR_STRING_SZ); + msg2 = strerror_r(errno, err_string2, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "err (%s) got (%s)\n", s->domain, s->type, s->protocol, - err_string1, err_string2); + msg1, msg2); err = -1; break; @@ -70,12 +71,12 @@ static int run_tests(void) close(fd); if (s->expect < 0) { - strerror_r(errno, err_string1, ERR_STRING_SZ); + msg1 = strerror_r(errno, err_string1, ERR_STRING_SZ); fprintf(stderr, "socket(%d, %d, %d) expected " "success got err (%s)\n", s->domain, s->type, s->protocol, - err_string1); + msg1); err = -1; break; -- cgit v1.2.3 From eacb6e408dc861fb93baf10d6837204a3b39e915 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Aug 2025 07:33:48 +0000 Subject: selftests: net: bpf_offload: print loaded programs on mismatch The test sometimes fails due to an unexpected number of loaded programs. e.g FAIL: 2 BPF programs loaded, expected 1 File "/usr/libexec/kselftests/net/./bpf_offload.py", line 940, in progs = bpftool_prog_list(expected=1) File "/usr/libexec/kselftests/net/./bpf_offload.py", line 187, in bpftool_prog_list fail(True, "%d BPF programs loaded, expected %d" % File "/usr/libexec/kselftests/net/./bpf_offload.py", line 89, in fail tb = "".join(traceback.extract_stack().format()) However, the logs do not show which programs were actually loaded, making it difficult to debug the failure. Add printing of the loaded programs when a mismatch is detected to help troubleshoot such errors. The list is printed on a new line to avoid breaking the current log format. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250819073348.387972-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/bpf_offload.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py index b2c271b79240..c856d266c8f3 100755 --- a/tools/testing/selftests/net/bpf_offload.py +++ b/tools/testing/selftests/net/bpf_offload.py @@ -184,8 +184,8 @@ def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True): progs = [ p for p in progs if not p['orphaned'] ] if expected is not None: if len(progs) != expected: - fail(True, "%d BPF programs loaded, expected %d" % - (len(progs), expected)) + fail(True, "%d BPF programs loaded, expected %d\nLoaded Progs:\n%s" % + (len(progs), expected, pp.pformat(progs))) return progs def bpftool_map_list(expected=None, ns=""): -- cgit v1.2.3 From 781bf2cc0616d44cc4a63bab3664a54b221bdc14 Mon Sep 17 00:00:00 2001 From: Hangbin Liu Date: Tue, 19 Aug 2025 07:47:49 +0000 Subject: selftests: rtnetlink: print device info on preferred_lft test failure Even with slowwait used to avoid system sleep in the preferred_lft test, failures can still occur after long runtimes. Print the device address info when the test fails to provide better troubleshooting data. Signed-off-by: Hangbin Liu Link: https://patch.msgid.link/20250819074749.388064-1-liuhangbin@gmail.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/rtnetlink.sh | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh index d6c00efeb664..91b0f6cae04d 100755 --- a/tools/testing/selftests/net/rtnetlink.sh +++ b/tools/testing/selftests/net/rtnetlink.sh @@ -313,6 +313,8 @@ kci_test_addrlft() slowwait 5 check_addr_not_exist "$devdummy" "10.23.11." if [ $? -eq 1 ]; then + # troubleshoot the reason for our failure + run_cmd ip addr show dev "$devdummy" check_err 1 end_test "FAIL: preferred_lft addresses remaining" return -- cgit v1.2.3 From a5c10aa3d1ba643385534b5d40c8a67497f904b6 Mon Sep 17 00:00:00 2001 From: Kuniyuki Iwashima Date: Tue, 19 Aug 2025 23:14:57 +0000 Subject: selftests/net: packetdrill: Support single protocol test. Currently, we cannot write IPv4 or IPv6 specific packetdrill tests as ksft_runner.sh runs each .pkt file for both protocols. Let's support single protocol test by checking --ip_version in the .pkt file. Signed-off-by: Kuniyuki Iwashima Reviewed-by: Willem de Bruijn Link: https://patch.msgid.link/20250819231527.1427361-1-kuniyu@google.com Signed-off-by: Jakub Kicinski --- .../selftests/net/packetdrill/ksft_runner.sh | 47 +++++++++++++--------- 1 file changed, 28 insertions(+), 19 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh index a7e790af38ff..0ae6eeeb1a8e 100755 --- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh +++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh @@ -3,21 +3,22 @@ source "$(dirname $(realpath $0))/../../kselftest/ktap_helpers.sh" -readonly ipv4_args=('--ip_version=ipv4 ' - '--local_ip=192.168.0.1 ' - '--gateway_ip=192.168.0.1 ' - '--netmask_ip=255.255.0.0 ' - '--remote_ip=192.0.2.1 ' - '-D CMSG_LEVEL_IP=SOL_IP ' - '-D CMSG_TYPE_RECVERR=IP_RECVERR ') - -readonly ipv6_args=('--ip_version=ipv6 ' - '--mtu=1520 ' - '--local_ip=fd3d:0a0b:17d6::1 ' - '--gateway_ip=fd3d:0a0b:17d6:8888::1 ' - '--remote_ip=fd3d:fa7b:d17d::1 ' - '-D CMSG_LEVEL_IP=SOL_IPV6 ' - '-D CMSG_TYPE_RECVERR=IPV6_RECVERR ') +declare -A ip_args=( + [ipv4]="--ip_version=ipv4 + --local_ip=192.168.0.1 + --gateway_ip=192.168.0.1 + --netmask_ip=255.255.0.0 + --remote_ip=192.0.2.1 + -D CMSG_LEVEL_IP=SOL_IP + -D CMSG_TYPE_RECVERR=IP_RECVERR" + [ipv6]="--ip_version=ipv6 + --mtu=1520 + --local_ip=fd3d:0a0b:17d6::1 + --gateway_ip=fd3d:0a0b:17d6:8888::1 + --remote_ip=fd3d:fa7b:d17d::1 + -D CMSG_LEVEL_IP=SOL_IPV6 + -D CMSG_TYPE_RECVERR=IPV6_RECVERR" +) if [ $# -ne 1 ]; then ktap_exit_fail_msg "usage: $0