From b09ed52db1e688eb8205b1939ca1345179ecd515 Mon Sep 17 00:00:00 2001 From: Nicolin Chen Date: Tue, 14 Oct 2025 14:48:46 -0700 Subject: iommufd/selftest: Fix ioctl return value in _test_cmd_trigger_vevents() The ioctl returns 0 upon success, so !0 returning -1 breaks the selftest. Drop the '!' to fix it. Fixes: 1d235d849425 ("iommu/selftest: prevent use of uninitialized variable") Link: https://patch.msgid.link/r/20251014214847.1113759-1-nicolinc@nvidia.com Signed-off-by: Nicolin Chen Reviewed-by: Kevin Tian Signed-off-by: Jason Gunthorpe --- tools/testing/selftests/iommu/iommufd_utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/iommu/iommufd_utils.h b/tools/testing/selftests/iommu/iommufd_utils.h index 772ca1db6e59..9f472c20c190 100644 --- a/tools/testing/selftests/iommu/iommufd_utils.h +++ b/tools/testing/selftests/iommu/iommufd_utils.h @@ -1044,8 +1044,8 @@ static int _test_cmd_trigger_vevents(int fd, __u32 dev_id, __u32 nvevents) }; while (nvevents--) { - if (!ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), - &trigger_vevent_cmd)) + if (ioctl(fd, _IOMMU_TEST_CMD(IOMMU_TEST_OP_TRIGGER_VEVENT), + &trigger_vevent_cmd)) return -1; } return 0; -- cgit v1.2.3 From a24f7afce048e724be072bd063ed864f124daf81 Mon Sep 17 00:00:00 2001 From: Maximilian Dittgen Date: Mon, 20 Oct 2025 16:59:46 +0200 Subject: KVM: selftests: fix MAPC RDbase target formatting in vgic_lpi_stress Since GITS_TYPER.PTA == 0, the ITS MAPC command demands a CPU ID, rather than a physical redistributor address, for its RDbase command argument. As such, when MAPC-ing guest ITS collections, vgic_lpi_stress iterates over CPU IDs in the range [0, nr_cpus), passing them as the RDbase vcpu_id argument to its_send_mapc_cmd(). However, its_encode_target() in the its_send_mapc_cmd() selftest handler expects RDbase arguments to be formatted with a 16 bit offset, as shown by the 16-bit target_addr right shift its implementation: its_mask_encode(&cmd->raw_cmd[2], target_addr >> 16, 51, 16) At the moment, all CPU IDs passed into its_send_mapc_cmd() have no offset, therefore becoming 0x0 after the bit shift. Thus, when vgic_its_cmd_handle_mapc() receives the ITS command in vgic-its.c, it always interprets the RDbase target CPU as CPU 0. All interrupts sent to collections will be processed by vCPU 0, which defeats the purpose of this multi-vCPU test. Fix by creating procnum_to_rdbase() helper function, which left-shifts the vCPU parameter received by its_send_mapc_cmd 16 bits before passing it to its_encode_target for encoding. Signed-off-by: Maximilian Dittgen Link: https://patch.msgid.link/20251020145946.48288-1-mdittgen@amazon.de Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c index 09f270545646..0e2f8ed90f30 100644 --- a/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c +++ b/tools/testing/selftests/kvm/lib/arm64/gic_v3_its.c @@ -15,6 +15,8 @@ #include "gic_v3.h" #include "processor.h" +#define GITS_COLLECTION_TARGET_SHIFT 16 + static u64 its_read_u64(unsigned long offset) { return readq_relaxed(GITS_BASE_GVA + offset); @@ -163,6 +165,11 @@ static void its_encode_collection(struct its_cmd_block *cmd, u16 col) its_mask_encode(&cmd->raw_cmd[2], col, 15, 0); } +static u64 procnum_to_rdbase(u32 vcpu_id) +{ + return vcpu_id << GITS_COLLECTION_TARGET_SHIFT; +} + #define GITS_CMDQ_POLL_ITERATIONS 0 static void its_send_cmd(void *cmdq_base, struct its_cmd_block *cmd) @@ -217,7 +224,7 @@ void its_send_mapc_cmd(void *cmdq_base, u32 vcpu_id, u32 collection_id, bool val its_encode_cmd(&cmd, GITS_CMD_MAPC); its_encode_collection(&cmd, collection_id); - its_encode_target(&cmd, vcpu_id); + its_encode_target(&cmd, procnum_to_rdbase(vcpu_id)); its_encode_valid(&cmd, valid); its_send_cmd(cmdq_base, &cmd); -- cgit v1.2.3 From 92e781c93ebe75e39ecdf78fb8ef1fdf1b63a9f8 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Thu, 23 Oct 2025 22:19:29 +0100 Subject: KVM: arm64: selftests: Add SCTLR2_EL2 to get-reg-list We recently added support for SCTLR2_EL2 to the kernel but did not add it to get-reg-list, resulting in it reporting the missing register when it is available. Add it. Signed-off-by: Mark Brown Link: https://patch.msgid.link/20251023-b4-kvm-arm64-get-reg-list-sctlr-el2-v1-1-088f88ff992a@kernel.org Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index c9b84eeaab6b..2abef0a86d46 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -63,6 +63,7 @@ static struct feature_id_reg feat_id_regs[] = { REG_FEAT(HDFGWTR2_EL2, ID_AA64MMFR0_EL1, FGT, FGT2), REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), REG_FEAT(SCTLR2_EL1, ID_AA64MMFR3_EL1, SCTLRX, IMP), + REG_FEAT(SCTLR2_EL2, ID_AA64MMFR3_EL1, SCTLRX, IMP), REG_FEAT(VDISR_EL2, ID_AA64PFR0_EL1, RAS, IMP), REG_FEAT(VSESR_EL2, ID_AA64PFR0_EL1, RAS, IMP), REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), @@ -718,6 +719,7 @@ static __u64 el2_regs[] = { SYS_REG(VMPIDR_EL2), SYS_REG(SCTLR_EL2), SYS_REG(ACTLR_EL2), + SYS_REG(SCTLR2_EL2), SYS_REG(HCR_EL2), SYS_REG(MDCR_EL2), SYS_REG(CPTR_EL2), -- cgit v1.2.3 From a186fbcfd845699d51809f7c7e54cf997fe32820 Mon Sep 17 00:00:00 2001 From: Mark Brown Date: Fri, 24 Oct 2025 00:43:39 +0100 Subject: KVM: arm64: selftests: Filter ZCR_EL2 in get-reg-list get-reg-list includes ZCR_EL2 in the list of EL2 registers that it looks for when NV is enabled but does not have any feature gate for this register, meaning that testing any combination of features that includes EL2 but does not include SVE will result in a test failure due to a missing register being reported: | The following lines are missing registers: | | ARM64_SYS_REG(3, 4, 1, 2, 0), Add ZCR_EL2 to feat_id_regs so that the test knows not to expect to see it without SVE being enabled. Fixes: 3a90b6f27964 ("KVM: arm64: selftests: get-reg-list: Add base EL2 registers") Signed-off-by: Mark Brown Link: https://patch.msgid.link/20251024-kvm-arm64-get-reg-list-zcr-el2-v1-1-0cd0ff75e22f@kernel.org Signed-off-by: Marc Zyngier --- tools/testing/selftests/kvm/arm64/get-reg-list.c | 1 + 1 file changed, 1 insertion(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/kvm/arm64/get-reg-list.c b/tools/testing/selftests/kvm/arm64/get-reg-list.c index 2abef0a86d46..0a3a94c4cca1 100644 --- a/tools/testing/selftests/kvm/arm64/get-reg-list.c +++ b/tools/testing/selftests/kvm/arm64/get-reg-list.c @@ -69,6 +69,7 @@ static struct feature_id_reg feat_id_regs[] = { REG_FEAT(VNCR_EL2, ID_AA64MMFR4_EL1, NV_frac, NV2_ONLY), REG_FEAT(CNTHV_CTL_EL2, ID_AA64MMFR1_EL1, VH, IMP), REG_FEAT(CNTHV_CVAL_EL2,ID_AA64MMFR1_EL1, VH, IMP), + REG_FEAT(ZCR_EL2, ID_AA64PFR0_EL1, SVE, IMP), }; bool filter_reg(__u64 reg) -- cgit v1.2.3 From 02d064de05b1fcca769391fa82d205bed8bb9bf0 Mon Sep 17 00:00:00 2001 From: Anubhav Singh Date: Thu, 30 Oct 2025 06:28:18 +0000 Subject: selftests/net: fix out-of-order delivery of FIN in gro:tcp test Due to the gro_sender sending data packets and FIN packets in very quick succession, these are received almost simultaneously by the gro_receiver. FIN packets are sometimes processed before the data packets leading to intermittent (~1/100) test failures. This change adds a delay of 100ms before sending FIN packets in gro:tcp test to avoid the out-of-order delivery. The same mitigation already exists for the gro:ip test. Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Reviewed-by: Willem de Bruijn Signed-off-by: Anubhav Singh Link: https://patch.msgid.link/20251030062818.1562228-1-anubhavsinggh@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 2b1d9f2b3e9e..3fa63bd85dea 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -989,6 +989,7 @@ static void check_recv_pkts(int fd, int *correct_payload, static void gro_sender(void) { + const int fin_delay_us = 100 * 1000; static char fin_pkt[MAX_HDR_LEN]; struct sockaddr_ll daddr = {}; int txfd = -1; @@ -1032,15 +1033,22 @@ static void gro_sender(void) write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "tcp") == 0) { send_changed_checksum(txfd, &daddr); + /* Adding sleep before sending FIN so that it is not + * received prior to other packets. + */ + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_seq(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_changed_ts(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); send_diff_opt(txfd, &daddr); + usleep(fin_delay_us); write_packet(txfd, fin_pkt, total_hdr_len, &daddr); } else if (strcmp(testname, "ip") == 0) { send_changed_ECN(txfd, &daddr); -- cgit v1.2.3 From f8e8486702abb05b8c734093aab1606af0eac068 Mon Sep 17 00:00:00 2001 From: Anubhav Singh Date: Thu, 30 Oct 2025 06:04:36 +0000 Subject: selftests/net: use destination options instead of hop-by-hop The GRO self-test, gro.c, currently constructs IPv6 packets containing a Hop-by-Hop Options header (IPPROTO_HOPOPTS) to ensure the GRO path correctly handles IPv6 extension headers. However, network elements may be configured to drop packets with the Hop-by-Hop Options header (HBH). This causes the self-test to fail in environments where such network elements are present. To improve the robustness and reliability of this test in diverse network environments, switch from using IPPROTO_HOPOPTS to IPPROTO_DSTOPTS (Destination Options). The Destination Options header is less likely to be dropped by intermediate routers and still serves the core purpose of the test: validating GRO's handling of an IPv6 extension header. This change ensures the test can execute successfully without being incorrectly failed by network policies outside the kernel's control. Fixes: 7d1575014a63 ("selftests/net: GRO coalesce test") Reviewed-by: Willem de Bruijn Signed-off-by: Anubhav Singh Link: https://patch.msgid.link/20251030060436.1556664-1-anubhavsinggh@google.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/gro.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c index 3fa63bd85dea..cfc39f70635d 100644 --- a/tools/testing/selftests/net/gro.c +++ b/tools/testing/selftests/net/gro.c @@ -754,11 +754,11 @@ static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE]; create_packet(buf, 0, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data1); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0); - add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2); + add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_DSTOPTS, ext_data2); write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr); } -- cgit v1.2.3 From d01f8136d46b925798abcf86b35a4021e4cfb8bb Mon Sep 17 00:00:00 2001 From: Wang Liang Date: Thu, 30 Oct 2025 12:03:40 +0800 Subject: selftests: netdevsim: Fix ethtool-coalesce.sh fail by installing ethtool-common.sh The script "ethtool-common.sh" is not installed in INSTALL_PATH, and triggers some errors when I try to run the test 'drivers/net/netdevsim/ethtool-coalesce.sh': TAP version 13 1..1 # timeout set to 600 # selftests: drivers/net/netdevsim: ethtool-coalesce.sh # ./ethtool-coalesce.sh: line 4: ethtool-common.sh: No such file or directory # ./ethtool-coalesce.sh: line 25: make_netdev: command not found # ethtool: bad command line argument(s) # ./ethtool-coalesce.sh: line 124: check: command not found # ./ethtool-coalesce.sh: line 126: [: -eq: unary operator expected # FAILED /0 checks not ok 1 selftests: drivers/net/netdevsim: ethtool-coalesce.sh # exit=1 Install this file to avoid this error. After this patch: TAP version 13 1..1 # timeout set to 600 # selftests: drivers/net/netdevsim: ethtool-coalesce.sh # PASSED all 22 checks ok 1 selftests: drivers/net/netdevsim: ethtool-coalesce.sh Fixes: fbb8531e58bd ("selftests: extract common functions in ethtool-common.sh") Signed-off-by: Wang Liang Link: https://patch.msgid.link/20251030040340.3258110-1-wangliang74@huawei.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/netdevsim/Makefile | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile index daf51113c827..df10c7243511 100644 --- a/tools/testing/selftests/drivers/net/netdevsim/Makefile +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -20,4 +20,8 @@ TEST_PROGS := \ udp_tunnel_nic.sh \ # end of TEST_PROGS +TEST_FILES := \ + ethtool-common.sh +# end of TEST_FILES + include ../../../lib.mk -- cgit v1.2.3 From 62d2d0a33839c28173909616db2ef16e1a4a5071 Mon Sep 17 00:00:00 2001 From: Song Liu Date: Mon, 27 Oct 2025 10:50:23 -0700 Subject: selftests/bpf: Add tests for livepatch + bpf trampoline Both livepatch and BPF trampoline use ftrace. Special attention is needed when livepatch and fexit program touch the same function at the same time, because livepatch updates a kernel function and the BPF trampoline need to call into the right version of the kernel function. Use samples/livepatch/livepatch-sample.ko for the test. The test covers two cases: 1) When a fentry program is loaded first. This exercises the modify_ftrace_direct code path. 2) When a fentry program is loaded first. This exercises the register_ftrace_direct code path. Signed-off-by: Song Liu Reviewed-by: Jiri Olsa Link: https://lore.kernel.org/r/20251027175023.1521602-4-song@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- tools/testing/selftests/bpf/config | 3 + .../bpf/prog_tests/livepatch_trampoline.c | 107 +++++++++++++++++++++ .../selftests/bpf/progs/livepatch_trampoline.c | 30 ++++++ 3 files changed, 140 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c create mode 100644 tools/testing/selftests/bpf/progs/livepatch_trampoline.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 70b28c1e653e..f2a2fd236ca8 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -50,6 +50,7 @@ CONFIG_IPV6_SIT=y CONFIG_IPV6_TUNNEL=y CONFIG_KEYS=y CONFIG_LIRC=y +CONFIG_LIVEPATCH=y CONFIG_LWTUNNEL=y CONFIG_MODULE_SIG=y CONFIG_MODULE_SRCVERSION_ALL=y @@ -111,6 +112,8 @@ CONFIG_IP6_NF_FILTER=y CONFIG_NF_NAT=y CONFIG_PACKET=y CONFIG_RC_CORE=y +CONFIG_SAMPLES=y +CONFIG_SAMPLE_LIVEPATCH=m CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_SYN_COOKIES=y diff --git a/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c new file mode 100644 index 000000000000..72aa5376c30e --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/livepatch_trampoline.c @@ -0,0 +1,107 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include "testing_helpers.h" +#include "livepatch_trampoline.skel.h" + +static int load_livepatch(void) +{ + char path[4096]; + + /* CI will set KBUILD_OUTPUT */ + snprintf(path, sizeof(path), "%s/samples/livepatch/livepatch-sample.ko", + getenv("KBUILD_OUTPUT") ? : "../../../.."); + + return load_module(path, env_verbosity > VERBOSE_NONE); +} + +static void unload_livepatch(void) +{ + /* Disable the livepatch before unloading the module */ + system("echo 0 > /sys/kernel/livepatch/livepatch_sample/enabled"); + + unload_module("livepatch_sample", env_verbosity > VERBOSE_NONE); +} + +static void read_proc_cmdline(void) +{ + char buf[4096]; + int fd, ret; + + fd = open("/proc/cmdline", O_RDONLY); + if (!ASSERT_OK_FD(fd, "open /proc/cmdline")) + return; + + ret = read(fd, buf, sizeof(buf)); + if (!ASSERT_GT(ret, 0, "read /proc/cmdline")) + goto out; + + ASSERT_OK(strncmp(buf, "this has been live patched", 26), "strncmp"); + +out: + close(fd); +} + +static void __test_livepatch_trampoline(bool fexit_first) +{ + struct livepatch_trampoline *skel = NULL; + int err; + + skel = livepatch_trampoline__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) + goto out; + + skel->bss->my_pid = getpid(); + + if (!fexit_first) { + /* fentry program is loaded first by default */ + err = livepatch_trampoline__attach(skel); + if (!ASSERT_OK(err, "skel_attach")) + goto out; + } else { + /* Manually load fexit program first. */ + skel->links.fexit_cmdline = bpf_program__attach(skel->progs.fexit_cmdline); + if (!ASSERT_OK_PTR(skel->links.fexit_cmdline, "attach_fexit")) + goto out; + + skel->links.fentry_cmdline = bpf_program__attach(skel->progs.fentry_cmdline); + if (!ASSERT_OK_PTR(skel->links.fentry_cmdline, "attach_fentry")) + goto out; + } + + read_proc_cmdline(); + + ASSERT_EQ(skel->bss->fentry_hit, 1, "fentry_hit"); + ASSERT_EQ(skel->bss->fexit_hit, 1, "fexit_hit"); +out: + livepatch_trampoline__destroy(skel); +} + +void test_livepatch_trampoline(void) +{ + int retry_cnt = 0; + +retry: + if (load_livepatch()) { + if (retry_cnt) { + ASSERT_OK(1, "load_livepatch"); + goto out; + } + /* + * Something else (previous run of the same test?) loaded + * the KLP module. Unload the KLP module and retry. + */ + unload_livepatch(); + retry_cnt++; + goto retry; + } + + if (test__start_subtest("fentry_first")) + __test_livepatch_trampoline(false); + + if (test__start_subtest("fexit_first")) + __test_livepatch_trampoline(true); +out: + unload_livepatch(); +} diff --git a/tools/testing/selftests/bpf/progs/livepatch_trampoline.c b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c new file mode 100644 index 000000000000..15579d5bcd91 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/livepatch_trampoline.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2025 Meta Platforms, Inc. and affiliates. */ + +#include +#include +#include + +int fentry_hit; +int fexit_hit; +int my_pid; + +SEC("fentry/cmdline_proc_show") +int BPF_PROG(fentry_cmdline) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + fentry_hit = 1; + return 0; +} + +SEC("fexit/cmdline_proc_show") +int BPF_PROG(fexit_cmdline) +{ + if (my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + + fexit_hit = 1; + return 0; +} -- cgit v1.2.3 From ea0714d61dea6e00b853a0116d0afe2b2fe70ef3 Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 4 Nov 2025 22:54:25 +0000 Subject: bpf:add _impl suffix for bpf_task_work_schedule* kfuncs Rename: bpf_task_work_schedule_resume()->bpf_task_work_schedule_resume_impl() bpf_task_work_schedule_signal()->bpf_task_work_schedule_signal_impl() This aligns task work scheduling kfuncs with the established naming scheme for kfuncs with the bpf_prog_aux argument provided by the verifier implicitly. This convention will be taken advantage of with the upcoming KF_IMPLICIT_ARGS feature to preserve backwards compatibility to BPF programs. Acked-by: Andrii Nakryiko Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20251104-implv2-v3-1-4772b9ae0e06@meta.com Signed-off-by: Alexei Starovoitov Acked-by: Ihor Solodrai --- kernel/bpf/helpers.c | 24 +++++++++++++--------- kernel/bpf/verifier.c | 12 +++++------ tools/testing/selftests/bpf/progs/task_work.c | 6 +++--- tools/testing/selftests/bpf/progs/task_work_fail.c | 8 ++++---- .../testing/selftests/bpf/progs/task_work_stress.c | 4 ++-- 5 files changed, 29 insertions(+), 25 deletions(-) (limited to 'tools/testing') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index eb25e70e0bdc..33173b027ccf 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4169,7 +4169,8 @@ release_prog: } /** - * bpf_task_work_schedule_signal - Schedule BPF callback using task_work_add with TWA_SIGNAL mode + * bpf_task_work_schedule_signal_impl - Schedule BPF callback using task_work_add with TWA_SIGNAL + * mode * @task: Task struct for which callback should be scheduled * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping * @map__map: bpf_map that embeds struct bpf_task_work in the values @@ -4178,15 +4179,17 @@ release_prog: * * Return: 0 if task work has been scheduled successfully, negative error code otherwise */ -__bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct bpf_task_work *tw, - void *map__map, bpf_task_work_callback_t callback, - void *aux__prog) +__bpf_kfunc int bpf_task_work_schedule_signal_impl(struct task_struct *task, + struct bpf_task_work *tw, void *map__map, + bpf_task_work_callback_t callback, + void *aux__prog) { return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_SIGNAL); } /** - * bpf_task_work_schedule_resume - Schedule BPF callback using task_work_add with TWA_RESUME mode + * bpf_task_work_schedule_resume_impl - Schedule BPF callback using task_work_add with TWA_RESUME + * mode * @task: Task struct for which callback should be scheduled * @tw: Pointer to struct bpf_task_work in BPF map value for internal bookkeeping * @map__map: bpf_map that embeds struct bpf_task_work in the values @@ -4195,9 +4198,10 @@ __bpf_kfunc int bpf_task_work_schedule_signal(struct task_struct *task, struct b * * Return: 0 if task work has been scheduled successfully, negative error code otherwise */ -__bpf_kfunc int bpf_task_work_schedule_resume(struct task_struct *task, struct bpf_task_work *tw, - void *map__map, bpf_task_work_callback_t callback, - void *aux__prog) +__bpf_kfunc int bpf_task_work_schedule_resume_impl(struct task_struct *task, + struct bpf_task_work *tw, void *map__map, + bpf_task_work_callback_t callback, + void *aux__prog) { return bpf_task_work_schedule(task, tw, map__map, callback, aux__prog, TWA_RESUME); } @@ -4377,8 +4381,8 @@ BTF_ID_FLAGS(func, bpf_strnstr); BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) #endif BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_task_work_schedule_signal, KF_TRUSTED_ARGS) -BTF_ID_FLAGS(func, bpf_task_work_schedule_resume, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_work_schedule_signal_impl, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_task_work_schedule_resume_impl, KF_TRUSTED_ARGS) BTF_KFUNCS_END(common_btf_ids) static const struct btf_kfunc_id_set common_kfunc_set = { diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index ff40e5e65c43..8314518c8d93 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -12259,8 +12259,8 @@ enum special_kfunc_type { KF_bpf_res_spin_lock_irqsave, KF_bpf_res_spin_unlock_irqrestore, KF___bpf_trap, - KF_bpf_task_work_schedule_signal, - KF_bpf_task_work_schedule_resume, + KF_bpf_task_work_schedule_signal_impl, + KF_bpf_task_work_schedule_resume_impl, }; BTF_ID_LIST(special_kfunc_list) @@ -12331,13 +12331,13 @@ BTF_ID(func, bpf_res_spin_unlock) BTF_ID(func, bpf_res_spin_lock_irqsave) BTF_ID(func, bpf_res_spin_unlock_irqrestore) BTF_ID(func, __bpf_trap) -BTF_ID(func, bpf_task_work_schedule_signal) -BTF_ID(func, bpf_task_work_schedule_resume) +BTF_ID(func, bpf_task_work_schedule_signal_impl) +BTF_ID(func, bpf_task_work_schedule_resume_impl) static bool is_task_work_add_kfunc(u32 func_id) { - return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal] || - func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume]; + return func_id == special_kfunc_list[KF_bpf_task_work_schedule_signal_impl] || + func_id == special_kfunc_list[KF_bpf_task_work_schedule_resume_impl]; } static bool is_kfunc_ret_null(struct bpf_kfunc_call_arg_meta *meta) diff --git a/tools/testing/selftests/bpf/progs/task_work.c b/tools/testing/selftests/bpf/progs/task_work.c index 23217f06a3ec..663a80990f8f 100644 --- a/tools/testing/selftests/bpf/progs/task_work.c +++ b/tools/testing/selftests/bpf/progs/task_work.c @@ -66,7 +66,7 @@ int oncpu_hash_map(struct pt_regs *args) if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL); return 0; } @@ -80,7 +80,7 @@ int oncpu_array_map(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_signal(task, &work->tw, &arrmap, process_work, NULL); + bpf_task_work_schedule_signal_impl(task, &work->tw, &arrmap, process_work, NULL); return 0; } @@ -102,6 +102,6 @@ int oncpu_lru_map(struct pt_regs *args) work = bpf_map_lookup_elem(&lrumap, &key); if (!work || work->data[0]) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &lrumap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &lrumap, process_work, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work_fail.c b/tools/testing/selftests/bpf/progs/task_work_fail.c index 77fe8f28facd..1270953fd092 100644 --- a/tools/testing/selftests/bpf/progs/task_work_fail.c +++ b/tools/testing/selftests/bpf/progs/task_work_fail.c @@ -53,7 +53,7 @@ int mismatch_map(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, &hmap, process_work, NULL); return 0; } @@ -65,7 +65,7 @@ int no_map_task_work(struct pt_regs *args) struct bpf_task_work tw; task = bpf_get_current_task_btf(); - bpf_task_work_schedule_resume(task, &tw, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &tw, &hmap, process_work, NULL); return 0; } @@ -76,7 +76,7 @@ int task_work_null(struct pt_regs *args) struct task_struct *task; task = bpf_get_current_task_btf(); - bpf_task_work_schedule_resume(task, NULL, &hmap, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, NULL, &hmap, process_work, NULL); return 0; } @@ -91,6 +91,6 @@ int map_null(struct pt_regs *args) work = bpf_map_lookup_elem(&arrmap, &key); if (!work) return 0; - bpf_task_work_schedule_resume(task, &work->tw, NULL, process_work, NULL); + bpf_task_work_schedule_resume_impl(task, &work->tw, NULL, process_work, NULL); return 0; } diff --git a/tools/testing/selftests/bpf/progs/task_work_stress.c b/tools/testing/selftests/bpf/progs/task_work_stress.c index 90fca06fff56..55e555f7f41b 100644 --- a/tools/testing/selftests/bpf/progs/task_work_stress.c +++ b/tools/testing/selftests/bpf/progs/task_work_stress.c @@ -51,8 +51,8 @@ int schedule_task_work(void *ctx) if (!work) return 0; } - err = bpf_task_work_schedule_signal(bpf_get_current_task_btf(), &work->tw, &hmap, - process_work, NULL); + err = bpf_task_work_schedule_signal_impl(bpf_get_current_task_btf(), &work->tw, &hmap, + process_work, NULL); if (err) __sync_fetch_and_add(&schedule_error, 1); else -- cgit v1.2.3 From 137cc92ffe2e71705fce112656a460d924934ebe Mon Sep 17 00:00:00 2001 From: Mykyta Yatsenko Date: Tue, 4 Nov 2025 22:54:26 +0000 Subject: bpf: add _impl suffix for bpf_stream_vprintk() kfunc Rename bpf_stream_vprintk() to bpf_stream_vprintk_impl(). This makes bpf_stream_vprintk() follow the already established "_impl" suffix-based naming convention for kfuncs with the bpf_prog_aux argument provided by the verifier implicitly. This convention will be taken advantage of with the upcoming KF_IMPLICIT_ARGS feature to preserve backwards compatibility to BPF programs. Acked-by: Andrii Nakryiko Signed-off-by: Mykyta Yatsenko Link: https://lore.kernel.org/r/20251104-implv2-v3-2-4772b9ae0e06@meta.com Signed-off-by: Alexei Starovoitov Acked-by: Ihor Solodrai --- kernel/bpf/helpers.c | 2 +- kernel/bpf/stream.c | 3 ++- tools/bpf/bpftool/Documentation/bpftool-prog.rst | 2 +- tools/lib/bpf/bpf_helpers.h | 28 ++++++++++++------------ tools/testing/selftests/bpf/progs/stream_fail.c | 6 ++--- 5 files changed, 21 insertions(+), 20 deletions(-) (limited to 'tools/testing') diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c index 33173b027ccf..e4007fea4909 100644 --- a/kernel/bpf/helpers.c +++ b/kernel/bpf/helpers.c @@ -4380,7 +4380,7 @@ BTF_ID_FLAGS(func, bpf_strnstr); #if defined(CONFIG_BPF_LSM) && defined(CONFIG_CGROUPS) BTF_ID_FLAGS(func, bpf_cgroup_read_xattr, KF_RCU) #endif -BTF_ID_FLAGS(func, bpf_stream_vprintk, KF_TRUSTED_ARGS) +BTF_ID_FLAGS(func, bpf_stream_vprintk_impl, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_signal_impl, KF_TRUSTED_ARGS) BTF_ID_FLAGS(func, bpf_task_work_schedule_resume_impl, KF_TRUSTED_ARGS) BTF_KFUNCS_END(common_btf_ids) diff --git a/kernel/bpf/stream.c b/kernel/bpf/stream.c index eb6c5a21c2ef..ff16c631951b 100644 --- a/kernel/bpf/stream.c +++ b/kernel/bpf/stream.c @@ -355,7 +355,8 @@ __bpf_kfunc_start_defs(); * Avoid using enum bpf_stream_id so that kfunc users don't have to pull in the * enum in headers. */ -__bpf_kfunc int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, u32 len__sz, void *aux__prog) +__bpf_kfunc int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args, + u32 len__sz, void *aux__prog) { struct bpf_bprintf_data data = { .get_bin_args = true, diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst index 009633294b09..35aeeaf5f711 100644 --- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst +++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst @@ -182,7 +182,7 @@ bpftool prog tracelog bpftool prog tracelog { stdout | stderr } *PROG* Dump the BPF stream of the program. BPF programs can write to these streams - at runtime with the **bpf_stream_vprintk**\ () kfunc. The kernel may write + at runtime with the **bpf_stream_vprintk_impl**\ () kfunc. The kernel may write error messages to the standard error stream. This facility should be used only for debugging purposes. diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h index 80c028540656..d4e4e388e625 100644 --- a/tools/lib/bpf/bpf_helpers.h +++ b/tools/lib/bpf/bpf_helpers.h @@ -315,20 +315,20 @@ enum libbpf_tristate { ___param, sizeof(___param)); \ }) -extern int bpf_stream_vprintk(int stream_id, const char *fmt__str, const void *args, - __u32 len__sz, void *aux__prog) __weak __ksym; - -#define bpf_stream_printk(stream_id, fmt, args...) \ -({ \ - static const char ___fmt[] = fmt; \ - unsigned long long ___param[___bpf_narg(args)]; \ - \ - _Pragma("GCC diagnostic push") \ - _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ - ___bpf_fill(___param, args); \ - _Pragma("GCC diagnostic pop") \ - \ - bpf_stream_vprintk(stream_id, ___fmt, ___param, sizeof(___param), NULL);\ +extern int bpf_stream_vprintk_impl(int stream_id, const char *fmt__str, const void *args, + __u32 len__sz, void *aux__prog) __weak __ksym; + +#define bpf_stream_printk(stream_id, fmt, args...) \ +({ \ + static const char ___fmt[] = fmt; \ + unsigned long long ___param[___bpf_narg(args)]; \ + \ + _Pragma("GCC diagnostic push") \ + _Pragma("GCC diagnostic ignored \"-Wint-conversion\"") \ + ___bpf_fill(___param, args); \ + _Pragma("GCC diagnostic pop") \ + \ + bpf_stream_vprintk_impl(stream_id, ___fmt, ___param, sizeof(___param), NULL); \ }) /* Use __bpf_printk when bpf_printk call has 3 or fewer fmt args diff --git a/tools/testing/selftests/bpf/progs/stream_fail.c b/tools/testing/selftests/bpf/progs/stream_fail.c index b4a0d0cc8ec8..3662515f0107 100644 --- a/tools/testing/selftests/bpf/progs/stream_fail.c +++ b/tools/testing/selftests/bpf/progs/stream_fail.c @@ -10,7 +10,7 @@ SEC("syscall") __failure __msg("Possibly NULL pointer passed") int stream_vprintk_null_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, "", NULL, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, "", NULL, 0, NULL); return 0; } @@ -18,7 +18,7 @@ SEC("syscall") __failure __msg("R3 type=scalar expected=") int stream_vprintk_scalar_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, "", (void *)46, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, "", (void *)46, 0, NULL); return 0; } @@ -26,7 +26,7 @@ SEC("syscall") __failure __msg("arg#1 doesn't point to a const string") int stream_vprintk_string_arg(void *ctx) { - bpf_stream_vprintk(BPF_STDOUT, ctx, NULL, 0, NULL); + bpf_stream_vprintk_impl(BPF_STDOUT, ctx, NULL, 0, NULL); return 0; } -- cgit v1.2.3 From afb47765f9235181fddc61c8633b5a8cfae29fd2 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 4 Nov 2025 14:11:49 -0400 Subject: iommufd: Make vfio_compat's unmap succeed if the range is already empty iommufd returns ENOENT when attempting to unmap a range that is already empty, while vfio type1 returns success. Fix vfio_compat to match. Fixes: d624d6652a65 ("iommufd: vfio container FD ioctl compatibility") Link: https://patch.msgid.link/r/0-v1-76be45eff0be+5d-iommufd_unmap_compat_jgg@nvidia.com Reviewed-by: Nicolin Chen Reviewed-by: Alex Mastro Reported-by: Alex Mastro Closes: https://lore.kernel.org/r/aP0S5ZF9l3sWkJ1G@devgpu012.nha5.facebook.com Signed-off-by: Jason Gunthorpe --- drivers/iommu/iommufd/io_pagetable.c | 12 +++--------- drivers/iommu/iommufd/ioas.c | 4 ++++ tools/testing/selftests/iommu/iommufd.c | 2 ++ 3 files changed, 9 insertions(+), 9 deletions(-) (limited to 'tools/testing') diff --git a/drivers/iommu/iommufd/io_pagetable.c b/drivers/iommu/iommufd/io_pagetable.c index c0360c450880..75d60f2ad900 100644 --- a/drivers/iommu/iommufd/io_pagetable.c +++ b/drivers/iommu/iommufd/io_pagetable.c @@ -707,7 +707,8 @@ static int iopt_unmap_iova_range(struct io_pagetable *iopt, unsigned long start, struct iopt_area *area; unsigned long unmapped_bytes = 0; unsigned int tries = 0; - int rc = -ENOENT; + /* If there are no mapped entries then success */ + int rc = 0; /* * The domains_rwsem must be held in read mode any time any area->pages @@ -777,8 +778,6 @@ again: down_write(&iopt->iova_rwsem); } - if (unmapped_bytes) - rc = 0; out_unlock_iova: up_write(&iopt->iova_rwsem); @@ -815,13 +814,8 @@ int iopt_unmap_iova(struct io_pagetable *iopt, unsigned long iova, int iopt_unmap_all(struct io_pagetable *iopt, unsigned long *unmapped) { - int rc; - - rc = iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); /* If the IOVAs are empty then unmap all succeeds */ - if (rc == -ENOENT) - return 0; - return rc; + return iopt_unmap_iova_range(iopt, 0, ULONG_MAX, unmapped); } /* The caller must always free all the nodes in the allowed_iova rb_root. */ diff --git a/drivers/iommu/iommufd/ioas.c b/drivers/iommu/iommufd/ioas.c index 1542c5fd10a8..459a7c516915 100644 --- a/drivers/iommu/iommufd/ioas.c +++ b/drivers/iommu/iommufd/ioas.c @@ -367,6 +367,10 @@ int iommufd_ioas_unmap(struct iommufd_ucmd *ucmd) &unmapped); if (rc) goto out_put; + if (!unmapped) { + rc = -ENOENT; + goto out_put; + } } cmd->length = unmapped; diff --git a/tools/testing/selftests/iommu/iommufd.c b/tools/testing/selftests/iommu/iommufd.c index 3eebf5e3b974..bb4d33dde3c8 100644 --- a/tools/testing/selftests/iommu/iommufd.c +++ b/tools/testing/selftests/iommu/iommufd.c @@ -2638,6 +2638,8 @@ TEST_F(vfio_compat_mock_domain, map) ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); ASSERT_EQ(BUFFER_SIZE, unmap_cmd.size); + /* Unmap of empty is success */ + ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_UNMAP_DMA, &unmap_cmd)); /* UNMAP_FLAG_ALL requires 0 iova/size */ ASSERT_EQ(0, ioctl(self->fd, VFIO_IOMMU_MAP_DMA, &map_cmd)); -- cgit v1.2.3 From c9e208fa93cd66f8077ee15df0728e62b105a687 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Nov 2025 22:54:04 +0100 Subject: selftests/bpf: Add stacktrace ips test for kprobe_multi/kretprobe_multi Adding test that attaches kprobe/kretprobe multi and verifies the ORC stacktrace matches expected functions. Adding bpf_testmod_stacktrace_test function to bpf_testmod kernel module which is called through several functions so we get reliable call path for stacktrace. The test is only for ORC unwinder to keep it simple. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20251104215405.168643-4-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- .../selftests/bpf/prog_tests/stacktrace_ips.c | 104 +++++++++++++++++++++ tools/testing/selftests/bpf/progs/stacktrace_ips.c | 41 ++++++++ .../testing/selftests/bpf/test_kmods/bpf_testmod.c | 26 ++++++ 3 files changed, 171 insertions(+) create mode 100644 tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c create mode 100644 tools/testing/selftests/bpf/progs/stacktrace_ips.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c new file mode 100644 index 000000000000..6fca459ba550 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0 +#include +#include "stacktrace_ips.skel.h" + +#ifdef __x86_64__ +static int check_stacktrace_ips(int fd, __u32 key, int cnt, ...) +{ + __u64 ips[PERF_MAX_STACK_DEPTH]; + struct ksyms *ksyms = NULL; + int i, err = 0; + va_list args; + + /* sorted by addr */ + ksyms = load_kallsyms_local(); + if (!ASSERT_OK_PTR(ksyms, "load_kallsyms_local")) + return -1; + + /* unlikely, but... */ + if (!ASSERT_LT(cnt, PERF_MAX_STACK_DEPTH, "check_max")) + return -1; + + err = bpf_map_lookup_elem(fd, &key, ips); + if (err) + goto out; + + /* + * Compare all symbols provided via arguments with stacktrace ips, + * and their related symbol addresses.t + */ + va_start(args, cnt); + + for (i = 0; i < cnt; i++) { + unsigned long val; + struct ksym *ksym; + + val = va_arg(args, unsigned long); + ksym = ksym_search_local(ksyms, ips[i]); + if (!ASSERT_OK_PTR(ksym, "ksym_search_local")) + break; + ASSERT_EQ(ksym->addr, val, "stack_cmp"); + } + + va_end(args); + +out: + free_kallsyms_local(ksyms); + return err; +} + +static void test_stacktrace_ips_kprobe_multi(bool retprobe) +{ + LIBBPF_OPTS(bpf_kprobe_multi_opts, opts, + .retprobe = retprobe + ); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct stacktrace_ips *skel; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.kprobe_multi_test = bpf_program__attach_kprobe_multi_opts( + skel->progs.kprobe_multi_test, + "bpf_testmod_stacktrace_test", &opts); + if (!ASSERT_OK_PTR(skel->links.kprobe_multi_test, "bpf_program__attach_kprobe_multi_opts")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 4, + ksym_get_addr("bpf_testmod_stacktrace_test_3"), + ksym_get_addr("bpf_testmod_stacktrace_test_2"), + ksym_get_addr("bpf_testmod_stacktrace_test_1"), + ksym_get_addr("bpf_testmod_test_read")); + +cleanup: + stacktrace_ips__destroy(skel); +} + +static void __test_stacktrace_ips(void) +{ + if (test__start_subtest("kprobe_multi")) + test_stacktrace_ips_kprobe_multi(false); + if (test__start_subtest("kretprobe_multi")) + test_stacktrace_ips_kprobe_multi(true); +} +#else +static void __test_stacktrace_ips(void) +{ + test__skip(); +} +#endif + +void test_stacktrace_ips(void) +{ + __test_stacktrace_ips(); +} diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c new file mode 100644 index 000000000000..e2eb30945c1b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c @@ -0,0 +1,41 @@ +// SPDX-License-Identifier: GPL-2.0 +// Copyright (c) 2018 Facebook + +#include +#include +#include + +#ifndef PERF_MAX_STACK_DEPTH +#define PERF_MAX_STACK_DEPTH 127 +#endif + +typedef __u64 stack_trace_t[PERF_MAX_STACK_DEPTH]; + +struct { + __uint(type, BPF_MAP_TYPE_STACK_TRACE); + __uint(max_entries, 16384); + __type(key, __u32); + __type(value, stack_trace_t); +} stackmap SEC(".maps"); + +extern bool CONFIG_UNWINDER_ORC __kconfig __weak; + +/* + * This function is here to have CONFIG_UNWINDER_ORC + * used and added to object BTF. + */ +int unused(void) +{ + return CONFIG_UNWINDER_ORC ? 0 : 1; +} + +__u32 stack_key; + +SEC("kprobe.multi") +int kprobe_multi_test(struct pt_regs *ctx) +{ + stack_key = bpf_get_stackid(ctx, &stackmap, 0); + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c index 8074bc5f6f20..ed0a4721d8fd 100644 --- a/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c +++ b/tools/testing/selftests/bpf/test_kmods/bpf_testmod.c @@ -417,6 +417,30 @@ noinline int bpf_testmod_fentry_test11(u64 a, void *b, short c, int d, return a + (long)b + c + d + (long)e + f + g + h + i + j + k; } +noinline void bpf_testmod_stacktrace_test(void) +{ + /* used for stacktrace test as attach function */ + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_3(void) +{ + bpf_testmod_stacktrace_test(); + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_2(void) +{ + bpf_testmod_stacktrace_test_3(); + asm volatile (""); +} + +noinline void bpf_testmod_stacktrace_test_1(void) +{ + bpf_testmod_stacktrace_test_2(); + asm volatile (""); +} + int bpf_testmod_fentry_ok; noinline ssize_t @@ -497,6 +521,8 @@ bpf_testmod_test_read(struct file *file, struct kobject *kobj, 21, 22, 23, 24, 25, 26) != 231) goto out; + bpf_testmod_stacktrace_test_1(); + bpf_testmod_fentry_ok = 1; out: return -EIO; /* always fail */ -- cgit v1.2.3 From 3490d29964bdd524366d266b655112cb549c7460 Mon Sep 17 00:00:00 2001 From: Jiri Olsa Date: Tue, 4 Nov 2025 22:54:05 +0100 Subject: selftests/bpf: Add stacktrace ips test for raw_tp Adding test that verifies we get expected initial 2 entries from stacktrace for rawtp probe via ORC unwind. Signed-off-by: Jiri Olsa Link: https://lore.kernel.org/r/20251104215405.168643-5-jolsa@kernel.org Signed-off-by: Alexei Starovoitov Acked-by: Steven Rostedt (Google) --- .../selftests/bpf/prog_tests/stacktrace_ips.c | 46 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/stacktrace_ips.c | 8 ++++ 2 files changed, 54 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c index 6fca459ba550..c9efdd2a5b18 100644 --- a/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c +++ b/tools/testing/selftests/bpf/prog_tests/stacktrace_ips.c @@ -84,12 +84,58 @@ cleanup: stacktrace_ips__destroy(skel); } +static void test_stacktrace_ips_raw_tp(void) +{ + __u32 info_len = sizeof(struct bpf_prog_info); + LIBBPF_OPTS(bpf_test_run_opts, topts); + struct bpf_prog_info info = {}; + struct stacktrace_ips *skel; + __u64 bpf_prog_ksym = 0; + int err; + + skel = stacktrace_ips__open_and_load(); + if (!ASSERT_OK_PTR(skel, "stacktrace_ips__open_and_load")) + return; + + if (!skel->kconfig->CONFIG_UNWINDER_ORC) { + test__skip(); + goto cleanup; + } + + skel->links.rawtp_test = bpf_program__attach_raw_tracepoint( + skel->progs.rawtp_test, + "bpf_testmod_test_read"); + if (!ASSERT_OK_PTR(skel->links.rawtp_test, "bpf_program__attach_raw_tracepoint")) + goto cleanup; + + /* get bpf program address */ + info.jited_ksyms = ptr_to_u64(&bpf_prog_ksym); + info.nr_jited_ksyms = 1; + err = bpf_prog_get_info_by_fd(bpf_program__fd(skel->progs.rawtp_test), + &info, &info_len); + if (!ASSERT_OK(err, "bpf_prog_get_info_by_fd")) + goto cleanup; + + trigger_module_test_read(1); + + load_kallsyms(); + + check_stacktrace_ips(bpf_map__fd(skel->maps.stackmap), skel->bss->stack_key, 2, + bpf_prog_ksym, + ksym_get_addr("bpf_trace_run2")); + +cleanup: + stacktrace_ips__destroy(skel); +} + static void __test_stacktrace_ips(void) { if (test__start_subtest("kprobe_multi")) test_stacktrace_ips_kprobe_multi(false); if (test__start_subtest("kretprobe_multi")) test_stacktrace_ips_kprobe_multi(true); + if (test__start_subtest("raw_tp")) + test_stacktrace_ips_raw_tp(); } #else static void __test_stacktrace_ips(void) diff --git a/tools/testing/selftests/bpf/progs/stacktrace_ips.c b/tools/testing/selftests/bpf/progs/stacktrace_ips.c index e2eb30945c1b..a96c8150d7f5 100644 --- a/tools/testing/selftests/bpf/progs/stacktrace_ips.c +++ b/tools/testing/selftests/bpf/progs/stacktrace_ips.c @@ -38,4 +38,12 @@ int kprobe_multi_test(struct pt_regs *ctx) return 0; } +SEC("raw_tp/bpf_testmod_test_read") +int rawtp_test(void *ctx) +{ + /* Skip ebpf program entry in the stack. */ + stack_key = bpf_get_stackid(ctx, &stackmap, 0); + return 0; +} + char _license[] SEC("license") = "GPL"; -- cgit v1.2.3 From 3534e03e0ec2e00908765549828a69df5ebefb91 Mon Sep 17 00:00:00 2001 From: Bobby Eshleman Date: Wed, 5 Nov 2025 07:59:19 -0800 Subject: selftests/vsock: avoid false-positives when checking dmesg Sometimes VMs will have some intermittent dmesg warnings that are unrelated to vsock. Change the dmesg parsing to filter on strings containing 'vsock' to avoid false positive failures that are unrelated to vsock. The downside is that it is possible for some vsock related warnings to not contain the substring 'vsock', so those will be missed. Fixes: a4a65c6fe08b ("selftests/vsock: add initial vmtest.sh for vsock") Reviewed-by: Simon Horman Signed-off-by: Bobby Eshleman Reviewed-by: Stefano Garzarella Link: https://patch.msgid.link/20251105-vsock-vmtest-dmesg-fix-v2-1-1a042a14892c@meta.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/vsock/vmtest.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/vsock/vmtest.sh b/tools/testing/selftests/vsock/vmtest.sh index edacebfc1632..8ceeb8a7894f 100755 --- a/tools/testing/selftests/vsock/vmtest.sh +++ b/tools/testing/selftests/vsock/vmtest.sh @@ -389,9 +389,9 @@ run_test() { local rc host_oops_cnt_before=$(dmesg | grep -c -i 'Oops') - host_warn_cnt_before=$(dmesg --level=warn | wc -l) + host_warn_cnt_before=$(dmesg --level=warn | grep -c -i 'vsock') vm_oops_cnt_before=$(vm_ssh -- dmesg | grep -c -i 'Oops') - vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_before=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') name=$(echo "${1}" | awk '{ print $1 }') eval test_"${name}" @@ -403,7 +403,7 @@ run_test() { rc=$KSFT_FAIL fi - host_warn_cnt_after=$(dmesg --level=warn | wc -l) + host_warn_cnt_after=$(dmesg --level=warn | grep -c -i 'vsock') if [[ ${host_warn_cnt_after} -gt ${host_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on host" | log_host "${name}" rc=$KSFT_FAIL @@ -415,7 +415,7 @@ run_test() { rc=$KSFT_FAIL fi - vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | wc -l) + vm_warn_cnt_after=$(vm_ssh -- dmesg --level=warn | grep -c -i 'vsock') if [[ ${vm_warn_cnt_after} -gt ${vm_warn_cnt_before} ]]; then echo "FAIL: kernel warning detected on vm" | log_host "${name}" rc=$KSFT_FAIL -- cgit v1.2.3 From 57531b3416448d1ced36a2a974a4085ec43d57b0 Mon Sep 17 00:00:00 2001 From: Alexander Sverdlin Date: Thu, 6 Nov 2025 17:12:09 +0100 Subject: selftests: net: local_termination: Wait for interfaces to come up It seems that most of the tests prepare the interfaces once before the test run (setup_prepare()), rely on setup_wait() to wait for link and only then run the test(s). local_termination brings the physical interfaces down and up during test run but never wait for them to come up. If the auto-negotiation takes some seconds, first test packets are being lost, which leads to false-negative test results. Use setup_wait() in run_test() to make sure auto-negotiation has been completed after all simple_if_init() calls on physical interfaces and test packets will not be lost because of the race against link establishment. Fixes: 90b9566aa5cd3f ("selftests: forwarding: add a test for local_termination.sh") Reviewed-by: Vladimir Oltean Signed-off-by: Alexander Sverdlin Link: https://patch.msgid.link/20251106161213.459501-1-alexander.sverdlin@siemens.com Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/forwarding/local_termination.sh | 2 ++ 1 file changed, 2 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/forwarding/local_termination.sh b/tools/testing/selftests/net/forwarding/local_termination.sh index ecd34f364125..892895659c7e 100755 --- a/tools/testing/selftests/net/forwarding/local_termination.sh +++ b/tools/testing/selftests/net/forwarding/local_termination.sh @@ -176,6 +176,8 @@ run_test() local rcv_dmac=$(mac_get $rcv_if_name) local should_receive + setup_wait + tcpdump_start $rcv_if_name mc_route_prepare $send_if_name -- cgit v1.2.3 From 60260ad935861f6b8db7c65c23faa41c98d8fb15 Mon Sep 17 00:00:00 2001 From: Victor Nogueira Date: Thu, 6 Nov 2025 17:56:21 -0300 Subject: selftests/tc-testing: Create tests trying to add children to clsact/ingress qdiscs In response to Wang's bug report [1], add the following test cases: - Try and fail to add an fq child to an ingress qdisc - Try and fail to add an fq child to a clsact qdisc [1] https://lore.kernel.org/netdev/20251105022213.1981982-1-wangliang74@huawei.com/ Reviewed-by: Pedro Tammela Acked-by: Jamal Hadi Salim Signed-off-by: Victor Nogueira Reviewed-by: Cong Wang Link: https://patch.msgid.link/20251106205621.3307639-2-victor@mojatatu.com Signed-off-by: Jakub Kicinski --- .../tc-testing/tc-tests/infra/qdiscs.json | 44 ++++++++++++++++++++++ 1 file changed, 44 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json index 998e5a2f4579..0091bcd91c2c 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json +++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json @@ -961,5 +961,49 @@ "teardown": [ "$TC qdisc del dev $DUMMY root" ] + }, + { + "id": "4989", + "name": "Try to add an fq child to an ingress qdisc", + "category": [ + "qdisc", + "ingress" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY handle ffff:0 ingress" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY ingress" + ] + }, + { + "id": "c2b0", + "name": "Try to add an fq child to a clsact qdisc", + "category": [ + "qdisc", + "ingress" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + "$TC qdisc add dev $DUMMY handle ffff:0 clsact" + ], + "cmdUnderTest": "$TC qdisc add dev $DUMMY parent ffff:0 handle ffe0:0 fq", + "expExitCode": "2", + "verifyCmd": "$TC -j qdisc ls dev $DUMMY handle ffe0:", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DUMMY clsact" + ] } ] -- cgit v1.2.3 From dd4adb986a86727ed8f56c48b6d0695f1e211e65 Mon Sep 17 00:00:00 2001 From: Steven Rostedt Date: Tue, 28 Oct 2025 12:27:24 -0400 Subject: selftests/tracing: Run sample events to clear page cache events The tracing selftest "event-filter-function.tc" was failing because it first runs the "sample_events" function that triggers the kmem_cache_free event and it looks at what function was used during a call to "ls". But the first time it calls this, it could trigger events that are used to pull pages into the page cache. The rest of the test uses the function it finds during that call to see if it will be called in subsequent "sample_events" calls. But if there's no need to pull pages into the page cache, it will not trigger that function and the test will fail. Call the "sample_events" twice to trigger all the page cache work before it calls it to find a function to use in subsequent checks. Cc: stable@vger.kernel.org Fixes: eb50d0f250e96 ("selftests/ftrace: Choose target function for filter test from samples") Signed-off-by: Steven Rostedt (Google) Acked-by: Masami Hiramatsu (Google) Signed-off-by: Shuah Khan --- tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc index c62165fabd0c..cfa16aa1f39a 100644 --- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc +++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc @@ -20,6 +20,10 @@ sample_events() { echo 0 > tracing_on echo 0 > events/enable +# Clear functions caused by page cache; run sample_events twice +sample_events +sample_events + echo "Get the most frequently calling function" echo > trace sample_events -- cgit v1.2.3 From 39acc6a95eefcf814efa226d8813f89e7e03496e Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:38 -0800 Subject: selftest: netcons: refactor target creation Extract the netconsole target creation from create_dynamic_target(), by moving it from create_dynamic_target() into a new helper function. This enables other tests to use the creation of netconsole targets with arbitrary parameters and no sleep. The new helper will be utilized by forthcoming torture-type selftests that require dynamic target management. Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-2-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/sh/lib_netcons.sh | 30 ++++++++++++++-------- 1 file changed, 19 insertions(+), 11 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 8e1085e89647..9b5ef8074440 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -113,31 +113,39 @@ function set_network() { configure_ip } -function create_dynamic_target() { - local FORMAT=${1:-"extended"} +function _create_dynamic_target() { + local FORMAT="${1:?FORMAT parameter required}" + local NCPATH="${2:?NCPATH parameter required}" DSTMAC=$(ip netns exec "${NAMESPACE}" \ ip link show "${DSTIF}" | awk '/ether/ {print $2}') # Create a dynamic target - mkdir "${NETCONS_PATH}" + mkdir "${NCPATH}" - echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip - echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip - echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac - echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name + echo "${DSTIP}" > "${NCPATH}"/remote_ip + echo "${SRCIP}" > "${NCPATH}"/local_ip + echo "${DSTMAC}" > "${NCPATH}"/remote_mac + echo "${SRCIF}" > "${NCPATH}"/dev_name if [ "${FORMAT}" == "basic" ] then # Basic target does not support release - echo 0 > "${NETCONS_PATH}"/release - echo 0 > "${NETCONS_PATH}"/extended + echo 0 > "${NCPATH}"/release + echo 0 > "${NCPATH}"/extended elif [ "${FORMAT}" == "extended" ] then - echo 1 > "${NETCONS_PATH}"/extended + echo 1 > "${NCPATH}"/extended fi - echo 1 > "${NETCONS_PATH}"/enabled + echo 1 > "${NCPATH}"/enabled + +} + +function create_dynamic_target() { + local FORMAT=${1:-"extended"} + local NCPATH=${2:-"$NETCONS_PATH"} + _create_dynamic_target "${FORMAT}" "${NCPATH}" # This will make sure that the kernel was able to # load the netconsole driver configuration. The console message -- cgit v1.2.3 From 6701896eb90998ff16338f199144bd9deefb79ba Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:39 -0800 Subject: selftest: netcons: create a torture test Create a netconsole test that puts a lot of pressure on the netconsole list manipulation. Do it by creating dynamic targets and deleting targets while messages are being sent. Also put interface down while the messages are being sent, as creating parallel targets. The code launches three background jobs on distinct schedules: * Toggle netcons target every 30 iterations * create and delete random_target every 50 iterations * toggle iface every 70 iterations This creates multiple concurrency sources that interact with netconsole states. This is good practice to simulate stress, and exercise netpoll and netconsole locks. This test already found an issue as reported in [1] Link: https://lore.kernel.org/all/20250901-netpoll_memleak-v1-1-34a181977dfc@debian.org/ [1] Signed-off-by: Breno Leitao Reviewed-by: Andre Carvalho Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-3-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/Makefile | 1 + .../selftests/drivers/net/netcons_torture.sh | 130 +++++++++++++++++++++ 2 files changed, 131 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/netcons_torture.sh (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile index 6e41635bd55a..71ee69e524d7 100644 --- a/tools/testing/selftests/drivers/net/Makefile +++ b/tools/testing/selftests/drivers/net/Makefile @@ -18,6 +18,7 @@ TEST_PROGS := \ netcons_fragmented_msg.sh \ netcons_overflow.sh \ netcons_sysdata.sh \ + netcons_torture.sh \ netpoll_basic.py \ ping.py \ psp.py \ diff --git a/tools/testing/selftests/drivers/net/netcons_torture.sh b/tools/testing/selftests/drivers/net/netcons_torture.sh new file mode 100755 index 000000000000..2ce9ee3719d1 --- /dev/null +++ b/tools/testing/selftests/drivers/net/netcons_torture.sh @@ -0,0 +1,130 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 + +# Repeatedly send kernel messages, toggles netconsole targets on and off, +# creates and deletes targets in parallel, and toggles the source interface to +# simulate stress conditions. +# +# This test aims to verify the robustness of netconsole under dynamic +# configurations and concurrent operations. +# +# The major goal is to run this test with LOCKDEP, Kmemleak and KASAN to make +# sure no issues is reported. +# +# Author: Breno Leitao + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh + +# Number of times the main loop run +ITERATIONS=${1:-150} + +# Only test extended format +FORMAT="extended" +# And ipv6 only +IP_VERSION="ipv6" + +# Create, enable and delete some targets. +create_and_delete_random_target() { + COUNT=2 + RND_PREFIX=$(mktemp -u netcons_rnd_XXXX_) + + if [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}${COUNT}" ] || \ + [ -d "${NETCONS_CONFIGFS}/${RND_PREFIX}0" ]; then + echo "Function didn't finish yet, skipping it." >&2 + return + fi + + # enable COUNT targets + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + + # Basic population so the target can come up + _create_dynamic_target "${FORMAT}" "${RND_TARGET_PATH}" + done + + echo "netconsole selftest: ${COUNT} additional targets were created" > /dev/kmsg + # disable them all + for i in $(seq ${COUNT}) + do + RND_TARGET="${RND_PREFIX}"${i} + RND_TARGET_PATH="${NETCONS_CONFIGFS}"/"${RND_TARGET}" + if [[ $(cat "${RND_TARGET_PATH}/enabled") -eq 1 ]] + then + echo 0 > "${RND_TARGET_PATH}"/enabled + fi + rmdir "${RND_TARGET_PATH}" + done +} + +# Disable and enable the target mid-air, while messages +# are being transmitted. +toggle_netcons_target() { + for i in $(seq 2) + do + if [ ! -d "${NETCONS_PATH}" ] + then + break + fi + echo 0 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + # Try to enable a bit harder, given it might fail to enable + # Write to `enabled` might fail depending on the lock, which is + # highly contentious here + for _ in $(seq 5) + do + echo 1 > "${NETCONS_PATH}"/enabled 2> /dev/null || true + done + done +} + +toggle_iface(){ + ip link set "${SRCIF}" down + ip link set "${SRCIF}" up +} + +# Start here + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup EXIT +# Create one namespace and two interfaces +set_network "${IP_VERSION}" +# Create a dynamic target for netconsole +create_dynamic_target "${FORMAT}" + +for i in $(seq "$ITERATIONS") +do + for _ in $(seq 10) + do + echo "${MSG}: ${TARGET} ${i}" > /dev/kmsg + done + wait + + if (( i % 30 == 0 )); then + toggle_netcons_target & + fi + + if (( i % 50 == 0 )); then + # create some targets, enable them, send msg and disable + # all in a parallel thread + create_and_delete_random_target & + fi + + if (( i % 70 == 0 )); then + toggle_iface & + fi +done +wait + +exit "${EXIT_STATUS}" -- cgit v1.2.3 From 236682db3b6fe71cad76ac5e920ea4c14a33178e Mon Sep 17 00:00:00 2001 From: Breno Leitao Date: Fri, 7 Nov 2025 06:03:40 -0800 Subject: selftest: netcons: add test for netconsole over bonded interfaces This patch adds a selftest that verifies netconsole functionality over bonded network interfaces using netdevsim. It sets up two bonded interfaces acting as transmit (TX) and receive (RX) ends, placed in separate network namespaces. The test sends kernel log messages and verifies that they are properly received on the bonded RX interfaces with both IPv4 and IPv6, and using basic and extended netconsole formats. This patchset aims to test a long-standing netpoll subsystem where netpoll has multiple users. (in this case netconsole and bonding). A similar selftest has been discussed in [1] and [2]. This test also tries to enable bonding and netpoll in different order, just to guarantee that all the possibilities are exercised. Link: https://lore.kernel.org/all/20250905-netconsole_torture-v3-0-875c7febd316@debian.org/ [1] Link: https://lore.kernel.org/lkml/96b940137a50e5c387687bb4f57de8b0435a653f.1404857349.git.decot@googlers.com/ [2] Signed-off-by: Breno Leitao Reviewed-by: Simon Horman Link: https://patch.msgid.link/20251107-netconsole_torture-v10-4-749227b55f63@debian.org Signed-off-by: Jakub Kicinski --- .../testing/selftests/drivers/net/bonding/Makefile | 2 + tools/testing/selftests/drivers/net/bonding/config | 4 + .../drivers/net/bonding/netcons_over_bonding.sh | 361 +++++++++++++++++++++ .../selftests/drivers/net/lib/sh/lib_netcons.sh | 54 ++- 4 files changed, 414 insertions(+), 7 deletions(-) create mode 100755 tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh (limited to 'tools/testing') diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 402d4ee84f2e..6c5c60adb5e8 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -14,6 +14,7 @@ TEST_PROGS := \ dev_addr_lists.sh \ mode-1-recovery-updelay.sh \ mode-2-recovery-updelay.sh \ + netcons_over_bonding.sh \ # end of TEST_PROGS TEST_FILES := \ @@ -24,6 +25,7 @@ TEST_FILES := \ TEST_INCLUDES := \ ../../../net/lib.sh \ + ../lib/sh/lib_netcons.sh \ ../../../net/forwarding/lib.sh \ # end of TEST_INCLUDES diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config index 6bb290abd48b..991494376223 100644 --- a/tools/testing/selftests/drivers/net/bonding/config +++ b/tools/testing/selftests/drivers/net/bonding/config @@ -1,5 +1,6 @@ CONFIG_BONDING=y CONFIG_BRIDGE=y +CONFIG_CONFIGFS_FS=y CONFIG_DUMMY=y CONFIG_INET_ESP=y CONFIG_INET_ESP_OFFLOAD=y @@ -9,6 +10,9 @@ CONFIG_MACVLAN=y CONFIG_NET_ACT_GACT=y CONFIG_NET_CLS_FLOWER=y CONFIG_NET_CLS_MATCHALL=m +CONFIG_NETCONSOLE=m +CONFIG_NETCONSOLE_DYNAMIC=y +CONFIG_NETCONSOLE_EXTENDED_LOG=y CONFIG_NETDEVSIM=m CONFIG_NET_SCH_INGRESS=y CONFIG_NLMON=y diff --git a/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh new file mode 100755 index 000000000000..477cc9379500 --- /dev/null +++ b/tools/testing/selftests/drivers/net/bonding/netcons_over_bonding.sh @@ -0,0 +1,361 @@ +#!/usr/bin/env bash +# SPDX-License-Identifier: GPL-2.0 +# +# This selftest exercises trying to have multiple netpoll users at the same +# time. +# +# This selftest has multiple smalls test inside, and the goal is to +# get interfaces with bonding and netconsole in different orders in order +# to catch any possible issue. +# +# The main test composes of four interfaces being created using netdevsim; two +# of them are bonded to serve as the netconsole's transmit interface. The +# remaining two interfaces are similarly bonded and assigned to a separate +# network namespace, which acts as the receive interface, where socat monitors +# for incoming messages. +# +# A netconsole message is then sent to ensure it is properly received across +# this configuration. +# +# Later, run a few other tests, to make sure that bonding and netconsole +# cannot coexist. +# +# The test's objective is to exercise netpoll usage when managed simultaneously +# by multiple subsystems (netconsole and bonding). +# +# Author: Breno Leitao + +set -euo pipefail + +SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") + +source "${SCRIPTDIR}"/../lib/sh/lib_netcons.sh + +modprobe netdevsim 2> /dev/null || true +modprobe netconsole 2> /dev/null || true +modprobe bonding 2> /dev/null || true +modprobe veth 2> /dev/null || true + +# The content of kmsg will be save to the following file +OUTPUT_FILE="/tmp/${TARGET}" + +# Check for basic system dependency and exit if not found +check_for_dependencies +# Set current loglevel to KERN_INFO(6), and default to KERN_NOTICE(5) +echo "6 5" > /proc/sys/kernel/printk +# Remove the namespace, interfaces and netconsole target on exit +trap cleanup_bond EXIT + +FORMAT="extended" +IP_VERSION="ipv4" +VETH0="veth"$(( RANDOM % 256)) +VETH1="veth"$((256 + RANDOM % 256)) +TXNS="" +RXNS="" + +# Create "bond_tx_XX" and "bond_rx_XX" interfaces, and set DSTIF and SRCIF with +# the bonding interfaces +function setup_bonding_ifaces() { + local RAND=$(( RANDOM % 100 )) + BOND_TX_MAIN_IF="bond_tx_$RAND" + BOND_RX_MAIN_IF="bond_rx_$RAND" + + # Setup TX + if ! ip -n "${TXNS}" link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + then + echo "Failed to create bond TX interface. Is CONFIG_BONDING set?" >&2 + # only clean nsim ifaces and namespace. Nothing else has been + # initialized + cleanup_bond_nsim + trap - EXIT + exit "${ksft_skip}" + fi + + # create_netdevsim() got the interface up, but it needs to be down + # before being enslaved. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # Setup RX + ip -n "${RXNS}" \ + link add "${BOND_RX_MAIN_IF}" type bond mode balance-rr + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" down + ip -n "${RXNS}" \ + link set "${BOND_RX1_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX2_SLAVE_IF}" master "${BOND_RX_MAIN_IF}" + ip -n "${RXNS}" \ + link set "${BOND_RX_MAIN_IF}" up + + export DSTIF="${BOND_RX_MAIN_IF}" + export SRCIF="${BOND_TX_MAIN_IF}" +} + +# Create 4 netdevsim interfaces. Two of them will be bound to TX bonding iface +# and the other two will be bond to the RX interface (on the other namespace) +function create_ifaces_bond() { + BOND_TX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_1}" "${TXNS}") + BOND_TX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_TX_2}" "${TXNS}") + BOND_RX1_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_1}" "${RXNS}") + BOND_RX2_SLAVE_IF=$(create_netdevsim "${NSIM_BOND_RX_2}" "${RXNS}") +} + +# netdevsim link BOND_TX to BOND_RX interfaces +function link_ifaces_bond() { + local BOND_TX1_SLAVE_IFIDX + local BOND_TX2_SLAVE_IFIDX + local BOND_RX1_SLAVE_IFIDX + local BOND_RX2_SLAVE_IFIDX + local TXNS_FD + local RXNS_FD + + BOND_TX1_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX1_SLAVE_IF"/ifindex) + BOND_TX2_SLAVE_IFIDX=$(ip netns exec "${TXNS}" \ + cat /sys/class/net/"$BOND_TX2_SLAVE_IF"/ifindex) + BOND_RX1_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX1_SLAVE_IF"/ifindex) + BOND_RX2_SLAVE_IFIDX=$(ip netns exec "${RXNS}" \ + cat /sys/class/net/"$BOND_RX2_SLAVE_IF"/ifindex) + + exec {TXNS_FD} "$NSIM_DEV_SYS_LINK" + echo "${TXNS_FD}:$BOND_TX2_SLAVE_IFIDX $RXNS_FD:$BOND_RX2_SLAVE_IFIDX" \ + > "$NSIM_DEV_SYS_LINK" + + exec {TXNS_FD}<&- + exec {RXNS_FD}<&- +} + +function create_all_ifaces() { + # setup_ns function is coming from lib.sh + setup_ns TXNS RXNS + export NAMESPACE="${RXNS}" + + # Create two interfaces for RX and two for TX + create_ifaces_bond + # Link netlink ifaces + link_ifaces_bond +} + +# configure DSTIF and SRCIF IPs +function configure_ifaces_ips() { + local IP_VERSION=${1:-"ipv4"} + select_ipv4_or_ipv6 "${IP_VERSION}" + + ip -n "${RXNS}" addr add "${DSTIP}"/24 dev "${DSTIF}" + ip -n "${RXNS}" link set "${DSTIF}" up + + ip -n "${TXNS}" addr add "${SRCIP}"/24 dev "${SRCIF}" + ip -n "${TXNS}" link set "${SRCIF}" up +} + +function test_enable_netpoll_on_enslaved_iface() { + echo 0 > "${NETCONS_PATH}"/enabled + + # At this stage, BOND_TX1_SLAVE_IF is enslaved to BOND_TX_MAIN_IF, and + # linked to BOND_RX1_SLAVE_IF inside the namespace. + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # This should fail with the following message in dmesg: + # netpoll: netconsole: ethX is a slave device, aborting + set +e + enable_netcons_ns 2> /dev/null + set -e + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Bonding and netpoll cannot co-exists." >&2 + exit "${ksft_fail}" + fi +} + +function test_delete_bond_and_reenable_target() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond + + # BOND_TX1_SLAVE_IF is not attached to a bond interface anymore + # netpoll can be plugged in there + echo "${BOND_TX1_SLAVE_IF}" > "${NETCONS_PATH}"/dev_name + + # this should work, since the interface is not enslaved + enable_netcons_ns + + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Unable to start netpoll on an unbond iface." >&2 + exit "${ksft_fail}" + fi +} + +# Send a netconsole message to the netconsole target +function test_send_netcons_msg_through_bond_iface() { + # Listen for netconsole port inside the namespace and + # destination interface + listen_port_and_save_to "${OUTPUT_FILE}" "${IP_VERSION}" & + # Wait for socat to start and listen to the port. + wait_for_port "${RXNS}" "${PORT}" "${IP_VERSION}" + # Send the message + echo "${MSG}: ${TARGET}" > /dev/kmsg + # Wait until socat saves the file to disk + busywait "${BUSYWAIT_TIMEOUT}" test -s "${OUTPUT_FILE}" + # Make sure the message was received in the dst part + # and exit + validate_result "${OUTPUT_FILE}" "${FORMAT}" + # kill socat in case it is still running + pkill_socat +} + +# BOND_TX1_SLAVE_IF has netconsole enabled on it, bind it to BOND_TX_MAIN_IF. +# Given BOND_TX_MAIN_IF was deleted, recreate it first +function test_enslave_netcons_enabled_iface { + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: netconsole expected to be enabled against BOND_TX1_SLAVE_IF" >&2 + exit "${ksft_fail}" + fi + + # recreate the bonding iface. it got deleted by previous + # test (test_delete_bond_and_reenable_target) + ip -n "${TXNS}" \ + link add "${BOND_TX_MAIN_IF}" type bond mode balance-rr + + # sub-interface need to be down before attaching to bonding + # This will also disable netconsole. + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" down + ip -n "${TXNS}" \ + link set "${BOND_TX1_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + ip -n "${TXNS}" \ + link set "${BOND_TX_MAIN_IF}" up + + # netconsole got disabled while the interface was down + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 1 ]] + then + echo "test failed: Device is part of a bond iface, cannot have netcons enabled" >&2 + exit "${ksft_fail}" + fi +} + +# Get netconsole enabled on a bonding interface and attach a second +# sub-interface. +function test_enslave_iface_to_bond { + # BOND_TX_MAIN_IF has only BOND_TX1_SLAVE_IF right now + echo "${BOND_TX_MAIN_IF}" > "${NETCONS_PATH}"/dev_name + enable_netcons_ns + + # netcons is attached to bond0 and BOND_TX1_SLAVE_IF is + # part of BOND_TX_MAIN_IF. Attach BOND_TX2_SLAVE_IF to BOND_TX_MAIN_IF. + ip -n "${TXNS}" \ + link set "${BOND_TX2_SLAVE_IF}" master "${BOND_TX_MAIN_IF}" + if [[ $(cat "${NETCONS_PATH}"/enabled) -eq 0 ]] + then + echo "test failed: Netconsole should be enabled on bonding interface. Failed" >&2 + exit "${ksft_fail}" + fi +} + +function test_enslave_iff_disabled_netpoll_iface { + local ret + + # Create two interfaces. veth interfaces it known to have + # IFF_DISABLE_NETPOLL set + if ! ip link add "${VETH0}" type veth peer name "${VETH1}" + then + echo "Failed to create veth TX interface. Is CONFIG_VETH set?" >&2 + exit "${ksft_skip}" + fi + set +e + # This will print RTNETLINK answers: Device or resource busy + ip link set "${VETH0}" master "${BOND_TX_MAIN_IF}" 2> /dev/null + ret=$? + set -e + if [[ $ret -eq 0 ]] + then + echo "test failed: veth interface could not be enslaved" + exit "${ksft_fail}" + fi +} + +# Given that netconsole picks the current net namespace, we need to enable it +# from inside the TXNS namespace +function enable_netcons_ns() { + ip netns exec "${TXNS}" sh -c \ + "mount -t configfs configfs /sys/kernel/config && echo 1 > $NETCONS_PATH/enabled" +} + +#################### +# Tests start here # +#################### + +# Create regular interfaces using netdevsim and link them +create_all_ifaces + +# Setup the bonding interfaces +# BOND_RX_MAIN_IF has BOND_RX{1,2}_SLAVE_IF +# BOND_TX_MAIN_IF has BOND_TX{1,2}_SLAVE_IF +setup_bonding_ifaces + +# Configure the ips as BOND_RX1_SLAVE_IF and BOND_TX1_SLAVE_IF +configure_ifaces_ips "${IP_VERSION}" + +_create_dynamic_target "${FORMAT}" "${NETCONS_PATH}" +enable_netcons_ns +set_user_data + +# Test #1 : Create an bonding interface and attach netpoll into +# the bonding interface. Netconsole/netpoll should work on +# the bonding interface. +test_send_netcons_msg_through_bond_iface +echo "test #1: netpoll on bonding interface worked. Test passed" >&2 + +# Test #2: Attach netpoll to an enslaved interface +# Try to attach netpoll to an enslaved sub-interface (while still being part of +# a bonding interface), which shouldn't be allowed +test_enable_netpoll_on_enslaved_iface +echo "test #2: netpoll correctly rejected enslaved interface (expected behavior). Test passed." >&2 + +# Test #3: Unplug the sub-interface from bond and enable netconsole +# Detach the interface from a bonding interface and attach netpoll again +test_delete_bond_and_reenable_target +echo "test #3: Able to attach to an unbound interface. Test passed." >&2 + +# Test #4: Enslave a sub-interface that had netconsole enabled +# Try to enslave an interface that has netconsole/netpoll enabled. +# Previous test has netconsole enabled in BOND_TX1_SLAVE_IF, try to enslave it +test_enslave_netcons_enabled_iface +echo "test #4: Enslaving an interface with netpoll attached. Test passed." >&2 + +# Test #5: Enslave a sub-interface to a bonding interface +# Enslave an interface to a bond interface that has netpoll attached +# At this stage, BOND_TX_MAIN_IF is created and BOND_TX1_SLAVE_IF is part of +# it. Netconsole is currently disabled +test_enslave_iface_to_bond +echo "test #5: Enslaving an interface to bond+netpoll. Test passed." >&2 + +# Test #6: Enslave a IFF_DISABLE_NETPOLL sub-interface to a bonding interface +# At this stage, BOND_TX_MAIN_IF has both sub interface and netconsole is +# enabled. This test will try to enslave an a veth (IFF_DISABLE_NETPOLL) interface +# and it should fail, with netpoll: veth0 doesn't support polling +test_enslave_iff_disabled_netpoll_iface +echo "test #6: Enslaving IFF_DISABLE_NETPOLL ifaces to bond iface is not supported. Test passed." >&2 + +cleanup_bond +trap - EXIT +exit "${EXIT_STATUS}" diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh index 9b5ef8074440..87f89fd92f8c 100644 --- a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh +++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh @@ -11,9 +11,11 @@ set -euo pipefail LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") SRCIF="" # to be populated later +SRCIP="" # to be populated later SRCIP4="192.0.2.1" SRCIP6="fc00::1" DSTIF="" # to be populated later +DSTIP="" # to be populated later DSTIP4="192.0.2.2" DSTIP6="fc00::2" @@ -28,17 +30,23 @@ NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}" # NAMESPACE will be populated by setup_ns with a random value NAMESPACE="" -# IDs for netdevsim +# IDs for netdevsim. We either use NSIM_DEV_{1,2}_ID for standard test +# or NSIM_BOND_{T,R}X_{1,2} for the bonding tests. Not both at the +# same time. NSIM_DEV_1_ID=$((256 + RANDOM % 256)) NSIM_DEV_2_ID=$((512 + RANDOM % 256)) +NSIM_BOND_TX_1=$((768 + RANDOM % 256)) +NSIM_BOND_TX_2=$((1024 + RANDOM % 256)) +NSIM_BOND_RX_1=$((1280 + RANDOM % 256)) +NSIM_BOND_RX_2=$((1536 + RANDOM % 256)) NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device" +NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device" # Used to create and delete namespaces source "${LIBDIR}"/../../../../net/lib.sh # Create netdevsim interfaces create_ifaces() { - echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW" echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW" udevadm settle 2> /dev/null || true @@ -137,9 +145,6 @@ function _create_dynamic_target() { then echo 1 > "${NCPATH}"/extended fi - - echo 1 > "${NCPATH}"/enabled - } function create_dynamic_target() { @@ -147,6 +152,8 @@ function create_dynamic_target() { local NCPATH=${2:-"$NETCONS_PATH"} _create_dynamic_target "${FORMAT}" "${NCPATH}" + echo 1 > "${NCPATH}"/enabled + # This will make sure that the kernel was able to # load the netconsole driver configuration. The console message # gets more organized/sequential as well. @@ -193,14 +200,26 @@ function do_cleanup() { echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk } -function cleanup() { +function cleanup_netcons() { # delete netconsole dynamic reconfiguration - echo 0 > "${NETCONS_PATH}"/enabled + # do not fail if the target is already disabled + if [[ ! -d "${NETCONS_PATH}" ]] + then + # in some cases this is called before netcons path is created + return + fi + if [[ $(cat "${NETCONS_PATH}"/enabled) != 0 ]] + then + echo 0 > "${NETCONS_PATH}"/enabled || true + fi # Remove all the keys that got created during the selftest find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete # Remove the configfs entry rmdir "${NETCONS_PATH}" +} +function cleanup() { + cleanup_netcons do_cleanup } @@ -377,3 +396,24 @@ function wait_for_port() { # more frequently on IPv6 sleep 1 } + +# Clean up netdevsim ifaces created for bonding test +function cleanup_bond_nsim() { + ip -n "${TXNS}" \ + link delete "${BOND_TX_MAIN_IF}" type bond || true + ip -n "${RXNS}" \ + link delete "${BOND_RX_MAIN_IF}" type bond || true + + cleanup_netdevsim "$NSIM_BOND_TX_1" + cleanup_netdevsim "$NSIM_BOND_TX_2" + cleanup_netdevsim "$NSIM_BOND_RX_1" + cleanup_netdevsim "$NSIM_BOND_RX_2" +} + +# cleanup tests that use bonding interfaces +function cleanup_bond() { + cleanup_netcons + cleanup_bond_nsim + cleanup_all_ns + ip link delete "${VETH0}" || true +} -- cgit v1.2.3 From 63c643aa7b7287fdbb0167063785f89ece3f000f Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:40 +0100 Subject: selftests: mptcp: connect: fix fallback note due to OoO The "fallback due to TCP OoO" was never printed because the stat_ooo_now variable was checked twice: once in the parent if-statement, and one in the child one. The second condition was then always true then, and the 'else' branch was never taken. The idea is that when there are more ACK + MP_CAPABLE than expected, the test either fails if there was no out of order packets, or a notice is printed. Fixes: 69ca3d29a755 ("mptcp: update selftest for fallback due to OoO") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-1-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index 47ecb5b3836e..9b7b93f8eb0c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -492,7 +492,7 @@ do_transfer() "than expected (${expect_synrx})" retc=1 fi - if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then + if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then if [ ${stat_ooo_now} -eq 0 ]; then mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \ "than expected (${expect_ackrx})" -- cgit v1.2.3 From aea73bae662a0e184393d6d7d0feb18d2577b9b9 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:41 +0100 Subject: selftests: mptcp: join: rm: set backup flag Some of these 'remove' tests rarely fail because a subflow has been reset instead of cleanly removed. This can happen when one extra subflow which has never carried data is being closed (FIN) on one side, while the other is sending data for the first time. To avoid such subflows to be used right at the end, the backup flag has been added. With that, data will be only carried on the initial subflow. Fixes: d2c4333a801c ("selftests: mptcp: add testcases for removing addrs") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-2-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 54 ++++++++++++------------- 1 file changed, 27 insertions(+), 27 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 78a1aa4ecff2..9ed9ec7202d6 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -2532,7 +2532,7 @@ remove_tests() if reset "remove single subflow"; then pm_nl_set_limits $ns1 0 1 pm_nl_set_limits $ns2 0 1 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 1 1 1 @@ -2545,8 +2545,8 @@ remove_tests() if reset "remove multiple subflows"; then pm_nl_set_limits $ns1 0 2 pm_nl_set_limits $ns2 0 2 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns2=-2 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2557,7 +2557,7 @@ remove_tests() # single address, remove if reset "remove single address"; then pm_nl_set_limits $ns1 0 1 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 1 addr_nr_ns1=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2570,9 +2570,9 @@ remove_tests() # subflow and signal, remove if reset "remove subflow and signal"; then pm_nl_set_limits $ns1 0 2 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 2 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-1 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 2 2 2 @@ -2584,10 +2584,10 @@ remove_tests() # subflows and signal, remove if reset "remove subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-1 addr_nr_ns2=-2 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2599,9 +2599,9 @@ remove_tests() # addresses remove if reset "remove addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2614,10 +2614,10 @@ remove_tests() # invalid addresses remove if reset "remove invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup # broadcast IP: no packet for this address will be received on ns1 - pm_nl_add_endpoint $ns1 224.0.0.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal + pm_nl_add_endpoint $ns1 224.0.0.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup pm_nl_set_limits $ns2 2 2 addr_nr_ns1=-3 speed=10 \ run_tests $ns1 $ns2 10.0.1.1 @@ -2631,10 +2631,10 @@ remove_tests() # subflows and signal, flush if reset "flush subflows and signal"; then pm_nl_set_limits $ns1 0 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup pm_nl_set_limits $ns2 1 3 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2647,9 +2647,9 @@ remove_tests() if reset "flush subflows"; then pm_nl_set_limits $ns1 3 3 pm_nl_set_limits $ns2 3 3 - pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow id 150 - pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow - pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow + pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow,backup id 150 + pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow,backup + pm_nl_add_endpoint $ns2 10.0.4.2 flags subflow,backup addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 chk_join_nr 3 3 3 @@ -2666,9 +2666,9 @@ remove_tests() # addresses flush if reset "flush addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.2.1 flags signal id 250 - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.4.1 flags signal + pm_nl_add_endpoint $ns1 10.0.2.1 flags signal,backup id 250 + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.4.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 addr_nr_ns2=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 @@ -2681,9 +2681,9 @@ remove_tests() # invalid addresses flush if reset "flush invalid addresses"; then pm_nl_set_limits $ns1 3 3 - pm_nl_add_endpoint $ns1 10.0.12.1 flags signal - pm_nl_add_endpoint $ns1 10.0.3.1 flags signal - pm_nl_add_endpoint $ns1 10.0.14.1 flags signal + pm_nl_add_endpoint $ns1 10.0.12.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.3.1 flags signal,backup + pm_nl_add_endpoint $ns1 10.0.14.1 flags signal,backup pm_nl_set_limits $ns2 3 3 addr_nr_ns1=-8 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 -- cgit v1.2.3 From 6457595db9870298ee30b6d75287b8548e33fe19 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:42 +0100 Subject: selftests: mptcp: join: endpoints: longer transfer In rare cases, when the test environment is very slow, some userspace tests can fail because some expected events have not been seen. Because the tests are expecting a long on-going connection, and they are not waiting for the end of the transfer, it is fine to make the connection longer. This connection will be killed at the end, after the verifications, so making it longer doesn't change anything, apart from avoid it to end before the end of the verifications To play it safe, all endpoints tests not waiting for the end of the transfer are now sharing a longer file (128KB) at slow speed. Fixes: 69c6ce7b6eca ("selftests: mptcp: add implicit endpoint test case") Cc: stable@vger.kernel.org Fixes: e274f7154008 ("selftests: mptcp: add subflow limits test-cases") Fixes: b5e2fb832f48 ("selftests: mptcp: add explicit test case for remove/readd") Fixes: e06959e9eebd ("selftests: mptcp: join: test for flush/re-add endpoints") Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-3-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 9ed9ec7202d6..97af8d89ac5c 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3943,7 +3943,7 @@ endpoint_tests() pm_nl_set_limits $ns1 2 2 pm_nl_set_limits $ns2 2 2 pm_nl_add_endpoint $ns1 10.0.2.1 flags signal - { speed=slow \ + { test_linkfail=128 speed=slow \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -3970,7 +3970,7 @@ endpoint_tests() pm_nl_set_limits $ns2 0 3 pm_nl_add_endpoint $ns2 10.0.1.2 id 1 dev ns2eth1 flags subflow pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow - { test_linkfail=4 speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4048,7 +4048,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns1 10.0.1.1 id 42 flags signal - { test_linkfail=4 speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! @@ -4121,7 +4121,7 @@ endpoint_tests() # broadcast IP: no packet for this address will be received on ns1 pm_nl_add_endpoint $ns1 224.0.0.1 id 2 flags signal pm_nl_add_endpoint $ns2 10.0.3.2 id 3 flags subflow - { test_linkfail=4 speed=20 \ + { test_linkfail=128 speed=20 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! -- cgit v1.2.3 From 290493078b96ce2ce3e60f55c23654acb678042a Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:43 +0100 Subject: selftests: mptcp: join: userspace: longer transfer In rare cases, when the test environment is very slow, some userspace tests can fail because some expected events have not been seen. Because the tests are expecting a long on-going connection, and they are not waiting for the end of the transfer, it is fine to make the connection longer. This connection will be killed at the end, after the verifications, so making it longer doesn't change anything, apart from avoid it to end before the end of the verifications To play it safe, all userspace tests not waiting for the end of the transfer are now sharing a longer file (128KB) at slow speed. Fixes: 4369c198e599 ("selftests: mptcp: test userspace pm out of transfer") Cc: stable@vger.kernel.org Fixes: b2e2248f365a ("selftests: mptcp: userspace pm create id 0 subflow") Fixes: e3b47e460b4b ("selftests: mptcp: userspace pm remove initial subflow") Fixes: b9fb176081fb ("selftests: mptcp: userspace pm send RM_ADDR for ID 0") Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-4-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 97af8d89ac5c..01273abfdc89 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3806,7 +3806,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 2 2 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 @@ -3839,7 +3839,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3867,7 +3867,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3888,7 +3888,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns2 pm_nl_set_limits $ns1 0 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns2 @@ -3912,7 +3912,7 @@ userspace_tests() continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then set_userspace_pm $ns1 pm_nl_set_limits $ns2 1 1 - { speed=5 \ + { test_linkfail=128 speed=5 \ run_tests $ns1 $ns2 10.0.1.1 & } 2>/dev/null local tests_pid=$! wait_mpj $ns1 -- cgit v1.2.3 From ee79980f7a428ec299f6261bea4c1084dcbc9631 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:44 +0100 Subject: selftests: mptcp: connect: trunc: read all recv data MPTCP Join "fastclose server" selftest is sometimes failing because the client output file doesn't have the expected size, e.g. 296B instead of 1024B. When looking at a packet trace when this happens, the server sent the expected 1024B in two parts -- 100B, then 924B -- then the MP_FASTCLOSE. It is then strange to see the client only receiving 296B, which would mean it only got a part of the second packet. The problem is then not on the networking side, but rather on the data reception side. When mptcp_connect is launched with '-f -1', it means the connection might stop before having sent everything, because a reset has been received. When this happens, the program was directly stopped. But it is also possible there are still some data to read, simply because the previous 'read' step was done with a buffer smaller than the pending data, see do_rnd_read(). In this case, it is important to read what's left in the kernel buffers before stopping without error like before. SIGPIPE is now ignored, not to quit the app before having read everything. Fixes: 6bf41020b72b ("selftests: mptcp: update and extend fastclose test-cases") Cc: stable@vger.kernel.org Reviewed-by: Geliang Tang Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-5-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_connect.c | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index b148cadb96d0..fc7e22b503d3 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -710,8 +710,14 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, bw = do_rnd_write(peerfd, winfo->buf + winfo->off, winfo->len); if (bw < 0) { - if (cfg_rcv_trunc) - return 0; + /* expected reset, continue to read */ + if (cfg_rcv_trunc && + (errno == ECONNRESET || + errno == EPIPE)) { + fds.events &= ~POLLOUT; + continue; + } + perror("write"); return 111; } @@ -737,8 +743,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd, } if (fds.revents & (POLLERR | POLLNVAL)) { - if (cfg_rcv_trunc) - return 0; + if (cfg_rcv_trunc) { + fds.events &= ~(POLLERR | POLLNVAL); + continue; + } fprintf(stderr, "Unexpected revents: " "POLLERR/POLLNVAL(%x)\n", fds.revents); return 5; @@ -1433,7 +1441,7 @@ static void parse_opts(int argc, char **argv) */ if (cfg_truncate < 0) { cfg_rcv_trunc = true; - signal(SIGPIPE, handle_signal); + signal(SIGPIPE, SIG_IGN); } break; case 'j': -- cgit v1.2.3 From 852b644acbce1529307a4bb283752c4e77b5cda7 Mon Sep 17 00:00:00 2001 From: "Matthieu Baerts (NGI0)" Date: Mon, 10 Nov 2025 19:23:45 +0100 Subject: selftests: mptcp: join: properly kill background tasks The 'run_tests' function is executed in the background, but killing its associated PID would not kill the children tasks running in the background. To properly kill all background tasks, 'kill -- -PID' could be used, but this requires kill from procps-ng. Instead, all children tasks are listed using 'ps', and 'kill' is called with all PIDs of this group. Fixes: 31ee4ad86afd ("selftests: mptcp: join: stop transfer when check is done (part 1)") Cc: stable@vger.kernel.org Fixes: 04b57c9e096a ("selftests: mptcp: join: stop transfer when check is done (part 2)") Signed-off-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251110-net-mptcp-sft-join-unstable-v1-6-a4332c714e10@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/mptcp/mptcp_join.sh | 18 +++++++++--------- tools/testing/selftests/net/mptcp/mptcp_lib.sh | 21 +++++++++++++++++++++ 2 files changed, 30 insertions(+), 9 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh index 01273abfdc89..41503c241989 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_join.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -3831,7 +3831,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create destroy subflow @@ -3859,7 +3859,7 @@ userspace_tests() chk_mptcp_info subflows 0 subflows 0 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm create id 0 subflow @@ -3880,7 +3880,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 2 2 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm remove initial subflow @@ -3904,7 +3904,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi # userspace pm send RM_ADDR for ID 0 @@ -3930,7 +3930,7 @@ userspace_tests() chk_mptcp_info subflows 1 subflows 1 chk_subflows_total 1 1 kill_events_pids - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi } @@ -3960,7 +3960,7 @@ endpoint_tests() pm_nl_add_endpoint $ns2 10.0.2.2 flags signal pm_nl_check_endpoint "modif is allowed" \ $ns2 10.0.2.2 id 1 flags signal - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid fi if reset_with_tcp_filter "delete and re-add" ns2 10.0.3.2 REJECT OUTPUT && @@ -4015,7 +4015,7 @@ endpoint_tests() chk_mptcp_info subflows 3 subflows 3 done - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -4089,7 +4089,7 @@ endpoint_tests() wait_mpj $ns2 chk_subflow_nr "after re-re-add ID 0" 3 chk_mptcp_info subflows 3 subflows 3 - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid kill_events_pids chk_evt_nr ns1 MPTCP_LIB_EVENT_LISTENER_CREATED 1 @@ -4137,7 +4137,7 @@ endpoint_tests() wait_mpj $ns2 pm_nl_add_endpoint $ns1 10.0.3.1 id 2 flags signal wait_mpj $ns2 - mptcp_lib_kill_wait $tests_pid + mptcp_lib_kill_group_wait $tests_pid join_syn_tx=3 join_connect_err=1 \ chk_join_nr 2 2 2 diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh index d62e653d48b0..f4388900016a 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh @@ -350,6 +350,27 @@ mptcp_lib_kill_wait() { wait "${1}" 2>/dev/null } +# $1: PID +mptcp_lib_pid_list_children() { + local curr="${1}" + # evoke 'ps' only once + local pids="${2:-"$(ps o pid,ppid)"}" + + echo "${curr}" + + local pid + for pid in $(echo "${pids}" | awk "\$2 == ${curr} { print \$1 }"); do + mptcp_lib_pid_list_children "${pid}" "${pids}" + done +} + +# $1: PID +mptcp_lib_kill_group_wait() { + # Some users might not have procps-ng: cannot use "kill -- -PID" + mptcp_lib_pid_list_children "${1}" | xargs -r kill &>/dev/null + wait "${1}" 2>/dev/null +} + # $1: IP address mptcp_lib_is_v6() { [ -z "${1##*:*}" ] -- cgit v1.2.3 From 7c44656ab3ea6f8429027ed14c23b314502e2541 Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 11 Nov 2025 10:48:24 -0800 Subject: vfio: selftests: add iova range query helpers VFIO selftests need to map IOVAs from legally accessible ranges, which could vary between hardware. Tests in vfio_dma_mapping_test.c are making excessively strong assumptions about which IOVAs can be mapped. Add vfio_iommu_iova_ranges(), which queries IOVA ranges from the IOMMUFD or VFIO container associated with the device. The queried ranges are normalized to IOMMUFD's iommu_iova_range representation so that handling of IOVA ranges up the stack can be implementation-agnostic. iommu_iova_range and vfio_iova_range are equivalent, so bias to using the new interface's struct. Query IOMMUFD's ranges with IOMMU_IOAS_IOVA_RANGES. Query VFIO container's ranges with VFIO_IOMMU_GET_INFO and VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE. The underlying vfio_iommu_type1_info buffer-related functionality has been kept generic so the same helpers can be used to query other capability chain information, if needed. Reviewed-by: David Matlack Tested-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-1-7960244642c5@fb.com Signed-off-by: Alex Williamson --- .../testing/selftests/vfio/lib/include/vfio_util.h | 8 +- tools/testing/selftests/vfio/lib/vfio_pci_device.c | 172 +++++++++++++++++++++ 2 files changed, 179 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h index 240409bf5f8a..ef8f06ef0c13 100644 --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h @@ -4,9 +4,12 @@ #include #include -#include + +#include +#include #include #include +#include #include "../../../kselftest.h" @@ -206,6 +209,9 @@ struct vfio_pci_device *vfio_pci_device_init(const char *bdf, const char *iommu_ void vfio_pci_device_cleanup(struct vfio_pci_device *device); void vfio_pci_device_reset(struct vfio_pci_device *device); +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, + u32 *nranges); + int __vfio_pci_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region); int __vfio_pci_dma_unmap(struct vfio_pci_device *device, diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c index a381fd253aa7..11749348f53f 100644 --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -29,6 +29,178 @@ VFIO_ASSERT_EQ(__ret, 0, "ioctl(%s, %s, %s) returned %d\n", #_fd, #_op, #_arg, __ret); \ } while (0) +static struct vfio_info_cap_header *next_cap_hdr(void *buf, u32 bufsz, + u32 *cap_offset) +{ + struct vfio_info_cap_header *hdr; + + if (!*cap_offset) + return NULL; + + VFIO_ASSERT_LT(*cap_offset, bufsz); + VFIO_ASSERT_GE(bufsz - *cap_offset, sizeof(*hdr)); + + hdr = (struct vfio_info_cap_header *)((u8 *)buf + *cap_offset); + *cap_offset = hdr->next; + + return hdr; +} + +static struct vfio_info_cap_header *vfio_iommu_info_cap_hdr(struct vfio_iommu_type1_info *info, + u16 cap_id) +{ + struct vfio_info_cap_header *hdr; + u32 cap_offset = info->cap_offset; + u32 max_depth; + u32 depth = 0; + + if (!(info->flags & VFIO_IOMMU_INFO_CAPS)) + return NULL; + + if (cap_offset) + VFIO_ASSERT_GE(cap_offset, sizeof(*info)); + + max_depth = (info->argsz - sizeof(*info)) / sizeof(*hdr); + + while ((hdr = next_cap_hdr(info, info->argsz, &cap_offset))) { + depth++; + VFIO_ASSERT_LE(depth, max_depth, "Capability chain contains a cycle\n"); + + if (hdr->id == cap_id) + return hdr; + } + + return NULL; +} + +/* Return buffer including capability chain, if present. Free with free() */ +static struct vfio_iommu_type1_info *vfio_iommu_get_info(struct vfio_pci_device *device) +{ + struct vfio_iommu_type1_info *info; + + info = malloc(sizeof(*info)); + VFIO_ASSERT_NOT_NULL(info); + + *info = (struct vfio_iommu_type1_info) { + .argsz = sizeof(*info), + }; + + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); + + info = realloc(info, info->argsz); + VFIO_ASSERT_NOT_NULL(info); + + ioctl_assert(device->container_fd, VFIO_IOMMU_GET_INFO, info); + VFIO_ASSERT_GE(info->argsz, sizeof(*info)); + + return info; +} + +/* + * Return iova ranges for the device's container. Normalize vfio_iommu_type1 to + * report iommufd's iommu_iova_range. Free with free(). + */ +static struct iommu_iova_range *vfio_iommu_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct vfio_iommu_type1_info_cap_iova_range *cap_range; + struct vfio_iommu_type1_info *info; + struct vfio_info_cap_header *hdr; + struct iommu_iova_range *ranges = NULL; + + info = vfio_iommu_get_info(device); + hdr = vfio_iommu_info_cap_hdr(info, VFIO_IOMMU_TYPE1_INFO_CAP_IOVA_RANGE); + VFIO_ASSERT_NOT_NULL(hdr); + + cap_range = container_of(hdr, struct vfio_iommu_type1_info_cap_iova_range, header); + VFIO_ASSERT_GT(cap_range->nr_iovas, 0); + + ranges = calloc(cap_range->nr_iovas, sizeof(*ranges)); + VFIO_ASSERT_NOT_NULL(ranges); + + for (u32 i = 0; i < cap_range->nr_iovas; i++) { + ranges[i] = (struct iommu_iova_range){ + .start = cap_range->iova_ranges[i].start, + .last = cap_range->iova_ranges[i].end, + }; + } + + *nranges = cap_range->nr_iovas; + + free(info); + return ranges; +} + +/* Return iova ranges of the device's IOAS. Free with free() */ +static struct iommu_iova_range *iommufd_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct iommu_iova_range *ranges; + int ret; + + struct iommu_ioas_iova_ranges query = { + .size = sizeof(query), + .ioas_id = device->ioas_id, + }; + + ret = ioctl(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); + VFIO_ASSERT_EQ(ret, -1); + VFIO_ASSERT_EQ(errno, EMSGSIZE); + VFIO_ASSERT_GT(query.num_iovas, 0); + + ranges = calloc(query.num_iovas, sizeof(*ranges)); + VFIO_ASSERT_NOT_NULL(ranges); + + query.allowed_iovas = (uintptr_t)ranges; + + ioctl_assert(device->iommufd, IOMMU_IOAS_IOVA_RANGES, &query); + *nranges = query.num_iovas; + + return ranges; +} + +static int iova_range_comp(const void *a, const void *b) +{ + const struct iommu_iova_range *ra = a, *rb = b; + + if (ra->start < rb->start) + return -1; + + if (ra->start > rb->start) + return 1; + + return 0; +} + +/* Return sorted IOVA ranges of the device. Free with free(). */ +struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, + u32 *nranges) +{ + struct iommu_iova_range *ranges; + + if (device->iommufd) + ranges = iommufd_iova_ranges(device, nranges); + else + ranges = vfio_iommu_iova_ranges(device, nranges); + + if (!ranges) + return NULL; + + VFIO_ASSERT_GT(*nranges, 0); + + /* Sort and check that ranges are sane and non-overlapping */ + qsort(ranges, *nranges, sizeof(*ranges), iova_range_comp); + VFIO_ASSERT_LT(ranges[0].start, ranges[0].last); + + for (u32 i = 1; i < *nranges; i++) { + VFIO_ASSERT_LT(ranges[i].start, ranges[i].last); + VFIO_ASSERT_LT(ranges[i - 1].last, ranges[i].start); + } + + return ranges; +} + iova_t __to_iova(struct vfio_pci_device *device, void *vaddr) { struct vfio_dma_region *region; -- cgit v1.2.3 From a77fa0b9222d2f23a764061a3be18e6bc738672e Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 11 Nov 2025 10:48:25 -0800 Subject: vfio: selftests: fix map limit tests to use last available iova Use the newly available vfio_pci_iova_ranges() to determine the last legal IOVA, and use this as the basis for vfio_dma_map_limit_test tests. Fixes: de8d1f2fd5a5 ("vfio: selftests: add end of address space DMA map/unmap tests") Reviewed-by: David Matlack Tested-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-2-7960244642c5@fb.com Signed-off-by: Alex Williamson --- tools/testing/selftests/vfio/vfio_dma_mapping_test.c | 15 +++++++++++++-- 1 file changed, 13 insertions(+), 2 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c index 4f1ea79a200c..e1374aab96bd 100644 --- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -3,6 +3,8 @@ #include #include +#include +#include #include #include #include @@ -219,7 +221,10 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(); FIXTURE_SETUP(vfio_dma_map_limit_test) { struct vfio_dma_region *region = &self->region; + struct iommu_iova_range *ranges; u64 region_size = getpagesize(); + iova_t last_iova; + u32 nranges; /* * Over-allocate mmap by double the size to provide enough backing vaddr @@ -232,8 +237,13 @@ FIXTURE_SETUP(vfio_dma_map_limit_test) MAP_ANONYMOUS | MAP_PRIVATE, -1, 0); ASSERT_NE(region->vaddr, MAP_FAILED); - /* One page prior to the end of address space */ - region->iova = ~(iova_t)0 & ~(region_size - 1); + ranges = vfio_pci_iova_ranges(self->device, &nranges); + VFIO_ASSERT_NOT_NULL(ranges); + last_iova = ranges[nranges - 1].last; + free(ranges); + + /* One page prior to the last iova */ + region->iova = last_iova & ~(region_size - 1); region->size = region_size; } @@ -276,6 +286,7 @@ TEST_F(vfio_dma_map_limit_test, overflow) struct vfio_dma_region *region = &self->region; int rc; + region->iova = ~(iova_t)0 & ~(region->size - 1); region->size = self->mmap_size; rc = __vfio_pci_dma_map(self->device, region); -- cgit v1.2.3 From ce0e3c403e00e9e03e80aca6570bf936a44279e2 Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 11 Nov 2025 10:48:26 -0800 Subject: vfio: selftests: add iova allocator Add struct iova_allocator, which gives tests a convenient way to generate legally-accessible IOVAs to map. This allocator traverses the sorted available IOVA ranges linearly, requires power-of-two size allocations, and does not support freeing iova allocations. The assumption is that tests are not IOVA space-bounded, and will not need to recycle IOVAs. This is based on Alex Williamson's patch series for adding an IOVA allocator [1]. [1] https://lore.kernel.org/all/20251108212954.26477-1-alex@shazbot.org/ Reviewed-by: David Matlack Tested-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-3-7960244642c5@fb.com Signed-off-by: Alex Williamson --- .../testing/selftests/vfio/lib/include/vfio_util.h | 11 ++++ tools/testing/selftests/vfio/lib/vfio_pci_device.c | 74 +++++++++++++++++++++- 2 files changed, 84 insertions(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/vfio/lib/include/vfio_util.h b/tools/testing/selftests/vfio/lib/include/vfio_util.h index ef8f06ef0c13..69ec0c856481 100644 --- a/tools/testing/selftests/vfio/lib/include/vfio_util.h +++ b/tools/testing/selftests/vfio/lib/include/vfio_util.h @@ -188,6 +188,13 @@ struct vfio_pci_device { struct vfio_pci_driver driver; }; +struct iova_allocator { + struct iommu_iova_range *ranges; + u32 nranges; + u32 range_idx; + u64 range_offset; +}; + /* * Return the BDF string of the device that the test should use. * @@ -212,6 +219,10 @@ void vfio_pci_device_reset(struct vfio_pci_device *device); struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, u32 *nranges); +struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device); +void iova_allocator_cleanup(struct iova_allocator *allocator); +iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size); + int __vfio_pci_dma_map(struct vfio_pci_device *device, struct vfio_dma_region *region); int __vfio_pci_dma_unmap(struct vfio_pci_device *device, diff --git a/tools/testing/selftests/vfio/lib/vfio_pci_device.c b/tools/testing/selftests/vfio/lib/vfio_pci_device.c index 11749348f53f..b479a359da12 100644 --- a/tools/testing/selftests/vfio/lib/vfio_pci_device.c +++ b/tools/testing/selftests/vfio/lib/vfio_pci_device.c @@ -12,11 +12,12 @@ #include #include +#include #include #include +#include #include #include -#include #include "../../../kselftest.h" #include @@ -201,6 +202,77 @@ struct iommu_iova_range *vfio_pci_iova_ranges(struct vfio_pci_device *device, return ranges; } +struct iova_allocator *iova_allocator_init(struct vfio_pci_device *device) +{ + struct iova_allocator *allocator; + struct iommu_iova_range *ranges; + u32 nranges; + + ranges = vfio_pci_iova_ranges(device, &nranges); + VFIO_ASSERT_NOT_NULL(ranges); + + allocator = malloc(sizeof(*allocator)); + VFIO_ASSERT_NOT_NULL(allocator); + + *allocator = (struct iova_allocator){ + .ranges = ranges, + .nranges = nranges, + .range_idx = 0, + .range_offset = 0, + }; + + return allocator; +} + +void iova_allocator_cleanup(struct iova_allocator *allocator) +{ + free(allocator->ranges); + free(allocator); +} + +iova_t iova_allocator_alloc(struct iova_allocator *allocator, size_t size) +{ + VFIO_ASSERT_GT(size, 0, "Invalid size arg, zero\n"); + VFIO_ASSERT_EQ(size & (size - 1), 0, "Invalid size arg, non-power-of-2\n"); + + for (;;) { + struct iommu_iova_range *range; + iova_t iova, last; + + VFIO_ASSERT_LT(allocator->range_idx, allocator->nranges, + "IOVA allocator out of space\n"); + + range = &allocator->ranges[allocator->range_idx]; + iova = range->start + allocator->range_offset; + + /* Check for sufficient space at the current offset */ + if (check_add_overflow(iova, size - 1, &last) || + last > range->last) + goto next_range; + + /* Align iova to size */ + iova = last & ~(size - 1); + + /* Check for sufficient space at the aligned iova */ + if (check_add_overflow(iova, size - 1, &last) || + last > range->last) + goto next_range; + + if (last == range->last) { + allocator->range_idx++; + allocator->range_offset = 0; + } else { + allocator->range_offset = last - range->start + 1; + } + + return iova; + +next_range: + allocator->range_idx++; + allocator->range_offset = 0; + } +} + iova_t __to_iova(struct vfio_pci_device *device, void *vaddr) { struct vfio_dma_region *region; -- cgit v1.2.3 From d323ad739666761646048fca587734f4ae64f2c8 Mon Sep 17 00:00:00 2001 From: Alex Mastro Date: Tue, 11 Nov 2025 10:48:27 -0800 Subject: vfio: selftests: replace iova=vaddr with allocated iovas vfio_dma_mapping_test and vfio_pci_driver_test currently use iova=vaddr as part of DMA mapping operations. However, not all IOMMUs support the same virtual address width as the processor. For instance, older Intel consumer platforms only support 39-bits of IOMMU address space. On such platforms, using the virtual address as the IOVA fails. Make the tests more robust by using iova_allocator to vend IOVAs, which queries legally accessible IOVAs from the underlying IOMMUFD or VFIO container. Reviewed-by: David Matlack Tested-by: David Matlack Signed-off-by: Alex Mastro Link: https://lore.kernel.org/r/20251111-iova-ranges-v3-4-7960244642c5@fb.com Signed-off-by: Alex Williamson --- tools/testing/selftests/vfio/vfio_dma_mapping_test.c | 5 ++++- tools/testing/selftests/vfio/vfio_pci_driver_test.c | 12 ++++++++---- 2 files changed, 12 insertions(+), 5 deletions(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c index e1374aab96bd..102603d4407d 100644 --- a/tools/testing/selftests/vfio/vfio_dma_mapping_test.c +++ b/tools/testing/selftests/vfio/vfio_dma_mapping_test.c @@ -95,6 +95,7 @@ static int iommu_mapping_get(const char *bdf, u64 iova, FIXTURE(vfio_dma_mapping_test) { struct vfio_pci_device *device; + struct iova_allocator *iova_allocator; }; FIXTURE_VARIANT(vfio_dma_mapping_test) { @@ -119,10 +120,12 @@ FIXTURE_VARIANT_ADD_ALL_IOMMU_MODES(anonymous_hugetlb_1gb, SZ_1G, MAP_HUGETLB | FIXTURE_SETUP(vfio_dma_mapping_test) { self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + self->iova_allocator = iova_allocator_init(self->device); } FIXTURE_TEARDOWN(vfio_dma_mapping_test) { + iova_allocator_cleanup(self->iova_allocator); vfio_pci_device_cleanup(self->device); } @@ -144,7 +147,7 @@ TEST_F(vfio_dma_mapping_test, dma_map_unmap) else ASSERT_NE(region.vaddr, MAP_FAILED); - region.iova = (u64)region.vaddr; + region.iova = iova_allocator_alloc(self->iova_allocator, size); region.size = size; vfio_pci_dma_map(self->device, ®ion); diff --git a/tools/testing/selftests/vfio/vfio_pci_driver_test.c b/tools/testing/selftests/vfio/vfio_pci_driver_test.c index 2dbd70b7db62..f69eec8b928d 100644 --- a/tools/testing/selftests/vfio/vfio_pci_driver_test.c +++ b/tools/testing/selftests/vfio/vfio_pci_driver_test.c @@ -19,6 +19,7 @@ static const char *device_bdf; } while (0) static void region_setup(struct vfio_pci_device *device, + struct iova_allocator *iova_allocator, struct vfio_dma_region *region, u64 size) { const int flags = MAP_SHARED | MAP_ANONYMOUS; @@ -29,7 +30,7 @@ static void region_setup(struct vfio_pci_device *device, VFIO_ASSERT_NE(vaddr, MAP_FAILED); region->vaddr = vaddr; - region->iova = (u64)vaddr; + region->iova = iova_allocator_alloc(iova_allocator, size); region->size = size; vfio_pci_dma_map(device, region); @@ -44,6 +45,7 @@ static void region_teardown(struct vfio_pci_device *device, FIXTURE(vfio_pci_driver_test) { struct vfio_pci_device *device; + struct iova_allocator *iova_allocator; struct vfio_dma_region memcpy_region; void *vaddr; int msi_fd; @@ -72,14 +74,15 @@ FIXTURE_SETUP(vfio_pci_driver_test) struct vfio_pci_driver *driver; self->device = vfio_pci_device_init(device_bdf, variant->iommu_mode); + self->iova_allocator = iova_allocator_init(self->device); driver = &self->device->driver; - region_setup(self->device, &self->memcpy_region, SZ_1G); - region_setup(self->device, &driver->region, SZ_2M); + region_setup(self->device, self->iova_allocator, &self->memcpy_region, SZ_1G); + region_setup(self->device, self->iova_allocator, &driver->region, SZ_2M); /* Any IOVA that doesn't overlap memcpy_region and driver->region. */ - self->unmapped_iova = 8UL * SZ_1G; + self->unmapped_iova = iova_allocator_alloc(self->iova_allocator, SZ_1G); vfio_pci_driver_init(self->device); self->msi_fd = self->device->msi_eventfds[driver->msi]; @@ -108,6 +111,7 @@ FIXTURE_TEARDOWN(vfio_pci_driver_test) region_teardown(self->device, &self->memcpy_region); region_teardown(self->device, &driver->region); + iova_allocator_cleanup(self->iova_allocator); vfio_pci_device_cleanup(self->device); } -- cgit v1.2.3 From cb730e4ac1b4dca09d364fd83464ebd29547a4ef Mon Sep 17 00:00:00 2001 From: Jiayuan Chen Date: Tue, 11 Nov 2025 14:02:52 +0800 Subject: selftests/bpf: Add mptcp test with sockmap Add test cases to verify that when MPTCP falls back to plain TCP sockets, they can properly work with sockmap. Additionally, add test cases to ensure that sockmap correctly rejects MPTCP sockets as expected. Signed-off-by: Jiayuan Chen Signed-off-by: Martin KaFai Lau Acked-by: Matthieu Baerts (NGI0) Link: https://patch.msgid.link/20251111060307.194196-4-jiayuan.chen@linux.dev --- tools/testing/selftests/bpf/prog_tests/mptcp.c | 140 ++++++++++++++++++++++ tools/testing/selftests/bpf/progs/mptcp_sockmap.c | 43 +++++++ 2 files changed, 183 insertions(+) create mode 100644 tools/testing/selftests/bpf/progs/mptcp_sockmap.c (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/prog_tests/mptcp.c b/tools/testing/selftests/bpf/prog_tests/mptcp.c index f8eb7f9d4fd2..8fade8bdc451 100644 --- a/tools/testing/selftests/bpf/prog_tests/mptcp.c +++ b/tools/testing/selftests/bpf/prog_tests/mptcp.c @@ -6,11 +6,13 @@ #include #include #include +#include #include "cgroup_helpers.h" #include "network_helpers.h" #include "mptcp_sock.skel.h" #include "mptcpify.skel.h" #include "mptcp_subflow.skel.h" +#include "mptcp_sockmap.skel.h" #define NS_TEST "mptcp_ns" #define ADDR_1 "10.0.1.1" @@ -436,6 +438,142 @@ close_cgroup: close(cgroup_fd); } +/* Test sockmap on MPTCP server handling non-mp-capable clients. */ +static void test_sockmap_with_mptcp_fallback(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, client_fd1 = -1, client_fd2 = -1; + int server_fd1 = -1, server_fd2 = -1, sent, recvd; + char snd[9] = "123456789"; + char rcv[10]; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "sockmap-fb:start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client without MPTCP enabled */ + client_fd1 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd1, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd1 = accept(listen_fd, NULL, 0); + skel->bss->sk_index = 1; + client_fd2 = connect_to_fd_opts(listen_fd, NULL); + if (!ASSERT_OK_FD(client_fd2, "sockmap-fb:connect_to_fd")) + goto end; + + server_fd2 = accept(listen_fd, NULL, 0); + /* test normal redirect behavior: data sent by client_fd1 can be + * received by client_fd2 + */ + skel->bss->redirect_idx = 1; + sent = send(client_fd1, snd, sizeof(snd), 0); + if (!ASSERT_EQ(sent, sizeof(snd), "sockmap-fb:send(client_fd1)")) + goto end; + + /* try to recv more bytes to avoid truncation check */ + recvd = recv(client_fd2, rcv, sizeof(rcv), 0); + if (!ASSERT_EQ(recvd, sizeof(snd), "sockmap-fb:recv(client_fd2)")) + goto end; + +end: + if (client_fd1 >= 0) + close(client_fd1); + if (client_fd2 >= 0) + close(client_fd2); + if (server_fd1 >= 0) + close(server_fd1); + if (server_fd2 >= 0) + close(server_fd2); + close(listen_fd); +} + +/* Test sockmap rejection of MPTCP sockets - both server and client sides. */ +static void test_sockmap_reject_mptcp(struct mptcp_sockmap *skel) +{ + int listen_fd = -1, server_fd = -1, client_fd1 = -1; + int err, zero = 0; + + /* start server with MPTCP enabled */ + listen_fd = start_mptcp_server(AF_INET, NULL, 0, 0); + if (!ASSERT_OK_FD(listen_fd, "start_mptcp_server")) + return; + + skel->bss->trace_port = ntohs(get_socket_local_port(listen_fd)); + skel->bss->sk_index = 0; + /* create client with MPTCP enabled */ + client_fd1 = connect_to_fd(listen_fd, 0); + if (!ASSERT_OK_FD(client_fd1, "connect_to_fd client_fd1")) + goto end; + + /* bpf_sock_map_update() called from sockops should reject MPTCP sk */ + if (!ASSERT_EQ(skel->bss->helper_ret, -EOPNOTSUPP, "should reject")) + goto end; + + server_fd = accept(listen_fd, NULL, 0); + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &server_fd, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "server should be disallowed")) + goto end; + + /* MPTCP client should also be disallowed */ + err = bpf_map_update_elem(bpf_map__fd(skel->maps.sock_map), + &zero, &client_fd1, BPF_NOEXIST); + if (!ASSERT_EQ(err, -EOPNOTSUPP, "client should be disallowed")) + goto end; +end: + if (client_fd1 >= 0) + close(client_fd1); + if (server_fd >= 0) + close(server_fd); + close(listen_fd); +} + +static void test_mptcp_sockmap(void) +{ + struct mptcp_sockmap *skel; + struct netns_obj *netns; + int cgroup_fd, err; + + cgroup_fd = test__join_cgroup("/mptcp_sockmap"); + if (!ASSERT_OK_FD(cgroup_fd, "join_cgroup: mptcp_sockmap")) + return; + + skel = mptcp_sockmap__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_open_load: mptcp_sockmap")) + goto close_cgroup; + + skel->links.mptcp_sockmap_inject = + bpf_program__attach_cgroup(skel->progs.mptcp_sockmap_inject, cgroup_fd); + if (!ASSERT_OK_PTR(skel->links.mptcp_sockmap_inject, "attach sockmap")) + goto skel_destroy; + + err = bpf_prog_attach(bpf_program__fd(skel->progs.mptcp_sockmap_redirect), + bpf_map__fd(skel->maps.sock_map), + BPF_SK_SKB_STREAM_VERDICT, 0); + if (!ASSERT_OK(err, "bpf_prog_attach stream verdict")) + goto skel_destroy; + + netns = netns_new(NS_TEST, true); + if (!ASSERT_OK_PTR(netns, "netns_new: mptcp_sockmap")) + goto skel_destroy; + + if (endpoint_init("subflow") < 0) + goto close_netns; + + test_sockmap_with_mptcp_fallback(skel); + test_sockmap_reject_mptcp(skel); + +close_netns: + netns_free(netns); +skel_destroy: + mptcp_sockmap__destroy(skel); +close_cgroup: + close(cgroup_fd); +} + void test_mptcp(void) { if (test__start_subtest("base")) @@ -444,4 +582,6 @@ void test_mptcp(void) test_mptcpify(); if (test__start_subtest("subflow")) test_subflow(); + if (test__start_subtest("sockmap")) + test_mptcp_sockmap(); } diff --git a/tools/testing/selftests/bpf/progs/mptcp_sockmap.c b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c new file mode 100644 index 000000000000..d4eef0cbadb9 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/mptcp_sockmap.c @@ -0,0 +1,43 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include "bpf_tracing_net.h" + +char _license[] SEC("license") = "GPL"; + +int sk_index; +int redirect_idx; +int trace_port; +int helper_ret; +struct { + __uint(type, BPF_MAP_TYPE_SOCKMAP); + __uint(key_size, sizeof(__u32)); + __uint(value_size, sizeof(__u32)); + __uint(max_entries, 100); +} sock_map SEC(".maps"); + +SEC("sockops") +int mptcp_sockmap_inject(struct bpf_sock_ops *skops) +{ + struct bpf_sock *sk; + + /* only accept specified connection */ + if (skops->local_port != trace_port || + skops->op != BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB) + return 1; + + sk = skops->sk; + if (!sk) + return 1; + + /* update sk handler */ + helper_ret = bpf_sock_map_update(skops, &sock_map, &sk_index, BPF_NOEXIST); + + return 1; +} + +SEC("sk_skb/stream_verdict") +int mptcp_sockmap_redirect(struct __sk_buff *skb) +{ + /* redirect skb to the sk under sock_map[redirect_idx] */ + return bpf_sk_redirect_map(skb, &sock_map, redirect_idx, 0); +} -- cgit v1.2.3 From 6c762611fed7365790000925f3d14f20037d0061 Mon Sep 17 00:00:00 2001 From: Eduard Zingerman Date: Thu, 13 Nov 2025 18:57:30 -0800 Subject: selftests/bpf: Test widen_imprecise_scalars() with different stack depth A test case for a situation when widen_imprecise_scalars() is called with old->allocated_stack > cur->allocated_stack. Test structure: def widening_stack_size_bug(): r1 = 0 for r6 in 0..1: iterator_with_diff_stack_depth(r1) r1 = 42 def iterator_with_diff_stack_depth(r1): if r1 != 42: use 128 bytes of stack iterator based loop iterator_with_diff_stack_depth() is verified with r1 == 0 first and r1 == 42 next. Causing stack usage of 128 bytes on a first visit and 8 bytes on a second. Such arrangement triggered a KASAN error in widen_imprecise_scalars(). Signed-off-by: Eduard Zingerman Link: https://lore.kernel.org/r/20251114025730.772723-2-eddyz87@gmail.com Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/progs/iters_looping.c | 53 +++++++++++++++++++++++ 1 file changed, 53 insertions(+) (limited to 'tools/testing') diff --git a/tools/testing/selftests/bpf/progs/iters_looping.c b/tools/testing/selftests/bpf/progs/iters_looping.c index 05fa5ce7fc59..d00fd570255a 100644 --- a/tools/testing/selftests/bpf/progs/iters_looping.c +++ b/tools/testing/selftests/bpf/progs/iters_looping.c @@ -161,3 +161,56 @@ int simplest_loop(void *ctx) return 0; } + +__used +static void iterator_with_diff_stack_depth(int x) +{ + struct bpf_iter_num iter; + + asm volatile ( + "if r1 == 42 goto 0f;" + "*(u64 *)(r10 - 128) = 0;" + "0:" + /* create iterator */ + "r1 = %[iter];" + "r2 = 0;" + "r3 = 10;" + "call %[bpf_iter_num_new];" + "1:" + /* consume next item */ + "r1 = %[iter];" + "call %[bpf_iter_num_next];" + "if r0 == 0 goto 2f;" + "goto 1b;" + "2:" + /* destroy iterator */ + "r1 = %[iter];" + "call %[bpf_iter_num_destroy];" + : + : __imm_ptr(iter), ITER_HELPERS + : __clobber_common, "r6" + ); +} + +SEC("socket") +__success +__naked int widening_stack_size_bug(void *ctx) +{ + /* + * Depending on iterator_with_diff_stack_depth() parameter value, + * subprogram stack depth is either 8 or 128 bytes. Arrange values so + * that it is 128 on a first call and 8 on a second. This triggered a + * bug in verifier's widen_imprecise_scalars() logic. + */ + asm volatile ( + "r6 = 0;" + "r1 = 0;" + "1:" + "call iterator_with_diff_stack_depth;" + "r1 = 42;" + "r6 += 1;" + "if r6 < 2 goto 1b;" + "r0 = 0;" + "exit;" + ::: __clobber_all); +} -- cgit v1.2.3 From 216158f063fe24fb003bd7da0cd92cd6e2c4d48b Mon Sep 17 00:00:00 2001 From: Ankit Khushwaha Date: Thu, 6 Nov 2025 15:25:32 +0530 Subject: selftests/user_events: fix type cast for write_index packed member in perf_test Accessing 'reg.write_index' directly triggers a -Waddress-of-packed-member warning due to potential unaligned pointer access: perf_test.c:239:38: warning: taking address of packed member 'write_index' of class or structure 'user_reg' may result in an unaligned pointer value [-Waddress-of-packed-member] 239 | ASSERT_NE(-1, write(self->data_fd, ®.write_index, | ^~~~~~~~~~~~~~~ Since write(2) works with any alignment. Casting '®.write_index' explicitly to 'void *' to suppress this warning. Link: https://lkml.kernel.org/r/20251106095532.15185-1-ankitkhushwaha.linux@gmail.com Fixes: 42187bdc3ca4 ("selftests/user_events: Add perf self-test for empty arguments events") Signed-off-by: Ankit Khushwaha Cc: Beau Belgrave Cc: "Masami Hiramatsu (Google)" Cc: Steven Rostedt Cc: sunliming Cc: Wei Yang Cc: Shuah Khan Cc: Signed-off-by: Andrew Morton --- tools/testing/selftests/user_events/perf_test.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'tools/testing') diff --git a/tools/testing/selftests/user_events/perf_test.c b/tools/testing/selftests/user_events/perf_test.c index 5288e768b207..68625362add2 100644 --- a/tools/testing/selftests/user_events/perf_test.c +++ b/tools/testing/selftests/user_events/perf_test.c @@ -236,7 +236,7 @@ TEST_F(user, perf_empty_events) { ASSERT_EQ(1 << reg.enable_bit, self->check); /* Ensure write shows up at correct offset */ - ASSERT_NE(-1, write(self->data_fd, ®.write_index, + ASSERT_NE(-1, write(self->data_fd, (void *)®.write_index, sizeof(reg.write_index))); val = (void *)(((char *)perf_page) + perf_page->data_offset); ASSERT_EQ(PERF_RECORD_SAMPLE, *val); -- cgit v1.2.3