summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/Makefile16
-rw-r--r--tools/arch/x86/lib/x86-opcode-map.txt4
-rw-r--r--tools/hv/hv_kvp_daemon.c108
-rw-r--r--tools/lib/bpf/libbpf.h5
-rw-r--r--tools/lib/bpf/netlink.c20
-rw-r--r--tools/net/ynl/Makefile.deps5
-rw-r--r--tools/net/ynl/generated/Makefile2
-rw-r--r--tools/net/ynl/lib/ynl-priv.h3
-rw-r--r--tools/net/ynl/lib/ynl.c67
-rw-r--r--tools/net/ynl/lib/ynl.h17
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py317
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_rst.py2
-rw-r--r--tools/net/ynl/samples/.gitignore4
-rw-r--r--tools/net/ynl/samples/rt-addr.c80
-rw-r--r--tools/net/ynl/samples/rt-route.c80
-rw-r--r--tools/objtool/check.c1
-rw-r--r--tools/sched_ext/scx_flatcg.bpf.c2
-rw-r--r--tools/testing/cxl/test/mem.c2
-rw-r--r--tools/testing/kunit/configs/all_tests.config2
-rw-r--r--tools/testing/kunit/qemu_configs/sh.py4
-rw-r--r--tools/testing/memblock/tests/basic_api.c102
-rw-r--r--tools/testing/selftests/Makefile1
-rw-r--r--tools/testing/selftests/bpf/config2
-rw-r--r--tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c231
-rw-r--r--tools/testing/selftests/bpf/prog_tests/for_each.c37
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c447
-rw-r--r--tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c1
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_misc.h2
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_common.h27
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c41
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c126
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c756
-rw-r--r--tools/testing/selftests/bpf/progs/bpf_tracing_net.h1
-rw-r--r--tools/testing/selftests/bpf/progs/for_each_hash_modify.c30
-rw-r--r--tools/testing/selftests/bpf/progs/sock_iter_batch.c24
-rw-r--r--tools/testing/selftests/bpf/progs/xsk_xdp_progs.c50
-rw-r--r--tools/testing/selftests/bpf/xsk_xdp_common.h1
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.c118
-rw-r--r--tools/testing/selftests/bpf/xskxceiver.h2
-rw-r--r--tools/testing/selftests/drivers/net/.gitignore2
-rw-r--r--tools/testing/selftests/drivers/net/Makefile5
l---------tools/testing/selftests/drivers/net/dsa/tc_taprio.sh1
-rw-r--r--tools/testing/selftests/drivers/net/hw/Makefile1
-rw-r--r--tools/testing/selftests/drivers/net/hw/iou-zcrx.c27
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/iou-zcrx.py140
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/rss_input_xfrm.py5
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/xsk_reconfig.py60
-rwxr-xr-xtools/testing/selftests/drivers/net/napi_id.py23
-rw-r--r--tools/testing/selftests/drivers/net/napi_id_helper.c83
-rwxr-xr-xtools/testing/selftests/drivers/net/ocelot/psfp.sh8
-rwxr-xr-xtools/testing/selftests/drivers/net/queues.py4
-rw-r--r--tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c57
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc23
-rw-r--r--tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc20
-rw-r--r--tools/testing/selftests/landlock/audit.h21
-rw-r--r--tools/testing/selftests/landlock/audit_test.c154
-rw-r--r--tools/testing/selftests/landlock/fs_test.c3
-rw-r--r--tools/testing/selftests/lib/config1
-rw-r--r--tools/testing/selftests/net/config1
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh120
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_igmp.sh80
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mld.sh81
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_aware.sh96
-rw-r--r--tools/testing/selftests/net/forwarding/config1
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_taprio.sh421
-rw-r--r--tools/testing/selftests/net/forwarding/tsn_lib.sh26
-rw-r--r--tools/testing/selftests/net/lib.sh24
-rw-r--r--tools/testing/selftests/net/lib/.gitignore1
-rw-r--r--tools/testing/selftests/net/lib/Makefile1
-rw-r--r--tools/testing/selftests/net/lib/ksft.h56
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py24
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py4
-rw-r--r--tools/testing/selftests/net/lib/xdp_helper.c (renamed from tools/testing/selftests/drivers/net/xdp_helper.c)82
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh37
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c21
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_diag.c231
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c16
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh26
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh7
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_sockopt.c16
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile1
-rw-r--r--tools/testing/selftests/net/netfilter/config1
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_resize.sh406
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh23
-rw-r--r--tools/testing/selftests/net/ovpn/.gitignore2
-rw-r--r--tools/testing/selftests/net/ovpn/Makefile31
-rw-r--r--tools/testing/selftests/net/ovpn/common.sh92
-rw-r--r--tools/testing/selftests/net/ovpn/config10
-rw-r--r--tools/testing/selftests/net/ovpn/data64.key5
-rw-r--r--tools/testing/selftests/net/ovpn/ovpn-cli.c2376
-rw-r--r--tools/testing/selftests/net/ovpn/tcp_peers.txt5
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-chachapoly.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-close-socket.sh45
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-float.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test-tcp.sh9
-rwxr-xr-xtools/testing/selftests/net/ovpn/test.sh113
-rw-r--r--tools/testing/selftests/net/ovpn/udp_peers.txt5
-rw-r--r--tools/testing/selftests/net/reuseport_addr_any.c36
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh125
-rw-r--r--tools/testing/selftests/pcie_bwctrl/Makefile3
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json225
-rwxr-xr-xtools/testing/selftests/tc-testing/tdc.sh4
-rw-r--r--tools/testing/selftests/ublk/Makefile9
-rw-r--r--tools/testing/selftests/ublk/fault_inject.c98
-rw-r--r--tools/testing/selftests/ublk/kublk.c344
-rw-r--r--tools/testing/selftests/ublk/kublk.h46
-rw-r--r--tools/testing/selftests/ublk/stripe.c28
-rwxr-xr-xtools/testing/selftests/ublk/test_common.sh146
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_04.sh40
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_05.sh44
-rwxr-xr-xtools/testing/selftests/ublk/test_generic_06.sh41
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_01.sh8
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_02.sh8
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_03.sh8
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_04.sh9
-rwxr-xr-xtools/testing/selftests/ublk/test_loop_05.sh8
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_01.sh45
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_02.sh45
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_03.sh38
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_04.sh37
-rwxr-xr-xtools/testing/selftests/ublk/test_stress_05.sh64
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_01.sh12
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_02.sh13
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_03.sh12
-rwxr-xr-xtools/testing/selftests/ublk/test_stripe_04.sh13
126 files changed, 8827 insertions, 583 deletions
diff --git a/tools/Makefile b/tools/Makefile
index 5e1254eb66de..c31cbbd12c45 100644
--- a/tools/Makefile
+++ b/tools/Makefile
@@ -41,6 +41,7 @@ help:
@echo ' mm - misc mm tools'
@echo ' wmi - WMI interface examples'
@echo ' x86_energy_perf_policy - Intel energy policy tool'
+ @echo ' ynl - ynl headers, library, and python tool'
@echo ''
@echo 'You can do:'
@echo ' $$ make -C tools/ <tool>_install'
@@ -118,11 +119,14 @@ freefall: FORCE
kvm_stat: FORCE
$(call descend,kvm/$@)
+ynl: FORCE
+ $(call descend,net/ynl)
+
all: acpi counter cpupower gpio hv firewire \
perf selftests bootconfig spi turbostat usb \
virtio mm bpf x86_energy_perf_policy \
tmon freefall iio objtool kvm_stat wmi \
- debugging tracing thermal thermometer thermal-engine
+ debugging tracing thermal thermometer thermal-engine ynl
acpi_install:
$(call descend,power/$(@:_install=),install)
@@ -157,13 +161,16 @@ freefall_install:
kvm_stat_install:
$(call descend,kvm/$(@:_install=),install)
+ynl_install:
+ $(call descend,net/$(@:_install=),install)
+
install: acpi_install counter_install cpupower_install gpio_install \
hv_install firewire_install iio_install \
perf_install selftests_install turbostat_install usb_install \
virtio_install mm_install bpf_install x86_energy_perf_policy_install \
tmon_install freefall_install objtool_install kvm_stat_install \
wmi_install debugging_install intel-speed-select_install \
- tracing_install thermometer_install thermal-engine_install
+ tracing_install thermometer_install thermal-engine_install ynl_install
acpi_clean:
$(call descend,power/acpi,clean)
@@ -214,12 +221,15 @@ freefall_clean:
build_clean:
$(call descend,build,clean)
+ynl_clean:
+ $(call descend,net/$(@:_clean=),clean)
+
clean: acpi_clean counter_clean cpupower_clean hv_clean firewire_clean \
perf_clean selftests_clean turbostat_clean bootconfig_clean spi_clean usb_clean virtio_clean \
mm_clean bpf_clean iio_clean x86_energy_perf_policy_clean tmon_clean \
freefall_clean build_clean libbpf_clean libsubcmd_clean \
gpio_clean objtool_clean leds_clean wmi_clean firmware_clean debugging_clean \
intel-speed-select_clean tracing_clean thermal_clean thermometer_clean thermal-engine_clean \
- sched_ext_clean
+ sched_ext_clean ynl_clean
.PHONY: FORCE
diff --git a/tools/arch/x86/lib/x86-opcode-map.txt b/tools/arch/x86/lib/x86-opcode-map.txt
index caedb3ef6688..f5dd84eb55dc 100644
--- a/tools/arch/x86/lib/x86-opcode-map.txt
+++ b/tools/arch/x86/lib/x86-opcode-map.txt
@@ -996,8 +996,8 @@ AVXcode: 4
83: Grp1 Ev,Ib (1A),(es)
# CTESTSCC instructions are: CTESTB, CTESTBE, CTESTF, CTESTL, CTESTLE, CTESTNB, CTESTNBE, CTESTNL,
# CTESTNLE, CTESTNO, CTESTNS, CTESTNZ, CTESTO, CTESTS, CTESTT, CTESTZ
-84: CTESTSCC (ev)
-85: CTESTSCC (es) | CTESTSCC (66),(es)
+84: CTESTSCC Eb,Gb (ev)
+85: CTESTSCC Ev,Gv (es) | CTESTSCC Ev,Gv (66),(es)
88: POPCNT Gv,Ev (es) | POPCNT Gv,Ev (66),(es)
8f: POP2 Bq,Rq (000),(11B),(ev)
a5: SHLD Ev,Gv,CL (es) | SHLD Ev,Gv,CL (66),(es)
diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c
index 04ba035d67e9..b9ce3aab15fe 100644
--- a/tools/hv/hv_kvp_daemon.c
+++ b/tools/hv/hv_kvp_daemon.c
@@ -24,6 +24,7 @@
#include <sys/poll.h>
#include <sys/utsname.h>
+#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
@@ -677,6 +678,88 @@ static void kvp_process_ipconfig_file(char *cmd,
pclose(file);
}
+static bool kvp_verify_ip_address(const void *address_string)
+{
+ char verify_buf[sizeof(struct in6_addr)];
+
+ if (inet_pton(AF_INET, address_string, verify_buf) == 1)
+ return true;
+ if (inet_pton(AF_INET6, address_string, verify_buf) == 1)
+ return true;
+ return false;
+}
+
+static void kvp_extract_routes(const char *line, void **output, size_t *remaining)
+{
+ static const char needle[] = "via ";
+ const char *match, *haystack = line;
+
+ while ((match = strstr(haystack, needle))) {
+ const char *address, *next_char;
+
+ /* Address starts after needle. */
+ address = match + strlen(needle);
+
+ /* The char following address is a space or end of line. */
+ next_char = strpbrk(address, " \t\\");
+ if (!next_char)
+ next_char = address + strlen(address) + 1;
+
+ /* Enough room for address and semicolon. */
+ if (*remaining >= (next_char - address) + 1) {
+ memcpy(*output, address, next_char - address);
+ /* Terminate string for verification. */
+ memcpy(*output + (next_char - address), "", 1);
+ if (kvp_verify_ip_address(*output)) {
+ /* Advance output buffer. */
+ *output += next_char - address;
+ *remaining -= next_char - address;
+
+ /* Each address needs a trailing semicolon. */
+ memcpy(*output, ";", 1);
+ *output += 1;
+ *remaining -= 1;
+ }
+ }
+ haystack = next_char;
+ }
+}
+
+static void kvp_get_gateway(void *buffer, size_t buffer_len)
+{
+ static const char needle[] = "default ";
+ FILE *f;
+ void *output = buffer;
+ char *line = NULL;
+ size_t alloc_size = 0, remaining = buffer_len - 1;
+ ssize_t num_chars;
+
+ /* Show route information in a single line, for each address family */
+ f = popen("ip --oneline -4 route show;ip --oneline -6 route show", "r");
+ if (!f) {
+ /* Convert buffer into C-String. */
+ memcpy(output, "", 1);
+ return;
+ }
+ while ((num_chars = getline(&line, &alloc_size, f)) > 0) {
+ /* Skip short lines. */
+ if (num_chars <= strlen(needle))
+ continue;
+ /* Skip lines without default route. */
+ if (memcmp(line, needle, strlen(needle)))
+ continue;
+ /* Remove trailing newline to simplify further parsing. */
+ if (line[num_chars - 1] == '\n')
+ line[num_chars - 1] = '\0';
+ /* Search routes after match. */
+ kvp_extract_routes(line + strlen(needle), &output, &remaining);
+ }
+ /* Convert buffer into C-String. */
+ memcpy(output, "", 1);
+ free(line);
+ pclose(f);
+}
+
static void kvp_get_ipconfig_info(char *if_name,
struct hv_kvp_ipaddr_value *buffer)
{
@@ -685,30 +768,7 @@ static void kvp_get_ipconfig_info(char *if_name,
char *p;
FILE *file;
- /*
- * Get the address of default gateway (ipv4).
- */
- sprintf(cmd, "%s %s", "ip route show dev", if_name);
- strcat(cmd, " | awk '/default/ {print $3 }'");
-
- /*
- * Execute the command to gather gateway info.
- */
- kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
- (MAX_GATEWAY_SIZE * 2), INET_ADDRSTRLEN, 0);
-
- /*
- * Get the address of default gateway (ipv6).
- */
- sprintf(cmd, "%s %s", "ip -f inet6 route show dev", if_name);
- strcat(cmd, " | awk '/default/ {print $3 }'");
-
- /*
- * Execute the command to gather gateway info (ipv6).
- */
- kvp_process_ipconfig_file(cmd, (char *)buffer->gate_way,
- (MAX_GATEWAY_SIZE * 2), INET6_ADDRSTRLEN, 1);
-
+ kvp_get_gateway(buffer->gate_way, sizeof(buffer->gate_way));
/*
* Gather the DNS state.
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index e0605403f977..fdcee6a71e0f 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -1283,6 +1283,7 @@ enum bpf_tc_attach_point {
BPF_TC_INGRESS = 1 << 0,
BPF_TC_EGRESS = 1 << 1,
BPF_TC_CUSTOM = 1 << 2,
+ BPF_TC_QDISC = 1 << 3,
};
#define BPF_TC_PARENT(a, b) \
@@ -1297,9 +1298,11 @@ struct bpf_tc_hook {
int ifindex;
enum bpf_tc_attach_point attach_point;
__u32 parent;
+ __u32 handle;
+ const char *qdisc;
size_t :0;
};
-#define bpf_tc_hook__last_field parent
+#define bpf_tc_hook__last_field qdisc
struct bpf_tc_opts {
size_t sz;
diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c
index 68a2def17175..c997e69d507f 100644
--- a/tools/lib/bpf/netlink.c
+++ b/tools/lib/bpf/netlink.c
@@ -529,9 +529,9 @@ int bpf_xdp_query_id(int ifindex, int flags, __u32 *prog_id)
}
-typedef int (*qdisc_config_t)(struct libbpf_nla_req *req);
+typedef int (*qdisc_config_t)(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook);
-static int clsact_config(struct libbpf_nla_req *req)
+static int clsact_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
{
req->tc.tcm_parent = TC_H_CLSACT;
req->tc.tcm_handle = TC_H_MAKE(TC_H_CLSACT, 0);
@@ -539,6 +539,16 @@ static int clsact_config(struct libbpf_nla_req *req)
return nlattr_add(req, TCA_KIND, "clsact", sizeof("clsact"));
}
+static int qdisc_config(struct libbpf_nla_req *req, const struct bpf_tc_hook *hook)
+{
+ const char *qdisc = OPTS_GET(hook, qdisc, NULL);
+
+ req->tc.tcm_parent = OPTS_GET(hook, parent, TC_H_ROOT);
+ req->tc.tcm_handle = OPTS_GET(hook, handle, 0);
+
+ return nlattr_add(req, TCA_KIND, qdisc, strlen(qdisc) + 1);
+}
+
static int attach_point_to_config(struct bpf_tc_hook *hook,
qdisc_config_t *config)
{
@@ -552,6 +562,9 @@ static int attach_point_to_config(struct bpf_tc_hook *hook,
return 0;
case BPF_TC_CUSTOM:
return -EOPNOTSUPP;
+ case BPF_TC_QDISC:
+ *config = &qdisc_config;
+ return 0;
default:
return -EINVAL;
}
@@ -596,7 +609,7 @@ static int tc_qdisc_modify(struct bpf_tc_hook *hook, int cmd, int flags)
req.tc.tcm_family = AF_UNSPEC;
req.tc.tcm_ifindex = OPTS_GET(hook, ifindex, 0);
- ret = config(&req);
+ ret = config(&req, hook);
if (ret < 0)
return ret;
@@ -639,6 +652,7 @@ int bpf_tc_hook_destroy(struct bpf_tc_hook *hook)
case BPF_TC_INGRESS:
case BPF_TC_EGRESS:
return libbpf_err(__bpf_tc_detach(hook, NULL, true));
+ case BPF_TC_QDISC:
case BPF_TC_INGRESS | BPF_TC_EGRESS:
return libbpf_err(tc_qdisc_delete(hook));
case BPF_TC_CUSTOM:
diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps
index f3269ce39e5b..a5e6093903fb 100644
--- a/tools/net/ynl/Makefile.deps
+++ b/tools/net/ynl/Makefile.deps
@@ -20,13 +20,18 @@ CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_H,ethtool.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) \
$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_GENERATED_H,ethtool_netlink_generated.h)
CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h)
+CFLAGS_lockd_netlink:=$(call get_hdr_inc,_LINUX_LOCKD_NETLINK_H,lockd_netlink.h)
CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H,mptcp_pm.h)
CFLAGS_net_shaper:=$(call get_hdr_inc,_LINUX_NET_SHAPER_H,net_shaper.h)
CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h)
CFLAGS_nl80211:=$(call get_hdr_inc,__LINUX_NL802121_H,nl80211.h)
CFLAGS_nlctrl:=$(call get_hdr_inc,__LINUX_GENERIC_NETLINK_H,genetlink.h)
CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h)
+CFLAGS_ovpn:=$(call get_hdr_inc,_LINUX_OVPN_H,ovpn.h)
CFLAGS_ovs_datapath:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_flow:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
CFLAGS_ovs_vport:=$(call get_hdr_inc,__LINUX_OPENVSWITCH_H,openvswitch.h)
+CFLAGS_rt-addr:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h) \
+ $(call get_hdr_inc,__LINUX_IF_ADDR_H,if_addr.h)
+CFLAGS_rt-route:=$(call get_hdr_inc,__LINUX_RTNETLINK_H,rtnetlink.h)
CFLAGS_tcp_metrics:=$(call get_hdr_inc,_LINUX_TCP_METRICS_H,tcp_metrics.h)
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 21f9e299dc75..6603ad8d4ce1 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -25,7 +25,7 @@ SPECS_DIR:=../../../../Documentation/netlink/specs
GENS_PATHS=$(shell grep -nrI --files-without-match \
'protocol: netlink' \
$(SPECS_DIR))
-GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS})
+GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS}) rt-addr rt-route
SRCS=$(patsubst %,%-user.c,${GENS})
HDRS=$(patsubst %,%-user.h,${GENS})
OBJS=$(patsubst %,%-user.o,${GENS})
diff --git a/tools/net/ynl/lib/ynl-priv.h b/tools/net/ynl/lib/ynl-priv.h
index 3c09a7bbfba5..5debb09491e7 100644
--- a/tools/net/ynl/lib/ynl-priv.h
+++ b/tools/net/ynl/lib/ynl-priv.h
@@ -94,6 +94,9 @@ struct ynl_ntf_base_type {
unsigned char data[] __attribute__((aligned(8)));
};
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags);
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id);
+
struct nlmsghdr *
ynl_gemsg_start_req(struct ynl_sock *ys, __u32 id, __u8 cmd, __u8 version);
struct nlmsghdr *
diff --git a/tools/net/ynl/lib/ynl.c b/tools/net/ynl/lib/ynl.c
index ce32cb35007d..d263f6f40ad5 100644
--- a/tools/net/ynl/lib/ynl.c
+++ b/tools/net/ynl/lib/ynl.c
@@ -191,12 +191,12 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
n = snprintf(bad_attr, sizeof(bad_attr), "%sbad attribute: ",
str ? " (" : "");
- start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len);
end = ynl_nlmsg_end_addr(ys->nlh);
off = ys->err.attr_offs;
off -= sizeof(struct nlmsghdr);
- off -= ys->family->hdr_len;
+ off -= ys->req_hdr_len;
n += ynl_err_walk(ys, start, end, off, ys->req_policy,
&bad_attr[n], sizeof(bad_attr) - n, NULL);
@@ -216,14 +216,14 @@ ynl_ext_ack_check(struct ynl_sock *ys, const struct nlmsghdr *nlh,
n = snprintf(miss_attr, sizeof(miss_attr), "%smissing attribute: ",
bad_attr[0] ? ", " : (str ? " (" : ""));
- start = ynl_nlmsg_data_offset(ys->nlh, ys->family->hdr_len);
+ start = ynl_nlmsg_data_offset(ys->nlh, ys->req_hdr_len);
end = ynl_nlmsg_end_addr(ys->nlh);
nest_pol = ys->req_policy;
if (tb[NLMSGERR_ATTR_MISS_NEST]) {
off = ynl_attr_get_u32(tb[NLMSGERR_ATTR_MISS_NEST]);
off -= sizeof(struct nlmsghdr);
- off -= ys->family->hdr_len;
+ off -= ys->req_hdr_len;
n += ynl_err_walk(ys, start, end, off, ys->req_policy,
&miss_attr[n], sizeof(miss_attr) - n,
@@ -451,14 +451,14 @@ ynl_gemsg_start(struct ynl_sock *ys, __u32 id, __u16 flags,
return nlh;
}
-void ynl_msg_start_req(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_req(struct ynl_sock *ys, __u32 id, __u16 flags)
{
- ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK);
+ return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | flags);
}
-void ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
+struct nlmsghdr *ynl_msg_start_dump(struct ynl_sock *ys, __u32 id)
{
- ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
+ return ynl_msg_start(ys, id, NLM_F_REQUEST | NLM_F_ACK | NLM_F_DUMP);
}
struct nlmsghdr *
@@ -663,6 +663,7 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
struct sockaddr_nl addr;
struct ynl_sock *ys;
socklen_t addrlen;
+ int sock_type;
int one = 1;
ys = malloc(sizeof(*ys) + 2 * YNL_SOCKET_BUFFER_SIZE);
@@ -675,7 +676,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
ys->rx_buf = &ys->raw_buf[YNL_SOCKET_BUFFER_SIZE];
ys->ntf_last_next = &ys->ntf_first;
- ys->socket = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC);
+ sock_type = yf->is_classic ? yf->classic_id : NETLINK_GENERIC;
+
+ ys->socket = socket(AF_NETLINK, SOCK_RAW, sock_type);
if (ys->socket < 0) {
__perr(yse, "failed to create a netlink socket");
goto err_free_sock;
@@ -708,8 +711,9 @@ ynl_sock_create(const struct ynl_family *yf, struct ynl_error *yse)
ys->portid = addr.nl_pid;
ys->seq = random();
-
- if (ynl_sock_read_family(ys, yf->name)) {
+ if (yf->is_classic) {
+ ys->family_id = yf->classic_id;
+ } else if (ynl_sock_read_family(ys, yf->name)) {
if (yse)
memcpy(yse, &ys->err, sizeof(*yse));
goto err_close_sock;
@@ -791,13 +795,21 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
struct ynl_parse_arg yarg = { .ys = ys, };
const struct ynl_ntf_info *info;
struct ynl_ntf_base_type *rsp;
- struct genlmsghdr *gehdr;
+ __u32 cmd;
int ret;
- gehdr = ynl_nlmsg_data(nlh);
- if (gehdr->cmd >= ys->family->ntf_info_size)
+ if (ys->family->is_classic) {
+ cmd = nlh->nlmsg_type;
+ } else {
+ struct genlmsghdr *gehdr;
+
+ gehdr = ynl_nlmsg_data(nlh);
+ cmd = gehdr->cmd;
+ }
+
+ if (cmd >= ys->family->ntf_info_size)
return YNL_PARSE_CB_ERROR;
- info = &ys->family->ntf_info[gehdr->cmd];
+ info = &ys->family->ntf_info[cmd];
if (!info->cb)
return YNL_PARSE_CB_ERROR;
@@ -811,7 +823,7 @@ static int ynl_ntf_parse(struct ynl_sock *ys, const struct nlmsghdr *nlh)
goto err_free;
rsp->family = nlh->nlmsg_type;
- rsp->cmd = gehdr->cmd;
+ rsp->cmd = cmd;
*ys->ntf_last_next = rsp;
ys->ntf_last_next = &rsp->next;
@@ -863,17 +875,22 @@ int ynl_error_parse(struct ynl_parse_arg *yarg, const char *msg)
static int
ynl_check_alien(struct ynl_sock *ys, const struct nlmsghdr *nlh, __u32 rsp_cmd)
{
- struct genlmsghdr *gehdr;
+ if (ys->family->is_classic) {
+ if (nlh->nlmsg_type != rsp_cmd)
+ return ynl_ntf_parse(ys, nlh);
+ } else {
+ struct genlmsghdr *gehdr;
- if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
- yerr(ys, YNL_ERROR_INV_RESP,
- "Kernel responded with truncated message");
- return -1;
- }
+ if (ynl_nlmsg_data_len(nlh) < sizeof(*gehdr)) {
+ yerr(ys, YNL_ERROR_INV_RESP,
+ "Kernel responded with truncated message");
+ return -1;
+ }
- gehdr = ynl_nlmsg_data(nlh);
- if (gehdr->cmd != rsp_cmd)
- return ynl_ntf_parse(ys, nlh);
+ gehdr = ynl_nlmsg_data(nlh);
+ if (gehdr->cmd != rsp_cmd)
+ return ynl_ntf_parse(ys, nlh);
+ }
return 0;
}
diff --git a/tools/net/ynl/lib/ynl.h b/tools/net/ynl/lib/ynl.h
index 6cd570b283ea..32efeb224829 100644
--- a/tools/net/ynl/lib/ynl.h
+++ b/tools/net/ynl/lib/ynl.h
@@ -2,6 +2,7 @@
#ifndef __YNL_C_H
#define __YNL_C_H 1
+#include <stdbool.h>
#include <stddef.h>
#include <linux/genetlink.h>
#include <linux/types.h>
@@ -48,6 +49,8 @@ struct ynl_family {
/* private: */
const char *name;
size_t hdr_len;
+ bool is_classic;
+ __u16 classic_id;
const struct ynl_ntf_info *ntf_info;
unsigned int ntf_info_size;
};
@@ -77,11 +80,25 @@ struct ynl_sock {
struct nlmsghdr *nlh;
const struct ynl_policy_nest *req_policy;
+ size_t req_hdr_len;
unsigned char *tx_buf;
unsigned char *rx_buf;
unsigned char raw_buf[];
};
+/**
+ * struct ynl_string - parsed individual string
+ * @len: length of the string (excluding terminating character)
+ * @str: value of the string
+ *
+ * Parsed and nul-terminated string. This struct is only used for arrays of
+ * strings. Non-array string members are placed directly in respective types.
+ */
+struct ynl_string {
+ unsigned int len;
+ char str[];
+};
+
struct ynl_sock *
ynl_sock_create(const struct ynl_family *yf, struct ynl_error *e);
void ynl_sock_destroy(struct ynl_sock *ys);
diff --git a/tools/net/ynl/pyynl/ynl_gen_c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index 30c0a34b2784..acba03bbe67d 100755
--- a/tools/net/ynl/pyynl/ynl_gen_c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -175,21 +175,23 @@ class Type(SpecAttr):
def arg_member(self, ri):
member = self._complex_member_type(ri)
if member:
- arg = [member + ' *' + self.c_name]
+ spc = ' ' if member[-1] != '*' else ''
+ arg = [member + spc + '*' + self.c_name]
if self.presence_type() == 'count':
arg += ['unsigned int n_' + self.c_name]
return arg
raise Exception(f"Struct member not implemented for class type {self.type}")
def struct_member(self, ri):
- if self.is_multi_val():
- ri.cw.p(f"unsigned int n_{self.c_name};")
member = self._complex_member_type(ri)
if member:
+ if self.is_multi_val():
+ ri.cw.p(f"unsigned int n_{self.c_name};")
ptr = '*' if self.is_multi_val() else ''
if self.is_recursive_for_op(ri):
ptr = '*'
- ri.cw.p(f"{member} {ptr}{self.c_name};")
+ spc = ' ' if member[-1] != '*' else ''
+ ri.cw.p(f"{member}{spc}{ptr}{self.c_name};")
return
members = self.arg_member(ri)
for one in members:
@@ -355,26 +357,10 @@ class TypeScalar(Type):
if 'byte-order' in attr:
self.byte_order_comment = f" /* {attr['byte-order']} */"
- if 'enum' in self.attr:
- enum = self.family.consts[self.attr['enum']]
- low, high = enum.value_range()
- if 'min' not in self.checks:
- if low != 0 or self.type[0] == 's':
- self.checks['min'] = low
- if 'max' not in self.checks:
- self.checks['max'] = high
-
- if 'min' in self.checks and 'max' in self.checks:
- if self.get_limit('min') > self.get_limit('max'):
- raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}')
- self.checks['range'] = True
-
- low = min(self.get_limit('min', 0), self.get_limit('max', 0))
- high = max(self.get_limit('min', 0), self.get_limit('max', 0))
- if low < 0 and self.type[0] == 'u':
- raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type')
- if low < -32768 or high > 32767:
- self.checks['full-range'] = True
+ # Classic families have some funny enums, don't bother
+ # computing checks, since we only need them for kernel policies
+ if not family.is_classic():
+ self._init_checks()
# Added by resolve():
self.is_bitfield = None
@@ -399,6 +385,31 @@ class TypeScalar(Type):
else:
self.type_name = '__' + self.type
+ def _init_checks(self):
+ if 'enum' in self.attr:
+ enum = self.family.consts[self.attr['enum']]
+ low, high = enum.value_range()
+ if low == None and high == None:
+ self.checks['sparse'] = True
+ else:
+ if 'min' not in self.checks:
+ if low != 0 or self.type[0] == 's':
+ self.checks['min'] = low
+ if 'max' not in self.checks:
+ self.checks['max'] = high
+
+ if 'min' in self.checks and 'max' in self.checks:
+ if self.get_limit('min') > self.get_limit('max'):
+ raise Exception(f'Invalid limit for "{self.name}" min: {self.get_limit("min")} max: {self.get_limit("max")}')
+ self.checks['range'] = True
+
+ low = min(self.get_limit('min', 0), self.get_limit('max', 0))
+ high = max(self.get_limit('min', 0), self.get_limit('max', 0))
+ if low < 0 and self.type[0] == 'u':
+ raise Exception(f'Invalid limit for "{self.name}" negative limit for unsigned type')
+ if low < -32768 or high > 32767:
+ self.checks['full-range'] = True
+
def _attr_policy(self, policy):
if 'flags-mask' in self.checks or self.is_bitfield:
if self.is_bitfield:
@@ -417,6 +428,8 @@ class TypeScalar(Type):
return f"NLA_POLICY_MIN({policy}, {self.get_limit_str('min')})"
elif 'max' in self.checks:
return f"NLA_POLICY_MAX({policy}, {self.get_limit_str('max')})"
+ elif 'sparse' in self.checks:
+ return f"NLA_POLICY_VALIDATE_FN({policy}, &{c_lower(self.enum_name)}_validate)"
return super()._attr_policy(policy)
def _attr_typol(self):
@@ -638,19 +651,37 @@ class TypeMultiAttr(Type):
def _complex_member_type(self, ri):
if 'type' not in self.attr or self.attr['type'] == 'nest':
return self.nested_struct_type
+ elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+ return None # use arg_member()
+ elif self.attr['type'] == 'string':
+ return 'struct ynl_string *'
elif self.attr['type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
return scalar_pfx + self.attr['type']
else:
raise Exception(f"Sub-type {self.attr['type']} not supported yet")
+ def arg_member(self, ri):
+ if self.type == 'binary' and 'struct' in self.attr:
+ return [f'struct {c_lower(self.attr["struct"])} *{self.c_name}',
+ f'unsigned int n_{self.c_name}']
+ return super().arg_member(ri)
+
def free_needs_iter(self):
- return 'type' not in self.attr or self.attr['type'] == 'nest'
+ return self.attr['type'] in {'nest', 'string'}
def _free_lines(self, ri, var, ref):
lines = []
if self.attr['type'] in scalars:
lines += [f"free({var}->{ref}{self.c_name});"]
+ elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+ lines += [f"free({var}->{ref}{self.c_name});"]
+ elif self.attr['type'] == 'string':
+ lines += [
+ f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)",
+ f"free({var}->{ref}{self.c_name}[i]);",
+ f"free({var}->{ref}{self.c_name});",
+ ]
elif 'type' not in self.attr or self.attr['type'] == 'nest':
lines += [
f"for (i = 0; i < {var}->{ref}n_{self.c_name}; i++)",
@@ -675,6 +706,12 @@ class TypeMultiAttr(Type):
put_type = self.type
ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
ri.cw.p(f"ynl_attr_put_{put_type}(nlh, {self.enum_name}, {var}->{self.c_name}[i]);")
+ elif self.attr['type'] == 'binary' and 'struct' in self.attr:
+ ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"ynl_attr_put(nlh, {self.enum_name}, &{var}->{self.c_name}[i], sizeof(struct {c_lower(self.attr['struct'])}));")
+ elif self.attr['type'] == 'string':
+ ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
+ ri.cw.p(f"ynl_attr_put_str(nlh, {self.enum_name}, {var}->{self.c_name}[i]->str);")
elif 'type' not in self.attr or self.attr['type'] == 'nest':
ri.cw.p(f"for (i = 0; i < {var}->n_{self.c_name}; i++)")
self._attr_put_line(ri, var, f"{self.nested_render_name}_put(nlh, " +
@@ -702,12 +739,22 @@ class TypeArrayNest(Type):
elif self.attr['sub-type'] in scalars:
scalar_pfx = '__' if ri.ku_space == 'user' else ''
return scalar_pfx + self.attr['sub-type']
+ elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+ return None # use arg_member()
else:
raise Exception(f"Sub-type {self.attr['sub-type']} not supported yet")
+ def arg_member(self, ri):
+ if self.sub_type == 'binary' and 'exact-len' in self.checks:
+ return [f'unsigned char (*{self.c_name})[{self.checks["exact-len"]}]',
+ f'unsigned int n_{self.c_name}']
+ return super().arg_member(ri)
+
def _attr_typol(self):
if self.attr['sub-type'] in scalars:
return f'.type = YNL_PT_U{c_upper(self.sub_type[1:])}, '
+ elif self.attr['sub-type'] == 'binary' and 'exact-len' in self.checks:
+ return f'.type = YNL_PT_BINARY, .len = {self.checks["exact-len"]}, '
else:
return f'.type = YNL_PT_NEST, .nest = &{self.nested_render_name}_nest, '
@@ -721,6 +768,26 @@ class TypeArrayNest(Type):
'}']
return get_lines, None, local_vars
+ def attr_put(self, ri, var):
+ ri.cw.p(f'array = ynl_attr_nest_start(nlh, {self.enum_name});')
+ if self.sub_type in scalars:
+ put_type = self.sub_type
+ ri.cw.block_start(line=f'for (i = 0; i < {var}->n_{self.c_name}; i++)')
+ ri.cw.p(f"ynl_attr_put_{put_type}(nlh, i, {var}->{self.c_name}[i]);")
+ ri.cw.block_end()
+ elif self.sub_type == 'binary' and 'exact-len' in self.checks:
+ ri.cw.p(f'for (i = 0; i < {var}->n_{self.c_name}; i++)')
+ ri.cw.p(f"ynl_attr_put(nlh, i, {var}->{self.c_name}[i], {self.checks['exact-len']});")
+ else:
+ raise Exception(f"Put for ArrayNest sub-type {self.attr['sub-type']} not supported, yet")
+ ri.cw.p('ynl_attr_nest_end(nlh, array);')
+
+ def _setter_lines(self, ri, member, presence):
+ # For multi-attr we have a count, not presence, hack up the presence
+ presence = presence[:-(len('_present.') + len(self.c_name))] + "n_" + self.c_name
+ return [f"{member} = {self.c_name};",
+ f"{presence} = n_{self.c_name};"]
+
class TypeNestTypeValue(Type):
def _complex_member_type(self, ri):
@@ -809,6 +876,12 @@ class Struct:
raise Exception("Inheriting different members not supported")
self.inherited = [c_lower(x) for x in sorted(self._inherited)]
+ def free_needs_iter(self):
+ for _, attr in self.attr_list:
+ if attr.free_needs_iter():
+ return True
+ return False
+
class EnumEntry(SpecEnumEntry):
def __init__(self, enum_set, yaml, prev, value_start):
@@ -862,7 +935,7 @@ class EnumSet(SpecEnumSet):
high = max([x.value for x in self.entries.values()])
if high - low + 1 != len(self.entries):
- raise Exception("Can't get value range for a noncontiguous enum")
+ return None, None
return low, high
@@ -915,7 +988,7 @@ class AttrSet(SpecAttrSet):
elif elem['type'] == 'nest':
t = TypeNest(self.family, self, elem, value)
elif elem['type'] == 'indexed-array' and 'sub-type' in elem:
- if elem["sub-type"] in ['nest', 'u32']:
+ if elem["sub-type"] in ['binary', 'nest', 'u32']:
t = TypeArrayNest(self.family, self, elem, value)
else:
raise Exception(f'new_attr: unsupported sub-type {elem["sub-type"]}')
@@ -932,6 +1005,14 @@ class AttrSet(SpecAttrSet):
class Operation(SpecOperation):
def __init__(self, family, yaml, req_value, rsp_value):
+ # Fill in missing operation properties (for fixed hdr-only msgs)
+ for mode in ['do', 'dump', 'event']:
+ for direction in ['request', 'reply']:
+ try:
+ yaml[mode][direction].setdefault('attributes', [])
+ except KeyError:
+ pass
+
super().__init__(family, yaml, req_value, rsp_value)
self.render_name = c_lower(family.ident_name + '_' + self.name)
@@ -993,9 +1074,6 @@ class Family(SpecFamily):
def resolve(self):
self.resolve_up(super())
- if self.yaml.get('protocol', 'genetlink') not in {'genetlink', 'genetlink-c', 'genetlink-legacy'}:
- raise Exception("Codegen only supported for genetlink")
-
self.c_name = c_lower(self.ident_name)
if 'name-prefix' in self.yaml['operations']:
self.op_prefix = c_upper(self.yaml['operations']['name-prefix'])
@@ -1018,7 +1096,7 @@ class Family(SpecFamily):
# dict space-name -> 'request': set(attrs), 'reply': set(attrs)
self.root_sets = dict()
- # dict space-name -> set('request', 'reply')
+ # dict space-name -> Struct
self.pure_nested_structs = dict()
self._mark_notify()
@@ -1042,6 +1120,9 @@ class Family(SpecFamily):
def new_operation(self, elem, req_value, rsp_value):
return Operation(self, elem, req_value, rsp_value)
+ def is_classic(self):
+ return self.proto == 'netlink-raw'
+
def _mark_notify(self):
for op in self.msgs.values():
if 'notify' in op:
@@ -1235,11 +1316,19 @@ class RenderInfo:
self.op = op
self.fixed_hdr = None
+ self.fixed_hdr_len = 'ys->family->hdr_len'
if op and op.fixed_header:
self.fixed_hdr = 'struct ' + c_lower(op.fixed_header)
+ if op.fixed_header != family.fixed_header:
+ if family.is_classic():
+ self.fixed_hdr_len = f"sizeof({self.fixed_hdr})"
+ else:
+ raise Exception(f"Per-op fixed header not supported, yet")
+
# 'do' and 'dump' response parsing is identical
self.type_consistent = True
+ self.type_oneside = False
if op_mode != 'do' and 'dump' in op:
if 'do' in op:
if ('reply' in op['do']) != ('reply' in op["dump"]):
@@ -1247,7 +1336,8 @@ class RenderInfo:
elif 'reply' in op['do'] and op["do"]["reply"] != op["dump"]["reply"]:
self.type_consistent = False
else:
- self.type_consistent = False
+ self.type_consistent = True
+ self.type_oneside = True
self.attr_set = attr_set
if not self.attr_set:
@@ -1265,7 +1355,7 @@ class RenderInfo:
self.struct = dict()
if op_mode == 'notify':
- op_mode = 'do'
+ op_mode = 'do' if 'do' in op else 'dump'
for op_dir in ['request', 'reply']:
if op:
type_list = []
@@ -1275,6 +1365,12 @@ class RenderInfo:
if op_mode == 'event':
self.struct['reply'] = Struct(family, self.attr_set, type_list=op['event']['attributes'])
+ def type_empty(self, key):
+ return len(self.struct[key].attr_list) == 0 and self.fixed_hdr is None
+
+ def needs_nlflags(self, direction):
+ return self.op_mode == 'do' and direction == 'request' and self.family.is_classic()
+
class CodeWriter:
def __init__(self, nlib, out_file=None, overwrite=True):
@@ -1541,7 +1637,9 @@ def op_prefix(ri, direction, deref=False):
suffix += f"{direction_to_suffix[direction]}"
else:
if direction == 'request':
- suffix += '_req_dump'
+ suffix += '_req'
+ if not ri.type_oneside:
+ suffix += '_dump'
else:
if ri.type_consistent:
if deref:
@@ -1678,10 +1776,15 @@ def put_req_nested(ri, struct):
local_vars.append('struct nlattr *nest;')
init_lines.append("nest = ynl_attr_nest_start(nlh, attr_type);")
+ has_anest = False
+ has_count = False
for _, arg in struct.member_list():
- if arg.presence_type() == 'count':
- local_vars.append('unsigned int i;')
- break
+ has_anest |= arg.type == 'indexed-array'
+ has_count |= arg.presence_type() == 'count'
+ if has_anest:
+ local_vars.append('struct nlattr *array;')
+ if has_count:
+ local_vars.append('unsigned int i;')
put_req_nested_prototype(ri, struct, suffix='')
ri.cw.block_start()
@@ -1708,13 +1811,18 @@ def _multi_parse(ri, struct, init_lines, local_vars):
if ri.fixed_hdr:
local_vars += ['void *hdr;']
iter_line = "ynl_attr_for_each(attr, nlh, yarg->ys->family->hdr_len)"
+ if ri.op.fixed_header != ri.family.fixed_header:
+ if ri.family.is_classic():
+ iter_line = f"ynl_attr_for_each(attr, nlh, sizeof({ri.fixed_hdr}))"
+ else:
+ raise Exception(f"Per-op fixed header not supported, yet")
array_nests = set()
multi_attrs = set()
needs_parg = False
for arg, aspec in struct.member_list():
if aspec['type'] == 'indexed-array' and 'sub-type' in aspec:
- if aspec["sub-type"] == 'nest':
+ if aspec["sub-type"] in {'binary', 'nest'}:
local_vars.append(f'const struct nlattr *attr_{aspec.c_name};')
array_nests.add(arg)
elif aspec['sub-type'] in scalars:
@@ -1747,7 +1855,10 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p(f'dst->{arg} = {arg};')
if ri.fixed_hdr:
- ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
+ if ri.family.is_classic():
+ ri.cw.p('hdr = ynl_nlmsg_data(nlh);')
+ else:
+ ri.cw.p('hdr = ynl_nlmsg_data_offset(nlh, sizeof(struct genlmsghdr));')
ri.cw.p(f"memcpy(&dst->_hdr, hdr, sizeof({ri.fixed_hdr}));")
for anest in sorted(all_multi):
aspec = struct[anest]
@@ -1784,6 +1895,9 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p('return YNL_PARSE_CB_ERROR;')
elif aspec.sub_type in scalars:
ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.sub_type}(attr);")
+ elif aspec.sub_type == 'binary' and 'exact-len' in aspec.checks:
+ # Length is validated by typol
+ ri.cw.p(f'memcpy(dst->{aspec.c_name}[i], ynl_attr_data(attr), {aspec.checks["exact-len"]});')
else:
raise Exception(f"Nest parsing type not supported in {aspec['name']}")
ri.cw.p('i++;')
@@ -1807,8 +1921,22 @@ def _multi_parse(ri, struct, init_lines, local_vars):
ri.cw.p('return YNL_PARSE_CB_ERROR;')
elif aspec.type in scalars:
ri.cw.p(f"dst->{aspec.c_name}[i] = ynl_attr_get_{aspec.type}(attr);")
+ elif aspec.type == 'binary' and 'struct' in aspec:
+ ri.cw.p('size_t len = ynl_attr_data_len(attr);')
+ ri.cw.nl()
+ ri.cw.p(f'if (len > sizeof(dst->{aspec.c_name}[0]))')
+ ri.cw.p(f'len = sizeof(dst->{aspec.c_name}[0]);')
+ ri.cw.p(f"memcpy(&dst->{aspec.c_name}[i], ynl_attr_data(attr), len);")
+ elif aspec.type == 'string':
+ ri.cw.p('unsigned int len;')
+ ri.cw.nl()
+ ri.cw.p('len = strnlen(ynl_attr_get_str(attr), ynl_attr_data_len(attr));')
+ ri.cw.p(f'dst->{aspec.c_name}[i] = malloc(sizeof(struct ynl_string) + len + 1);')
+ ri.cw.p(f"dst->{aspec.c_name}[i]->len = len;")
+ ri.cw.p(f"memcpy(dst->{aspec.c_name}[i]->str, ynl_attr_get_str(attr), len);")
+ ri.cw.p(f"dst->{aspec.c_name}[i]->str[len] = 0;")
else:
- raise Exception('Nest parsing type not supported yet')
+ raise Exception(f'Nest parsing of type {aspec.type} not supported yet')
ri.cw.p('i++;')
ri.cw.block_end()
ri.cw.block_end()
@@ -1899,9 +2027,13 @@ def print_req(ri):
ri.cw.block_start()
ri.cw.write_func_lvar(local_vars)
- ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+ if ri.family.is_classic():
+ ri.cw.p(f"nlh = ynl_msg_start_req(ys, {ri.op.enum_name}, req->_nlmsg_flags);")
+ else:
+ ri.cw.p(f"nlh = ynl_gemsg_start_req(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
+ ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};")
if 'reply' in ri.op[ri.op_mode]:
ri.cw.p(f"yrs.yarg.rsp_policy = &{ri.struct['reply'].render_name}_nest;")
ri.cw.nl()
@@ -1968,7 +2100,10 @@ def print_dump(ri):
else:
ri.cw.p(f'yds.rsp_cmd = {ri.op.rsp_value};')
ri.cw.nl()
- ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
+ if ri.family.is_classic():
+ ri.cw.p(f"nlh = ynl_msg_start_dump(ys, {ri.op.enum_name});")
+ else:
+ ri.cw.p(f"nlh = ynl_gemsg_start_dump(ys, {ri.nl.get_family_id()}, {ri.op.enum_name}, 1);")
if ri.fixed_hdr:
ri.cw.p("hdr_len = sizeof(req->_hdr);")
@@ -1978,6 +2113,7 @@ def print_dump(ri):
if "request" in ri.op[ri.op_mode]:
ri.cw.p(f"ys->req_policy = &{ri.struct['request'].render_name}_nest;")
+ ri.cw.p(f"ys->req_hdr_len = {ri.fixed_hdr_len};")
ri.cw.nl()
for _, attr in ri.struct["request"].member_list():
attr.attr_put(ri, "req")
@@ -2023,16 +2159,29 @@ def print_free_prototype(ri, direction, suffix=';'):
ri.cw.write_func_prot('void', f"{name}_free", [f"struct {struct_name} *{arg}"], suffix=suffix)
+def print_nlflags_set(ri, direction):
+ name = op_prefix(ri, direction)
+ ri.cw.write_func_prot(f'static inline void', f"{name}_set_nlflags",
+ [f"struct {name} *req", "__u16 nl_flags"])
+ ri.cw.block_start()
+ ri.cw.p('req->_nlmsg_flags = nl_flags;')
+ ri.cw.block_end()
+ ri.cw.nl()
+
+
def _print_type(ri, direction, struct):
suffix = f'_{ri.type_name}{direction_to_suffix[direction]}'
if not direction and ri.type_name_conflict:
suffix += '_'
- if ri.op_mode == 'dump':
+ if ri.op_mode == 'dump' and not ri.type_oneside:
suffix += '_dump'
ri.cw.block_start(line=f"struct {ri.family.c_name}{suffix}")
+ if ri.needs_nlflags(direction):
+ ri.cw.p('__u16 _nlmsg_flags;')
+ ri.cw.nl()
if ri.fixed_hdr:
ri.cw.p(ri.fixed_hdr + ' _hdr;')
ri.cw.nl()
@@ -2072,6 +2221,9 @@ def print_type_helpers(ri, direction, deref=False):
print_free_prototype(ri, direction)
ri.cw.nl()
+ if ri.needs_nlflags(direction):
+ print_nlflags_set(ri, direction)
+
if ri.ku_space == 'user' and direction == 'request':
for _, attr in ri.struct[direction].member_list():
attr.setter(ri, ri.attr_set, direction, deref=deref)
@@ -2079,7 +2231,7 @@ def print_type_helpers(ri, direction, deref=False):
def print_req_type_helpers(ri):
- if len(ri.struct["request"].attr_list) == 0:
+ if ri.type_empty("request"):
return
print_alloc_wrapper(ri, "request")
print_type_helpers(ri, "request")
@@ -2102,7 +2254,7 @@ def print_parse_prototype(ri, direction, terminate=True):
def print_req_type(ri):
- if len(ri.struct["request"].attr_list) == 0:
+ if ri.type_empty("request"):
return
print_type(ri, "request")
@@ -2140,11 +2292,9 @@ def print_wrapped_type(ri):
def _free_type_members_iter(ri, struct):
- for _, attr in struct.member_list():
- if attr.free_needs_iter():
- ri.cw.p('unsigned int i;')
- ri.cw.nl()
- break
+ if struct.free_needs_iter():
+ ri.cw.p('unsigned int i;')
+ ri.cw.nl()
def _free_type_members(ri, var, struct, ref=''):
@@ -2281,6 +2431,46 @@ def print_kernel_policy_ranges(family, cw):
cw.nl()
+def print_kernel_policy_sparse_enum_validates(family, cw):
+ first = True
+ for _, attr_set in family.attr_sets.items():
+ if attr_set.subset_of:
+ continue
+
+ for _, attr in attr_set.items():
+ if not attr.request:
+ continue
+ if not attr.enum_name:
+ continue
+ if 'sparse' not in attr.checks:
+ continue
+
+ if first:
+ cw.p('/* Sparse enums validation callbacks */')
+ first = False
+
+ sign = '' if attr.type[0] == 'u' else '_signed'
+ suffix = 'ULL' if attr.type[0] == 'u' else 'LL'
+ cw.write_func_prot('static int', f'{c_lower(attr.enum_name)}_validate',
+ ['const struct nlattr *attr', 'struct netlink_ext_ack *extack'])
+ cw.block_start()
+ cw.block_start(line=f'switch (nla_get_{attr["type"]}(attr))')
+ enum = family.consts[attr['enum']]
+ first_entry = True
+ for entry in enum.entries.values():
+ if first_entry:
+ first_entry = False
+ else:
+ cw.p('fallthrough;')
+ cw.p(f'case {entry.c_name}:')
+ cw.p('return 0;')
+ cw.block_end()
+ cw.p('NL_SET_ERR_MSG_ATTR(extack, attr, "invalid enum value");')
+ cw.p('return -EINVAL;')
+ cw.block_end()
+ cw.nl()
+
+
def print_kernel_op_table_fwd(family, cw, terminate):
exported = not kernel_can_gen_family_struct(family)
@@ -2740,7 +2930,11 @@ def render_uapi(family, cw):
def _render_user_ntf_entry(ri, op):
- ri.cw.block_start(line=f"[{op.enum_name}] = ")
+ if not ri.family.is_classic():
+ ri.cw.block_start(line=f"[{op.enum_name}] = ")
+ else:
+ crud_op = ri.family.req_by_value[op.rsp_value]
+ ri.cw.block_start(line=f"[{crud_op.enum_name}] = ")
ri.cw.p(f".alloc_sz\t= sizeof({type_name(ri, 'event')}),")
ri.cw.p(f".cb\t\t= {op_prefix(ri, 'reply', deref=True)}_parse,")
ri.cw.p(f".policy\t\t= &{ri.struct['reply'].render_name}_nest,")
@@ -2755,7 +2949,7 @@ def render_user_family(family, cw, prototype):
return
if family.ntfs:
- cw.block_start(line=f"static const struct ynl_ntf_info {family['name']}_ntf_info[] = ")
+ cw.block_start(line=f"static const struct ynl_ntf_info {family.c_name}_ntf_info[] = ")
for ntf_op_name, ntf_op in family.ntfs.items():
if 'notify' in ntf_op:
op = family.ops[ntf_op['notify']]
@@ -2775,13 +2969,19 @@ def render_user_family(family, cw, prototype):
cw.block_start(f'{symbol} = ')
cw.p(f'.name\t\t= "{family.c_name}",')
- if family.fixed_header:
+ if family.is_classic():
+ cw.p(f'.is_classic\t= true,')
+ cw.p(f'.classic_id\t= {family.get("protonum")},')
+ if family.is_classic():
+ if family.fixed_header:
+ cw.p(f'.hdr_len\t= sizeof(struct {c_lower(family.fixed_header)}),')
+ elif family.fixed_header:
cw.p(f'.hdr_len\t= sizeof(struct genlmsghdr) + sizeof(struct {c_lower(family.fixed_header)}),')
else:
cw.p('.hdr_len\t= sizeof(struct genlmsghdr),')
if family.ntfs:
- cw.p(f".ntf_info\t= {family['name']}_ntf_info,")
- cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family['name']}_ntf_info),")
+ cw.p(f".ntf_info\t= {family.c_name}_ntf_info,")
+ cw.p(f".ntf_info_size\t= YNL_ARRAY_SIZE({family.c_name}_ntf_info),")
cw.block_end(line=';')
@@ -2888,7 +3088,7 @@ def main():
cw.p(f'#include "{hdr_file}"')
cw.p('#include "ynl.h"')
headers = []
- for definition in parsed['definitions']:
+ for definition in parsed['definitions'] + parsed['attribute-sets']:
if 'header' in definition:
headers.append(definition['header'])
if args.mode == 'user':
@@ -2942,6 +3142,7 @@ def main():
print_kernel_family_struct_hdr(parsed, cw)
else:
print_kernel_policy_ranges(parsed, cw)
+ print_kernel_policy_sparse_enum_validates(parsed, cw)
for _, struct in sorted(parsed.pure_nested_structs.items()):
if struct.request:
@@ -3010,7 +3211,7 @@ def main():
ri = RenderInfo(cw, parsed, args.mode, op, 'dump')
print_req_type(ri)
print_req_type_helpers(ri)
- if not ri.type_consistent:
+ if not ri.type_consistent or ri.type_oneside:
print_rsp_type(ri)
print_wrapped_type(ri)
print_dump_prototype(ri)
@@ -3088,7 +3289,7 @@ def main():
if 'dump' in op:
cw.p(f"/* {op.enum_name} - dump */")
ri = RenderInfo(cw, parsed, args.mode, op, "dump")
- if not ri.type_consistent:
+ if not ri.type_consistent or ri.type_oneside:
parse_rsp_msg(ri, deref=True)
print_req_free(ri)
print_dump_type_free(ri)
diff --git a/tools/net/ynl/pyynl/ynl_gen_rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 6c56d0d726b4..0cb6348e28d3 100755
--- a/tools/net/ynl/pyynl/ynl_gen_rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
@@ -392,7 +392,7 @@ def parse_arguments() -> argparse.Namespace:
def parse_yaml_file(filename: str) -> str:
- """Transform the YAML specified by filename into a rst-formmated string"""
+ """Transform the YAML specified by filename into an RST-formatted string"""
with open(filename, "r", encoding="utf-8") as spec_file:
yaml_data = yaml.safe_load(spec_file)
content = parse_yaml(yaml_data)
diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore
index dda6686257a7..7f9781cf532f 100644
--- a/tools/net/ynl/samples/.gitignore
+++ b/tools/net/ynl/samples/.gitignore
@@ -2,4 +2,6 @@ ethtool
devlink
netdev
ovs
-page-pool \ No newline at end of file
+page-pool
+rt-addr
+rt-route
diff --git a/tools/net/ynl/samples/rt-addr.c b/tools/net/ynl/samples/rt-addr.c
new file mode 100644
index 000000000000..0f4851b4ec57
--- /dev/null
+++ b/tools/net/ynl/samples/rt-addr.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-addr-user.h"
+
+static void rt_addr_print(struct rt_addr_getaddr_rsp *a)
+{
+ char ifname[IF_NAMESIZE];
+ char addr_str[64];
+ const char *addr;
+ const char *name;
+
+ name = if_indextoname(a->_hdr.ifa_index, ifname);
+ if (name)
+ printf("%16s: ", name);
+
+ switch (a->_present.address_len) {
+ case 4:
+ addr = inet_ntop(AF_INET, a->address,
+ addr_str, sizeof(addr_str));
+ break;
+ case 16:
+ addr = inet_ntop(AF_INET6, a->address,
+ addr_str, sizeof(addr_str));
+ break;
+ default:
+ addr = NULL;
+ break;
+ }
+ if (addr)
+ printf("%s", addr);
+ else
+ printf("[%d]", a->_present.address_len);
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_addr_getaddr_list *rsp;
+ struct rt_addr_getaddr_req *req;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_addr_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ req = rt_addr_getaddr_req_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = rt_addr_getaddr_dump(ys, req);
+ rt_addr_getaddr_req_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no addresses reported\n");
+ ynl_dump_foreach(rsp, addr)
+ rt_addr_print(addr);
+ rt_addr_getaddr_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/net/ynl/samples/rt-route.c b/tools/net/ynl/samples/rt-route.c
new file mode 100644
index 000000000000..9d9c868f8873
--- /dev/null
+++ b/tools/net/ynl/samples/rt-route.c
@@ -0,0 +1,80 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <stdio.h>
+#include <string.h>
+
+#include <ynl.h>
+
+#include <arpa/inet.h>
+#include <net/if.h>
+
+#include "rt-route-user.h"
+
+static void rt_route_print(struct rt_route_getroute_rsp *r)
+{
+ char ifname[IF_NAMESIZE];
+ char route_str[64];
+ const char *route;
+ const char *name;
+
+ /* Ignore local */
+ if (r->_hdr.rtm_table == RT_TABLE_LOCAL)
+ return;
+
+ if (r->_present.oif) {
+ name = if_indextoname(r->oif, ifname);
+ if (name)
+ printf("oif: %-16s ", name);
+ }
+
+ if (r->_present.dst_len) {
+ route = inet_ntop(r->_hdr.rtm_family, r->dst,
+ route_str, sizeof(route_str));
+ printf("dst: %s/%d", route, r->_hdr.rtm_dst_len);
+ }
+
+ if (r->_present.gateway_len) {
+ route = inet_ntop(r->_hdr.rtm_family, r->gateway,
+ route_str, sizeof(route_str));
+ printf("gateway: %s ", route);
+ }
+
+ printf("\n");
+}
+
+int main(int argc, char **argv)
+{
+ struct rt_route_getroute_req_dump *req;
+ struct rt_route_getroute_list *rsp;
+ struct ynl_error yerr;
+ struct ynl_sock *ys;
+
+ ys = ynl_sock_create(&ynl_rt_route_family, &yerr);
+ if (!ys) {
+ fprintf(stderr, "YNL: %s\n", yerr.msg);
+ return 1;
+ }
+
+ req = rt_route_getroute_req_dump_alloc();
+ if (!req)
+ goto err_destroy;
+
+ rsp = rt_route_getroute_dump(ys, req);
+ rt_route_getroute_req_dump_free(req);
+ if (!rsp)
+ goto err_close;
+
+ if (ynl_dump_empty(rsp))
+ fprintf(stderr, "Error: no routeesses reported\n");
+ ynl_dump_foreach(rsp, route)
+ rt_route_print(route);
+ rt_route_getroute_list_free(rsp);
+
+ ynl_sock_destroy(ys);
+ return 0;
+
+err_close:
+ fprintf(stderr, "YNL: %s\n", ys->err.msg);
+err_destroy:
+ ynl_sock_destroy(ys);
+ return 2;
+}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index b649049b6a11..3a411064fa34 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -225,6 +225,7 @@ static bool is_rust_noreturn(const struct symbol *func)
str_ends_with(func->name, "_4core9panicking14panic_nounwind") ||
str_ends_with(func->name, "_4core9panicking18panic_bounds_check") ||
str_ends_with(func->name, "_4core9panicking19assert_failed_inner") ||
+ str_ends_with(func->name, "_4core9panicking30panic_null_pointer_dereference") ||
str_ends_with(func->name, "_4core9panicking36panic_misaligned_pointer_dereference") ||
strstr(func->name, "_4core9panicking13assert_failed") ||
strstr(func->name, "_4core9panicking11panic_const24panic_const_") ||
diff --git a/tools/sched_ext/scx_flatcg.bpf.c b/tools/sched_ext/scx_flatcg.bpf.c
index 2c720e3ecad5..fdc7170639e6 100644
--- a/tools/sched_ext/scx_flatcg.bpf.c
+++ b/tools/sched_ext/scx_flatcg.bpf.c
@@ -950,5 +950,5 @@ SCX_OPS_DEFINE(flatcg_ops,
.cgroup_move = (void *)fcg_cgroup_move,
.init = (void *)fcg_init,
.exit = (void *)fcg_exit,
- .flags = SCX_OPS_HAS_CGROUP_WEIGHT | SCX_OPS_ENQ_EXITING,
+ .flags = SCX_OPS_ENQ_EXITING,
.name = "flatcg");
diff --git a/tools/testing/cxl/test/mem.c b/tools/testing/cxl/test/mem.c
index f2957a3e36fe..bf9caa908f89 100644
--- a/tools/testing/cxl/test/mem.c
+++ b/tools/testing/cxl/test/mem.c
@@ -1780,7 +1780,7 @@ static int cxl_mock_mem_probe(struct platform_device *pdev)
if (rc)
return rc;
- rc = devm_cxl_setup_fwctl(cxlmd);
+ rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd);
if (rc)
dev_dbg(dev, "No CXL FWCTL setup\n");
diff --git a/tools/testing/kunit/configs/all_tests.config b/tools/testing/kunit/configs/all_tests.config
index cdd9782f9646..7bb885b0c32d 100644
--- a/tools/testing/kunit/configs/all_tests.config
+++ b/tools/testing/kunit/configs/all_tests.config
@@ -43,6 +43,8 @@ CONFIG_REGMAP_BUILD=y
CONFIG_AUDIT=y
+CONFIG_PRIME_NUMBERS=y
+
CONFIG_SECURITY=y
CONFIG_SECURITY_APPARMOR=y
CONFIG_SECURITY_LANDLOCK=y
diff --git a/tools/testing/kunit/qemu_configs/sh.py b/tools/testing/kunit/qemu_configs/sh.py
index 78a474a5b95f..f00cb89fdef6 100644
--- a/tools/testing/kunit/qemu_configs/sh.py
+++ b/tools/testing/kunit/qemu_configs/sh.py
@@ -7,7 +7,9 @@ CONFIG_CPU_SUBTYPE_SH7751R=y
CONFIG_MEMORY_START=0x0c000000
CONFIG_SH_RTS7751R2D=y
CONFIG_RTS7751R2D_PLUS=y
-CONFIG_SERIAL_SH_SCI=y''',
+CONFIG_SERIAL_SH_SCI=y
+CONFIG_CMDLINE_EXTEND=y
+''',
qemu_arch='sh4',
kernel_path='arch/sh/boot/zImage',
kernel_command_line='console=ttySC1',
diff --git a/tools/testing/memblock/tests/basic_api.c b/tools/testing/memblock/tests/basic_api.c
index 67503089e6a0..01e836fba488 100644
--- a/tools/testing/memblock/tests/basic_api.c
+++ b/tools/testing/memblock/tests/basic_api.c
@@ -2434,6 +2434,107 @@ static int memblock_overlaps_region_checks(void)
return 0;
}
+#ifdef CONFIG_NUMA
+static int memblock_set_node_check(void)
+{
+ unsigned long i, max_reserved;
+ struct memblock_region *rgn;
+ void *orig_region;
+
+ PREFIX_PUSH();
+
+ reset_memblock_regions();
+ memblock_allow_resize();
+
+ dummy_physical_memory_init();
+ memblock_add(dummy_physical_memory_base(), MEM_SIZE);
+ orig_region = memblock.reserved.regions;
+
+ /* Equally Split range to node 0 and 1*/
+ memblock_set_node(memblock_start_of_DRAM(),
+ memblock_phys_mem_size() / 2, &memblock.memory, 0);
+ memblock_set_node(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2,
+ memblock_phys_mem_size() / 2, &memblock.memory, 1);
+
+ ASSERT_EQ(memblock.memory.cnt, 2);
+ rgn = &memblock.memory.regions[0];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM());
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 0);
+ rgn = &memblock.memory.regions[1];
+ ASSERT_EQ(rgn->base, memblock_start_of_DRAM() + memblock_phys_mem_size() / 2);
+ ASSERT_EQ(rgn->size, memblock_phys_mem_size() / 2);
+ ASSERT_EQ(memblock_get_region_node(rgn), 1);
+
+ /* Reserve 126 regions with the last one across node boundary */
+ for (i = 0; i < 125; i++)
+ memblock_reserve(memblock_start_of_DRAM() + SZ_16 * i, SZ_8);
+
+ memblock_reserve(memblock_start_of_DRAM() + memblock_phys_mem_size() / 2 - SZ_8,
+ SZ_16);
+
+ /*
+ * Commit 61167ad5fecd ("mm: pass nid to reserve_bootmem_region()")
+ * do following process to set nid to each memblock.reserved region.
+ * But it may miss some region if memblock_set_node() double the
+ * array.
+ *
+ * By checking 'max', we make sure all region nid is set properly.
+ */
+repeat:
+ max_reserved = memblock.reserved.max;
+ for_each_mem_region(rgn) {
+ int nid = memblock_get_region_node(rgn);
+
+ memblock_set_node(rgn->base, rgn->size, &memblock.reserved, nid);
+ }
+ if (max_reserved != memblock.reserved.max)
+ goto repeat;
+
+ /* Confirm each region has valid node set */
+ for_each_reserved_mem_region(rgn) {
+ ASSERT_TRUE(numa_valid_node(memblock_get_region_node(rgn)));
+ if (rgn == (memblock.reserved.regions + memblock.reserved.cnt - 1))
+ ASSERT_EQ(1, memblock_get_region_node(rgn));
+ else
+ ASSERT_EQ(0, memblock_get_region_node(rgn));
+ }
+
+ dummy_physical_memory_cleanup();
+
+ /*
+ * The current reserved.regions is occupying a range of memory that
+ * allocated from dummy_physical_memory_init(). After free the memory,
+ * we must not use it. So restore the origin memory region to make sure
+ * the tests can run as normal and not affected by the double array.
+ */
+ memblock.reserved.regions = orig_region;
+ memblock.reserved.cnt = INIT_MEMBLOCK_RESERVED_REGIONS;
+
+ test_pass_pop();
+
+ return 0;
+}
+
+static int memblock_set_node_checks(void)
+{
+ prefix_reset();
+ prefix_push("memblock_set_node");
+ test_print("Running memblock_set_node tests...\n");
+
+ memblock_set_node_check();
+
+ prefix_pop();
+
+ return 0;
+}
+#else
+static int memblock_set_node_checks(void)
+{
+ return 0;
+}
+#endif
+
int memblock_basic_checks(void)
{
memblock_initialization_check();
@@ -2444,6 +2545,7 @@ int memblock_basic_checks(void)
memblock_bottom_up_checks();
memblock_trim_memory_checks();
memblock_overlaps_region_checks();
+ memblock_set_node_checks();
return 0;
}
diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile
index c77c8c8e3d9b..61bb8bf1b507 100644
--- a/tools/testing/selftests/Makefile
+++ b/tools/testing/selftests/Makefile
@@ -71,6 +71,7 @@ TARGETS += net/hsr
TARGETS += net/mptcp
TARGETS += net/netfilter
TARGETS += net/openvswitch
+TARGETS += net/ovpn
TARGETS += net/packetdrill
TARGETS += net/rds
TARGETS += net/tcp_ao
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index c378d5d07e02..3201a962b3dc 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -71,8 +71,10 @@ CONFIG_NET_IPGRE=y
CONFIG_NET_IPGRE_DEMUX=y
CONFIG_NET_IPIP=y
CONFIG_NET_MPLS_GSO=y
+CONFIG_NET_SCH_BPF=y
CONFIG_NET_SCH_FQ=y
CONFIG_NET_SCH_INGRESS=y
+CONFIG_NET_SCH_HTB=y
CONFIG_NET_SCHED=y
CONFIG_NETDEVSIM=y
CONFIG_NETFILTER=y
diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
new file mode 100644
index 000000000000..730357cd0c9a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_qdisc.c
@@ -0,0 +1,231 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/pkt_sched.h>
+#include <linux/rtnetlink.h>
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "bpf_qdisc_fifo.skel.h"
+#include "bpf_qdisc_fq.skel.h"
+#include "bpf_qdisc_fail__incompl_ops.skel.h"
+
+#define LO_IFINDEX 1
+
+static const unsigned int total_bytes = 10 * 1024 * 1024;
+
+static void do_test(char *qdisc)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_ROOT,
+ .handle = 0x8000000,
+ .qdisc = qdisc);
+ int srv_fd = -1, cli_fd = -1;
+ int err;
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_OK(err, "attach qdisc"))
+ return;
+
+ srv_fd = start_server(AF_INET6, SOCK_STREAM, NULL, 0, 0);
+ if (!ASSERT_OK_FD(srv_fd, "start server"))
+ goto done;
+
+ cli_fd = connect_to_fd(srv_fd, 0);
+ if (!ASSERT_OK_FD(cli_fd, "connect to client"))
+ goto done;
+
+ err = send_recv_data(srv_fd, cli_fd, total_bytes);
+ ASSERT_OK(err, "send_recv_data");
+
+done:
+ if (srv_fd != -1)
+ close(srv_fd);
+ if (cli_fd != -1)
+ close(cli_fd);
+
+ bpf_tc_hook_destroy(&hook);
+}
+
+static void test_fifo(void)
+{
+ struct bpf_qdisc_fifo *fifo_skel;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ do_test("bpf_fifo");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_fq(void)
+{
+ struct bpf_qdisc_fq *fq_skel;
+
+ fq_skel = bpf_qdisc_fq__open_and_load();
+ if (!ASSERT_OK_PTR(fq_skel, "bpf_qdisc_fq__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fq__attach(fq_skel), "bpf_qdisc_fq__attach"))
+ goto out;
+
+ do_test("bpf_fq");
+out:
+ bpf_qdisc_fq__destroy(fq_skel);
+}
+
+static void test_qdisc_attach_to_mq(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ hook.ifindex = if_nametoindex("veth0");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ err = bpf_tc_hook_create(&hook);
+ ASSERT_OK(err, "attach qdisc");
+
+ bpf_tc_hook_destroy(&hook);
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_qdisc_attach_to_non_root(void)
+{
+ DECLARE_LIBBPF_OPTS(bpf_tc_hook, hook, .ifindex = LO_IFINDEX,
+ .attach_point = BPF_TC_QDISC,
+ .parent = TC_H_MAKE(1 << 16, 1),
+ .handle = 0x11 << 16,
+ .qdisc = "bpf_fifo");
+ struct bpf_qdisc_fifo *fifo_skel;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ SYS(out, "tc qdisc add dev lo root handle 1: htb");
+ SYS(out_del_htb, "tc class add dev lo parent 1: classid 1:1 htb rate 75Kbit");
+
+ err = bpf_tc_hook_create(&hook);
+ if (!ASSERT_ERR(err, "attach qdisc"))
+ bpf_tc_hook_destroy(&hook);
+
+out_del_htb:
+ SYS(out, "tc qdisc delete dev lo root htb");
+out:
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+static void test_incompl_ops(void)
+{
+ struct bpf_qdisc_fail__incompl_ops *skel;
+ struct bpf_link *link;
+
+ skel = bpf_qdisc_fail__incompl_ops__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ link = bpf_map__attach_struct_ops(skel->maps.test);
+ if (!ASSERT_ERR_PTR(link, "bpf_map__attach_struct_ops"))
+ bpf_link__destroy(link);
+
+ bpf_qdisc_fail__incompl_ops__destroy(skel);
+}
+
+static int get_default_qdisc(char *qdisc_name)
+{
+ FILE *f;
+ int num;
+
+ f = fopen("/proc/sys/net/core/default_qdisc", "r");
+ if (!f)
+ return -errno;
+
+ num = fscanf(f, "%s", qdisc_name);
+ fclose(f);
+
+ return num == 1 ? 0 : -EFAULT;
+}
+
+static void test_default_qdisc_attach_to_mq(void)
+{
+ char default_qdisc[IFNAMSIZ] = {};
+ struct bpf_qdisc_fifo *fifo_skel;
+ struct netns_obj *netns = NULL;
+ int err;
+
+ fifo_skel = bpf_qdisc_fifo__open_and_load();
+ if (!ASSERT_OK_PTR(fifo_skel, "bpf_qdisc_fifo__open_and_load"))
+ return;
+
+ if (!ASSERT_OK(bpf_qdisc_fifo__attach(fifo_skel), "bpf_qdisc_fifo__attach"))
+ goto out;
+
+ err = get_default_qdisc(default_qdisc);
+ if (!ASSERT_OK(err, "read sysctl net.core.default_qdisc"))
+ goto out;
+
+ err = write_sysctl("/proc/sys/net/core/default_qdisc", "bpf_fifo");
+ if (!ASSERT_OK(err, "write sysctl net.core.default_qdisc"))
+ goto out;
+
+ netns = netns_new("bpf_qdisc_ns", true);
+ if (!ASSERT_OK_PTR(netns, "netns_new"))
+ goto out;
+
+ SYS(out, "ip link add veth0 type veth peer veth1");
+ SYS(out, "tc qdisc add dev veth0 root handle 1: mq");
+
+ ASSERT_EQ(fifo_skel->bss->init_called, true, "init_called");
+
+ SYS(out, "tc qdisc delete dev veth0 root mq");
+out:
+ netns_free(netns);
+ if (default_qdisc[0])
+ write_sysctl("/proc/sys/net/core/default_qdisc", default_qdisc);
+
+ bpf_qdisc_fifo__destroy(fifo_skel);
+}
+
+void test_ns_bpf_qdisc(void)
+{
+ if (test__start_subtest("fifo"))
+ test_fifo();
+ if (test__start_subtest("fq"))
+ test_fq();
+ if (test__start_subtest("attach to mq"))
+ test_qdisc_attach_to_mq();
+ if (test__start_subtest("attach to non root"))
+ test_qdisc_attach_to_non_root();
+ if (test__start_subtest("incompl_ops"))
+ test_incompl_ops();
+}
+
+void serial_test_bpf_qdisc_default(void)
+{
+ test_default_qdisc_attach_to_mq();
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/for_each.c b/tools/testing/selftests/bpf/prog_tests/for_each.c
index 09f6487f58b9..5fea3209566e 100644
--- a/tools/testing/selftests/bpf/prog_tests/for_each.c
+++ b/tools/testing/selftests/bpf/prog_tests/for_each.c
@@ -6,6 +6,7 @@
#include "for_each_array_map_elem.skel.h"
#include "for_each_map_elem_write_key.skel.h"
#include "for_each_multi_maps.skel.h"
+#include "for_each_hash_modify.skel.h"
static unsigned int duration;
@@ -203,6 +204,40 @@ out:
for_each_multi_maps__destroy(skel);
}
+static void test_hash_modify(void)
+{
+ struct for_each_hash_modify *skel;
+ int max_entries, i, err;
+ __u64 key, val;
+
+ LIBBPF_OPTS(bpf_test_run_opts, topts,
+ .data_in = &pkt_v4,
+ .data_size_in = sizeof(pkt_v4),
+ .repeat = 1
+ );
+
+ skel = for_each_hash_modify__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "for_each_hash_modify__open_and_load"))
+ return;
+
+ max_entries = bpf_map__max_entries(skel->maps.hashmap);
+ for (i = 0; i < max_entries; i++) {
+ key = i;
+ val = i;
+ err = bpf_map__update_elem(skel->maps.hashmap, &key, sizeof(key),
+ &val, sizeof(val), BPF_ANY);
+ if (!ASSERT_OK(err, "map_update"))
+ goto out;
+ }
+
+ err = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.test_pkt_access), &topts);
+ ASSERT_OK(err, "bpf_prog_test_run_opts");
+ ASSERT_OK(topts.retval, "retval");
+
+out:
+ for_each_hash_modify__destroy(skel);
+}
+
void test_for_each(void)
{
if (test__start_subtest("hash_map"))
@@ -213,4 +248,6 @@ void test_for_each(void)
test_write_map_key();
if (test__start_subtest("multi_maps"))
test_multi_maps();
+ if (test__start_subtest("hash_modify"))
+ test_hash_modify();
}
diff --git a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
index d56e18b25528..a4517bee34d5 100644
--- a/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/prog_tests/sock_iter_batch.c
@@ -7,14 +7,433 @@
#define TEST_NS "sock_iter_batch_netns"
+static const int init_batch_size = 16;
static const int nr_soreuse = 4;
+struct iter_out {
+ int idx;
+ __u64 cookie;
+} __packed;
+
+struct sock_count {
+ __u64 cookie;
+ int count;
+};
+
+static int insert(__u64 cookie, struct sock_count counts[], int counts_len)
+{
+ int insert = -1;
+ int i = 0;
+
+ for (; i < counts_len; i++) {
+ if (!counts[i].cookie) {
+ insert = i;
+ } else if (counts[i].cookie == cookie) {
+ insert = i;
+ break;
+ }
+ }
+ if (insert < 0)
+ return insert;
+
+ counts[insert].cookie = cookie;
+ counts[insert].count++;
+
+ return counts[insert].count;
+}
+
+static int read_n(int iter_fd, int n, struct sock_count counts[],
+ int counts_len)
+{
+ struct iter_out out;
+ int nread = 1;
+ int i = 0;
+
+ for (; nread > 0 && (n < 0 || i < n); i++) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_EQ(nread, sizeof(out), "nread"))
+ break;
+ ASSERT_GE(insert(out.cookie, counts, counts_len), 0, "insert");
+ }
+
+ ASSERT_TRUE(n < 0 || i == n, "n < 0 || i == n");
+
+ return i;
+}
+
+static __u64 socket_cookie(int fd)
+{
+ __u64 cookie;
+ socklen_t cookie_len = sizeof(cookie);
+
+ if (!ASSERT_OK(getsockopt(fd, SOL_SOCKET, SO_COOKIE, &cookie,
+ &cookie_len), "getsockopt(SO_COOKIE)"))
+ return 0;
+ return cookie;
+}
+
+static bool was_seen(int fd, struct sock_count counts[], int counts_len)
+{
+ __u64 cookie = socket_cookie(fd);
+ int i = 0;
+
+ for (; cookie && i < counts_len; i++)
+ if (cookie == counts[i].cookie)
+ return true;
+
+ return false;
+}
+
+static int get_seen_socket(int *fds, struct sock_count counts[], int n)
+{
+ int i = 0;
+
+ for (; i < n; i++)
+ if (was_seen(fds[i], counts, n))
+ return i;
+ return -1;
+}
+
+static int get_nth_socket(int *fds, int fds_len, struct bpf_link *link, int n)
+{
+ int i, nread, iter_fd;
+ int nth_sock_idx = -1;
+ struct iter_out out;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ return -1;
+
+ for (; n >= 0; n--) {
+ nread = read(iter_fd, &out, sizeof(out));
+ if (!nread || !ASSERT_GE(nread, 1, "nread"))
+ goto done;
+ }
+
+ for (i = 0; i < fds_len && nth_sock_idx < 0; i++)
+ if (fds[i] >= 0 && socket_cookie(fds[i]) == out.cookie)
+ nth_sock_idx = i;
+done:
+ close(iter_fd);
+ return nth_sock_idx;
+}
+
+static int get_seen_count(int fd, struct sock_count counts[], int n)
+{
+ __u64 cookie = socket_cookie(fd);
+ int count = 0;
+ int i = 0;
+
+ for (; cookie && !count && i < n; i++)
+ if (cookie == counts[i].cookie)
+ count = counts[i].count;
+
+ return count;
+}
+
+static void check_n_were_seen_once(int *fds, int fds_len, int n,
+ struct sock_count counts[], int counts_len)
+{
+ int seen_once = 0;
+ int seen_cnt;
+ int i = 0;
+
+ for (; i < fds_len; i++) {
+ /* Skip any sockets that were closed or that weren't seen
+ * exactly once.
+ */
+ if (fds[i] < 0)
+ continue;
+ seen_cnt = get_seen_count(fds[i], counts, counts_len);
+ if (seen_cnt && ASSERT_EQ(seen_cnt, 1, "seen_cnt"))
+ seen_once++;
+ }
+
+ ASSERT_EQ(seen_once, n, "seen_once");
+}
+
+static void remove_seen(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Close a socket we've already seen to remove it from the bucket. */
+ close_idx = get_seen_socket(socks, counts, counts_len);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the last socket wasn't skipped and that there were no
+ * repeats.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_unseen(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close what would be the next socket in the bucket to exercise the
+ * condition where we need to skip past the first cookie we remembered.
+ */
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure the remaining sockets were seen exactly once and that we
+ * didn't repeat the socket that was already seen.
+ */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+}
+
+static void remove_all(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int close_idx, i;
+
+ /* Iterate through the first socket. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Make sure we saw a socket from fds. */
+ check_n_were_seen_once(socks, socks_len, 1, counts, counts_len);
+
+ /* Close all remaining sockets to exhaust the list of saved cookies and
+ * exit without putting any sockets into the batch on the next read.
+ */
+ for (i = 0; i < socks_len - 1; i++) {
+ close_idx = get_nth_socket(socks, socks_len, link, 1);
+ if (!ASSERT_GE(close_idx, 0, "close_idx"))
+ return;
+ close(socks[close_idx]);
+ socks[close_idx] = -1;
+ }
+
+ /* Make sure there are no more sockets returned */
+ ASSERT_EQ(read_n(iter_fd, -1, counts, counts_len), 0, "read_n");
+}
+
+static void add_some(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socks_len - 1 sockets. */
+ read_n(iter_fd, socks_len - 1, counts, counts_len);
+
+ /* Make sure we saw socks_len - 1 sockets exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len - 1, counts,
+ counts_len);
+
+ /* Double the number of sockets in the bucket. */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each of the original sockets was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+static void force_realloc(int family, int sock_type, const char *addr,
+ __u16 port, int *socks, int socks_len,
+ struct sock_count *counts, int counts_len,
+ struct bpf_link *link, int iter_fd)
+{
+ int *new_socks = NULL;
+
+ /* Iterate through the first socket just to initialize the batch. */
+ read_n(iter_fd, 1, counts, counts_len);
+
+ /* Double the number of sockets in the bucket to force a realloc on the
+ * next read.
+ */
+ new_socks = start_reuseport_server(family, sock_type, addr, port, 0,
+ socks_len);
+ if (!ASSERT_OK_PTR(new_socks, "start_reuseport_server"))
+ goto done;
+
+ /* Iterate through the rest of the sockets. */
+ read_n(iter_fd, -1, counts, counts_len);
+
+ /* Make sure each socket from the first set was seen exactly once. */
+ check_n_were_seen_once(socks, socks_len, socks_len, counts,
+ counts_len);
+done:
+ free_fds(new_socks, socks_len);
+}
+
+struct test_case {
+ void (*test)(int family, int sock_type, const char *addr, __u16 port,
+ int *socks, int socks_len, struct sock_count *counts,
+ int counts_len, struct bpf_link *link, int iter_fd);
+ const char *description;
+ int init_socks;
+ int max_socks;
+ int sock_type;
+ int family;
+};
+
+static struct test_case resume_tests[] = {
+ {
+ .description = "udp: resume after removing a seen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_seen,
+ },
+ {
+ .description = "udp: resume after removing one unseen socket",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_unseen,
+ },
+ {
+ .description = "udp: resume after removing all unseen sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ .family = AF_INET6,
+ .test = remove_all,
+ },
+ {
+ .description = "udp: resume after adding a few sockets",
+ .init_socks = nr_soreuse,
+ .max_socks = nr_soreuse,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET so that new sockets are added to the head of the
+ * bucket's list.
+ */
+ .family = AF_INET,
+ .test = add_some,
+ },
+ {
+ .description = "udp: force a realloc to occur",
+ .init_socks = init_batch_size,
+ .max_socks = init_batch_size * 2,
+ .sock_type = SOCK_DGRAM,
+ /* Use AF_INET6 so that new sockets are added to the tail of the
+ * bucket's list, needing to be added to the next batch to force
+ * a realloc.
+ */
+ .family = AF_INET6,
+ .test = force_realloc,
+ },
+};
+
+static void do_resume_test(struct test_case *tc)
+{
+ struct sock_iter_batch *skel = NULL;
+ static const __u16 port = 10001;
+ struct bpf_link *link = NULL;
+ struct sock_count *counts;
+ int err, iter_fd = -1;
+ const char *addr;
+ int *fds = NULL;
+ int local_port;
+
+ counts = calloc(tc->max_socks, sizeof(*counts));
+ if (!ASSERT_OK_PTR(counts, "counts"))
+ goto done;
+ skel = sock_iter_batch__open();
+ if (!ASSERT_OK_PTR(skel, "sock_iter_batch__open"))
+ goto done;
+
+ /* Prepare a bucket of sockets in the kernel hashtable */
+ addr = tc->family == AF_INET6 ? "::1" : "127.0.0.1";
+ fds = start_reuseport_server(tc->family, tc->sock_type, addr, port, 0,
+ tc->init_socks);
+ if (!ASSERT_OK_PTR(fds, "start_reuseport_server"))
+ goto done;
+ local_port = get_socket_local_port(*fds);
+ if (!ASSERT_GE(local_port, 0, "get_socket_local_port"))
+ goto done;
+ skel->rodata->ports[0] = ntohs(local_port);
+ skel->rodata->sf = tc->family;
+
+ err = sock_iter_batch__load(skel);
+ if (!ASSERT_OK(err, "sock_iter_batch__load"))
+ goto done;
+
+ link = bpf_program__attach_iter(tc->sock_type == SOCK_STREAM ?
+ skel->progs.iter_tcp_soreuse :
+ skel->progs.iter_udp_soreuse,
+ NULL);
+ if (!ASSERT_OK_PTR(link, "bpf_program__attach_iter"))
+ goto done;
+
+ iter_fd = bpf_iter_create(bpf_link__fd(link));
+ if (!ASSERT_OK_FD(iter_fd, "bpf_iter_create"))
+ goto done;
+
+ tc->test(tc->family, tc->sock_type, addr, port, fds, tc->init_socks,
+ counts, tc->max_socks, link, iter_fd);
+done:
+ free(counts);
+ free_fds(fds, tc->init_socks);
+ if (iter_fd >= 0)
+ close(iter_fd);
+ bpf_link__destroy(link);
+ sock_iter_batch__destroy(skel);
+}
+
+static void do_resume_tests(void)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(resume_tests); i++) {
+ if (test__start_subtest(resume_tests[i].description)) {
+ do_resume_test(&resume_tests[i]);
+ }
+ }
+}
+
static void do_test(int sock_type, bool onebyone)
{
int err, i, nread, to_read, total_read, iter_fd = -1;
- int first_idx, second_idx, indices[nr_soreuse];
+ struct iter_out outputs[nr_soreuse];
struct bpf_link *link = NULL;
struct sock_iter_batch *skel;
+ int first_idx, second_idx;
int *fds[2] = {};
skel = sock_iter_batch__open();
@@ -34,6 +453,7 @@ static void do_test(int sock_type, bool onebyone)
goto done;
skel->rodata->ports[i] = ntohs(local_port);
}
+ skel->rodata->sf = AF_INET6;
err = sock_iter_batch__load(skel);
if (!ASSERT_OK(err, "sock_iter_batch__load"))
@@ -55,38 +475,38 @@ static void do_test(int sock_type, bool onebyone)
* from a bucket and leave one socket out from
* that bucket on purpose.
*/
- to_read = (nr_soreuse - 1) * sizeof(*indices);
+ to_read = (nr_soreuse - 1) * sizeof(*outputs);
total_read = 0;
first_idx = -1;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
if (first_idx == -1)
- first_idx = indices[0];
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], first_idx, "first_idx");
+ first_idx = outputs[0].idx;
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, first_idx, "first_idx");
} while (total_read < to_read);
- ASSERT_EQ(nread, onebyone ? sizeof(*indices) : to_read, "nread");
+ ASSERT_EQ(nread, onebyone ? sizeof(*outputs) : to_read, "nread");
ASSERT_EQ(total_read, to_read, "total_read");
free_fds(fds[first_idx], nr_soreuse);
fds[first_idx] = NULL;
/* Read the "whole" second bucket */
- to_read = nr_soreuse * sizeof(*indices);
+ to_read = nr_soreuse * sizeof(*outputs);
total_read = 0;
second_idx = !first_idx;
do {
- nread = read(iter_fd, indices, onebyone ? sizeof(*indices) : to_read);
- if (nread <= 0 || nread % sizeof(*indices))
+ nread = read(iter_fd, outputs, onebyone ? sizeof(*outputs) : to_read);
+ if (nread <= 0 || nread % sizeof(*outputs))
break;
total_read += nread;
- for (i = 0; i < nread / sizeof(*indices); i++)
- ASSERT_EQ(indices[i], second_idx, "second_idx");
+ for (i = 0; i < nread / sizeof(*outputs); i++)
+ ASSERT_EQ(outputs[i].idx, second_idx, "second_idx");
} while (total_read <= to_read);
ASSERT_EQ(nread, 0, "nread");
/* Both so_reuseport ports should be in different buckets, so
@@ -128,6 +548,7 @@ void test_sock_iter_batch(void)
do_test(SOCK_DGRAM, true);
do_test(SOCK_DGRAM, false);
}
+ do_resume_tests();
close_netns(nstoken);
done:
diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
index 2d0796314862..0a99fd404f6d 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_ktls.c
@@ -68,7 +68,6 @@ static void test_sockmap_ktls_disconnect_after_delete(int family, int map)
goto close_cli;
err = disconnect(cli);
- ASSERT_OK(err, "disconnect");
close_cli:
close(cli);
diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h
index 13a2e22f5465..863df7c0fdd0 100644
--- a/tools/testing/selftests/bpf/progs/bpf_misc.h
+++ b/tools/testing/selftests/bpf/progs/bpf_misc.h
@@ -221,7 +221,7 @@
#define CAN_USE_GOTOL
#endif
-#if _clang_major__ >= 18
+#if __clang_major__ >= 18
#define CAN_USE_BPF_ST
#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
new file mode 100644
index 000000000000..3754f581b328
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_common.h
@@ -0,0 +1,27 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef _BPF_QDISC_COMMON_H
+#define _BPF_QDISC_COMMON_H
+
+#define NET_XMIT_SUCCESS 0x00
+#define NET_XMIT_DROP 0x01 /* skb dropped */
+#define NET_XMIT_CN 0x02 /* congestion notification */
+
+#define TC_PRIO_CONTROL 7
+#define TC_PRIO_MAX 15
+
+#define private(name) SEC(".data." #name) __hidden __attribute__((aligned(8)))
+
+struct bpf_sk_buff_ptr;
+
+static struct qdisc_skb_cb *qdisc_skb_cb(const struct sk_buff *skb)
+{
+ return (struct qdisc_skb_cb *)skb->cb;
+}
+
+static inline unsigned int qdisc_pkt_len(const struct sk_buff *skb)
+{
+ return qdisc_skb_cb(skb)->pkt_len;
+}
+
+#endif
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
new file mode 100644
index 000000000000..f188062ed730
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fail__incompl_ops.c
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+SEC("struct_ops")
+int BPF_PROG(bpf_qdisc_test_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops")
+struct sk_buff *BPF_PROG(bpf_qdisc_test_dequeue, struct Qdisc *sch)
+{
+ return NULL;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_reset, struct Qdisc *sch)
+{
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_qdisc_test_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops test = {
+ .enqueue = (void *)bpf_qdisc_test_enqueue,
+ .dequeue = (void *)bpf_qdisc_test_dequeue,
+ .reset = (void *)bpf_qdisc_test_reset,
+ .destroy = (void *)bpf_qdisc_test_destroy,
+ .id = "bpf_qdisc_test",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
new file mode 100644
index 000000000000..1de2be3e370b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fifo.c
@@ -0,0 +1,126 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <vmlinux.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+struct skb_node {
+ struct sk_buff __kptr * skb;
+ struct bpf_list_node node;
+};
+
+private(A) struct bpf_spin_lock q_fifo_lock;
+private(A) struct bpf_list_head q_fifo __contains(skb_node, node);
+
+bool init_called;
+
+SEC("struct_ops/bpf_fifo_enqueue")
+int BPF_PROG(bpf_fifo_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct skb_node *skbn;
+ u32 pkt_len;
+
+ if (sch->q.qlen == sch->limit)
+ goto drop;
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn)
+ goto drop;
+
+ pkt_len = qdisc_pkt_len(skb);
+
+ sch->q.qlen++;
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&q_fifo_lock);
+ bpf_list_push_back(&q_fifo, &skbn->node);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ sch->qstats.backlog += pkt_len;
+ return NET_XMIT_SUCCESS;
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ return NET_XMIT_DROP;
+}
+
+SEC("struct_ops/bpf_fifo_dequeue")
+struct sk_buff *BPF_PROG(bpf_fifo_dequeue, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+ if (!node)
+ return NULL;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+ if (!skb)
+ return NULL;
+
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ sch->q.qlen--;
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fifo_init")
+int BPF_PROG(bpf_fifo_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ sch->limit = 1000;
+ init_called = true;
+ return 0;
+}
+
+SEC("struct_ops/bpf_fifo_reset")
+void BPF_PROG(bpf_fifo_reset, struct Qdisc *sch)
+{
+ struct bpf_list_node *node;
+ struct skb_node *skbn;
+ int i;
+
+ bpf_for(i, 0, sch->q.qlen) {
+ struct sk_buff *skb = NULL;
+
+ bpf_spin_lock(&q_fifo_lock);
+ node = bpf_list_pop_front(&q_fifo);
+ bpf_spin_unlock(&q_fifo_lock);
+
+ if (!node)
+ break;
+
+ skbn = container_of(node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_kfree_skb(skb);
+ bpf_obj_drop(skbn);
+ }
+ sch->q.qlen = 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fifo_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fifo = {
+ .enqueue = (void *)bpf_fifo_enqueue,
+ .dequeue = (void *)bpf_fifo_dequeue,
+ .init = (void *)bpf_fifo_init,
+ .reset = (void *)bpf_fifo_reset,
+ .destroy = (void *)bpf_fifo_destroy,
+ .id = "bpf_fifo",
+};
+
diff --git a/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
new file mode 100644
index 000000000000..1a3233a275c7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_qdisc_fq.c
@@ -0,0 +1,756 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/* bpf_fq is intended for testing the bpf qdisc infrastructure and not a direct
+ * copy of sch_fq. bpf_fq implements the scheduling algorithm of sch_fq before
+ * 29f834aa326e ("net_sched: sch_fq: add 3 bands and WRR scheduling") was
+ * introduced. It gives each flow a fair chance to transmit packets in a
+ * round-robin fashion. Note that for flow pacing, bpf_fq currently only
+ * respects skb->tstamp but not skb->sk->sk_pacing_rate. In addition, if there
+ * are multiple bpf_fq instances, they will have a shared view of flows and
+ * configuration since some key data structure such as fq_prio_flows,
+ * fq_nonprio_flows, and fq_bpf_data are global.
+ *
+ * To use bpf_fq alone without running selftests, use the following commands.
+ *
+ * 1. Register bpf_fq to the kernel
+ * bpftool struct_ops register bpf_qdisc_fq.bpf.o /sys/fs/bpf
+ * 2. Add bpf_fq to an interface
+ * tc qdisc add dev <interface name> root handle <handle> bpf_fq
+ * 3. Delete bpf_fq attached to the interface
+ * tc qdisc delete dev <interface name> root
+ * 4. Unregister bpf_fq
+ * bpftool struct_ops unregister name fq
+ *
+ * The qdisc name, bpf_fq, used in tc commands is defined by Qdisc_ops.id.
+ * The struct_ops_map_name, fq, used in the bpftool command is the name of the
+ * Qdisc_ops.
+ *
+ * SEC(".struct_ops")
+ * struct Qdisc_ops fq = {
+ * ...
+ * .id = "bpf_fq",
+ * };
+ */
+
+#include <vmlinux.h>
+#include <errno.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_experimental.h"
+#include "bpf_qdisc_common.h"
+
+char _license[] SEC("license") = "GPL";
+
+#define NSEC_PER_USEC 1000L
+#define NSEC_PER_SEC 1000000000L
+
+#define NUM_QUEUE (1 << 20)
+
+struct fq_bpf_data {
+ u32 quantum;
+ u32 initial_quantum;
+ u32 flow_refill_delay;
+ u32 flow_plimit;
+ u64 horizon;
+ u32 orphan_mask;
+ u32 timer_slack;
+ u64 time_next_delayed_flow;
+ u64 unthrottle_latency_ns;
+ u8 horizon_drop;
+ u32 new_flow_cnt;
+ u32 old_flow_cnt;
+ u64 ktime_cache;
+};
+
+enum {
+ CLS_RET_PRIO = 0,
+ CLS_RET_NONPRIO = 1,
+ CLS_RET_ERR = 2,
+};
+
+struct skb_node {
+ u64 tstamp;
+ struct sk_buff __kptr * skb;
+ struct bpf_rb_node node;
+};
+
+struct fq_flow_node {
+ int credit;
+ u32 qlen;
+ u64 age;
+ u64 time_next_packet;
+ struct bpf_list_node list_node;
+ struct bpf_rb_node rb_node;
+ struct bpf_rb_root queue __contains(skb_node, node);
+ struct bpf_spin_lock lock;
+ struct bpf_refcount refcount;
+};
+
+struct dequeue_nonprio_ctx {
+ bool stop_iter;
+ u64 expire;
+ u64 now;
+};
+
+struct remove_flows_ctx {
+ bool gc_only;
+ u32 reset_cnt;
+ u32 reset_max;
+};
+
+struct unset_throttled_flows_ctx {
+ bool unset_all;
+ u64 now;
+};
+
+struct fq_stashed_flow {
+ struct fq_flow_node __kptr * flow;
+};
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, NUM_QUEUE);
+} fq_nonprio_flows SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u64);
+ __type(value, struct fq_stashed_flow);
+ __uint(max_entries, 1);
+} fq_prio_flows SEC(".maps");
+
+private(A) struct bpf_spin_lock fq_delayed_lock;
+private(A) struct bpf_rb_root fq_delayed __contains(fq_flow_node, rb_node);
+
+private(B) struct bpf_spin_lock fq_new_flows_lock;
+private(B) struct bpf_list_head fq_new_flows __contains(fq_flow_node, list_node);
+
+private(C) struct bpf_spin_lock fq_old_flows_lock;
+private(C) struct bpf_list_head fq_old_flows __contains(fq_flow_node, list_node);
+
+private(D) struct fq_bpf_data q;
+
+/* Wrapper for bpf_kptr_xchg that expects NULL dst */
+static void bpf_kptr_xchg_back(void *map_val, void *ptr)
+{
+ void *ret;
+
+ ret = bpf_kptr_xchg(map_val, ptr);
+ if (ret)
+ bpf_obj_drop(ret);
+}
+
+static bool skbn_tstamp_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct skb_node *skbn_a;
+ struct skb_node *skbn_b;
+
+ skbn_a = container_of(a, struct skb_node, node);
+ skbn_b = container_of(b, struct skb_node, node);
+
+ return skbn_a->tstamp < skbn_b->tstamp;
+}
+
+static bool fn_time_next_packet_less(struct bpf_rb_node *a, const struct bpf_rb_node *b)
+{
+ struct fq_flow_node *flow_a;
+ struct fq_flow_node *flow_b;
+
+ flow_a = container_of(a, struct fq_flow_node, rb_node);
+ flow_b = container_of(b, struct fq_flow_node, rb_node);
+
+ return flow_a->time_next_packet < flow_b->time_next_packet;
+}
+
+static void
+fq_flows_add_head(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_front(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_add_tail(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct fq_flow_node *flow, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ bpf_list_push_back(head, &flow->list_node);
+ bpf_spin_unlock(lock);
+ *flow_cnt += 1;
+}
+
+static void
+fq_flows_remove_front(struct bpf_list_head *head, struct bpf_spin_lock *lock,
+ struct bpf_list_node **node, u32 *flow_cnt)
+{
+ bpf_spin_lock(lock);
+ *node = bpf_list_pop_front(head);
+ bpf_spin_unlock(lock);
+ *flow_cnt -= 1;
+}
+
+static bool
+fq_flows_is_empty(struct bpf_list_head *head, struct bpf_spin_lock *lock)
+{
+ struct bpf_list_node *node;
+
+ bpf_spin_lock(lock);
+ node = bpf_list_pop_front(head);
+ if (node) {
+ bpf_list_push_front(head, node);
+ bpf_spin_unlock(lock);
+ return false;
+ }
+ bpf_spin_unlock(lock);
+
+ return true;
+}
+
+/* flow->age is used to denote the state of the flow (not-detached, detached, throttled)
+ * as well as the timestamp when the flow is detached.
+ *
+ * 0: not-detached
+ * 1 - (~0ULL-1): detached
+ * ~0ULL: throttled
+ */
+static void fq_flow_set_detached(struct fq_flow_node *flow)
+{
+ flow->age = bpf_jiffies64();
+}
+
+static bool fq_flow_is_detached(struct fq_flow_node *flow)
+{
+ return flow->age != 0 && flow->age != ~0ULL;
+}
+
+static bool sk_listener(struct sock *sk)
+{
+ return (1 << sk->__sk_common.skc_state) & (TCPF_LISTEN | TCPF_NEW_SYN_RECV);
+}
+
+static void fq_gc(void);
+
+static int fq_new_flow(void *flow_map, struct fq_stashed_flow **sflow, u64 hash)
+{
+ struct fq_stashed_flow tmp = {};
+ struct fq_flow_node *flow;
+ int ret;
+
+ flow = bpf_obj_new(typeof(*flow));
+ if (!flow)
+ return -ENOMEM;
+
+ flow->credit = q.initial_quantum,
+ flow->qlen = 0,
+ flow->age = 1,
+ flow->time_next_packet = 0,
+
+ ret = bpf_map_update_elem(flow_map, &hash, &tmp, 0);
+ if (ret == -ENOMEM || ret == -E2BIG) {
+ fq_gc();
+ bpf_map_update_elem(&fq_nonprio_flows, &hash, &tmp, 0);
+ }
+
+ *sflow = bpf_map_lookup_elem(flow_map, &hash);
+ if (!*sflow) {
+ bpf_obj_drop(flow);
+ return -ENOMEM;
+ }
+
+ bpf_kptr_xchg_back(&(*sflow)->flow, flow);
+ return 0;
+}
+
+static int
+fq_classify(struct sk_buff *skb, struct fq_stashed_flow **sflow)
+{
+ struct sock *sk = skb->sk;
+ int ret = CLS_RET_NONPRIO;
+ u64 hash = 0;
+
+ if ((skb->priority & TC_PRIO_MAX) == TC_PRIO_CONTROL) {
+ *sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ ret = CLS_RET_PRIO;
+ } else {
+ if (!sk || sk_listener(sk)) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ /* Avoid collision with an existing flow hash, which
+ * only uses the lower 32 bits of hash, by setting the
+ * upper half of hash to 1.
+ */
+ hash |= (1ULL << 32);
+ } else if (sk->__sk_common.skc_state == TCP_CLOSE) {
+ hash = bpf_skb_get_hash(skb) & q.orphan_mask;
+ hash |= (1ULL << 32);
+ } else {
+ hash = sk->__sk_common.skc_hash;
+ }
+ *sflow = bpf_map_lookup_elem(&fq_nonprio_flows, &hash);
+ }
+
+ if (!*sflow)
+ ret = fq_new_flow(&fq_nonprio_flows, sflow, hash) < 0 ?
+ CLS_RET_ERR : CLS_RET_NONPRIO;
+
+ return ret;
+}
+
+static bool fq_packet_beyond_horizon(struct sk_buff *skb)
+{
+ return (s64)skb->tstamp > (s64)(q.ktime_cache + q.horizon);
+}
+
+SEC("struct_ops/bpf_fq_enqueue")
+int BPF_PROG(bpf_fq_enqueue, struct sk_buff *skb, struct Qdisc *sch,
+ struct bpf_sk_buff_ptr *to_free)
+{
+ struct fq_flow_node *flow = NULL, *flow_copy;
+ struct fq_stashed_flow *sflow;
+ u64 time_to_send, jiffies;
+ struct skb_node *skbn;
+ int ret;
+
+ if (sch->q.qlen >= sch->limit)
+ goto drop;
+
+ if (!skb->tstamp) {
+ time_to_send = q.ktime_cache = bpf_ktime_get_ns();
+ } else {
+ if (fq_packet_beyond_horizon(skb)) {
+ q.ktime_cache = bpf_ktime_get_ns();
+ if (fq_packet_beyond_horizon(skb)) {
+ if (q.horizon_drop)
+ goto drop;
+
+ skb->tstamp = q.ktime_cache + q.horizon;
+ }
+ }
+ time_to_send = skb->tstamp;
+ }
+
+ ret = fq_classify(skb, &sflow);
+ if (ret == CLS_RET_ERR)
+ goto drop;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ goto drop;
+
+ if (ret == CLS_RET_NONPRIO) {
+ if (flow->qlen >= q.flow_plimit) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ if (fq_flow_is_detached(flow)) {
+ flow_copy = bpf_refcount_acquire(flow);
+
+ jiffies = bpf_jiffies64();
+ if ((s64)(jiffies - (flow_copy->age + q.flow_refill_delay)) > 0) {
+ if (flow_copy->credit < q.quantum)
+ flow_copy->credit = q.quantum;
+ }
+ flow_copy->age = 0;
+ fq_flows_add_tail(&fq_new_flows, &fq_new_flows_lock, flow_copy,
+ &q.new_flow_cnt);
+ }
+ }
+
+ skbn = bpf_obj_new(typeof(*skbn));
+ if (!skbn) {
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+ goto drop;
+ }
+
+ skbn->tstamp = skb->tstamp = time_to_send;
+
+ sch->qstats.backlog += qdisc_pkt_len(skb);
+
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ if (skb)
+ bpf_qdisc_skb_drop(skb, to_free);
+
+ bpf_spin_lock(&flow->lock);
+ bpf_rbtree_add(&flow->queue, &skbn->node, skbn_tstamp_less);
+ bpf_spin_unlock(&flow->lock);
+
+ flow->qlen++;
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ sch->q.qlen++;
+ return NET_XMIT_SUCCESS;
+
+drop:
+ bpf_qdisc_skb_drop(skb, to_free);
+ sch->qstats.drops++;
+ return NET_XMIT_DROP;
+}
+
+static int fq_unset_throttled_flows(u32 index, struct unset_throttled_flows_ctx *ctx)
+{
+ struct bpf_rb_node *node = NULL;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_delayed_lock);
+
+ node = bpf_rbtree_first(&fq_delayed);
+ if (!node) {
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ if (!ctx->unset_all && flow->time_next_packet > ctx->now) {
+ q.time_next_delayed_flow = flow->time_next_packet;
+ bpf_spin_unlock(&fq_delayed_lock);
+ return 1;
+ }
+
+ node = bpf_rbtree_remove(&fq_delayed, &flow->rb_node);
+
+ bpf_spin_unlock(&fq_delayed_lock);
+
+ if (!node)
+ return 1;
+
+ flow = container_of(node, struct fq_flow_node, rb_node);
+ flow->age = 0;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+
+ return 0;
+}
+
+static void fq_flow_set_throttled(struct fq_flow_node *flow)
+{
+ flow->age = ~0ULL;
+
+ if (q.time_next_delayed_flow > flow->time_next_packet)
+ q.time_next_delayed_flow = flow->time_next_packet;
+
+ bpf_spin_lock(&fq_delayed_lock);
+ bpf_rbtree_add(&fq_delayed, &flow->rb_node, fn_time_next_packet_less);
+ bpf_spin_unlock(&fq_delayed_lock);
+}
+
+static void fq_check_throttled(u64 now)
+{
+ struct unset_throttled_flows_ctx ctx = {
+ .unset_all = false,
+ .now = now,
+ };
+ unsigned long sample;
+
+ if (q.time_next_delayed_flow > now)
+ return;
+
+ sample = (unsigned long)(now - q.time_next_delayed_flow);
+ q.unthrottle_latency_ns -= q.unthrottle_latency_ns >> 3;
+ q.unthrottle_latency_ns += sample >> 3;
+
+ q.time_next_delayed_flow = ~0ULL;
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &ctx, 0);
+}
+
+static struct sk_buff*
+fq_dequeue_nonprio_flows(u32 index, struct dequeue_nonprio_ctx *ctx)
+{
+ u64 time_next_packet, time_to_send;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct bpf_list_head *head;
+ struct bpf_list_node *node;
+ struct bpf_spin_lock *lock;
+ struct fq_flow_node *flow;
+ struct skb_node *skbn;
+ bool is_empty;
+ u32 *cnt;
+
+ if (q.new_flow_cnt) {
+ head = &fq_new_flows;
+ lock = &fq_new_flows_lock;
+ cnt = &q.new_flow_cnt;
+ } else if (q.old_flow_cnt) {
+ head = &fq_old_flows;
+ lock = &fq_old_flows_lock;
+ cnt = &q.old_flow_cnt;
+ } else {
+ if (q.time_next_delayed_flow != ~0ULL)
+ ctx->expire = q.time_next_delayed_flow;
+ goto break_loop;
+ }
+
+ fq_flows_remove_front(head, lock, &node, cnt);
+ if (!node)
+ goto break_loop;
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ if (flow->credit <= 0) {
+ flow->credit += q.quantum;
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ return NULL;
+ }
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ is_empty = fq_flows_is_empty(&fq_old_flows, &fq_old_flows_lock);
+ if (head == &fq_new_flows && !is_empty) {
+ fq_flows_add_tail(&fq_old_flows, &fq_old_flows_lock, flow, &q.old_flow_cnt);
+ } else {
+ fq_flow_set_detached(flow);
+ bpf_obj_drop(flow);
+ }
+ return NULL;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ time_to_send = skbn->tstamp;
+
+ time_next_packet = (time_to_send > flow->time_next_packet) ?
+ time_to_send : flow->time_next_packet;
+ if (ctx->now < time_next_packet) {
+ bpf_spin_unlock(&flow->lock);
+ flow->time_next_packet = time_next_packet;
+ fq_flow_set_throttled(flow);
+ return NULL;
+ }
+
+ rb_node = bpf_rbtree_remove(&flow->queue, rb_node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto add_flow_and_break;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+ if (!skb)
+ goto add_flow_and_break;
+
+ flow->credit -= qdisc_skb_cb(skb)->pkt_len;
+ flow->qlen--;
+
+add_flow_and_break:
+ fq_flows_add_head(head, lock, flow, cnt);
+
+break_loop:
+ ctx->stop_iter = true;
+ return skb;
+}
+
+static struct sk_buff *fq_dequeue_prio(void)
+{
+ struct fq_flow_node *flow = NULL;
+ struct fq_stashed_flow *sflow;
+ struct bpf_rb_node *rb_node;
+ struct sk_buff *skb = NULL;
+ struct skb_node *skbn;
+ u64 hash = 0;
+
+ sflow = bpf_map_lookup_elem(&fq_prio_flows, &hash);
+ if (!sflow)
+ return NULL;
+
+ flow = bpf_kptr_xchg(&sflow->flow, flow);
+ if (!flow)
+ return NULL;
+
+ bpf_spin_lock(&flow->lock);
+ rb_node = bpf_rbtree_first(&flow->queue);
+ if (!rb_node) {
+ bpf_spin_unlock(&flow->lock);
+ goto out;
+ }
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ rb_node = bpf_rbtree_remove(&flow->queue, &skbn->node);
+ bpf_spin_unlock(&flow->lock);
+
+ if (!rb_node)
+ goto out;
+
+ skbn = container_of(rb_node, struct skb_node, node);
+ skb = bpf_kptr_xchg(&skbn->skb, skb);
+ bpf_obj_drop(skbn);
+
+out:
+ bpf_kptr_xchg_back(&sflow->flow, flow);
+
+ return skb;
+}
+
+SEC("struct_ops/bpf_fq_dequeue")
+struct sk_buff *BPF_PROG(bpf_fq_dequeue, struct Qdisc *sch)
+{
+ struct dequeue_nonprio_ctx cb_ctx = {};
+ struct sk_buff *skb = NULL;
+ int i;
+
+ if (!sch->q.qlen)
+ goto out;
+
+ skb = fq_dequeue_prio();
+ if (skb)
+ goto dequeue;
+
+ q.ktime_cache = cb_ctx.now = bpf_ktime_get_ns();
+ fq_check_throttled(q.ktime_cache);
+ bpf_for(i, 0, sch->limit) {
+ skb = fq_dequeue_nonprio_flows(i, &cb_ctx);
+ if (cb_ctx.stop_iter)
+ break;
+ };
+
+ if (skb) {
+dequeue:
+ sch->q.qlen--;
+ sch->qstats.backlog -= qdisc_pkt_len(skb);
+ bpf_qdisc_bstats_update(sch, skb);
+ return skb;
+ }
+
+ if (cb_ctx.expire)
+ bpf_qdisc_watchdog_schedule(sch, cb_ctx.expire, q.timer_slack);
+out:
+ return NULL;
+}
+
+static int fq_remove_flows_in_list(u32 index, void *ctx)
+{
+ struct bpf_list_node *node;
+ struct fq_flow_node *flow;
+
+ bpf_spin_lock(&fq_new_flows_lock);
+ node = bpf_list_pop_front(&fq_new_flows);
+ bpf_spin_unlock(&fq_new_flows_lock);
+ if (!node) {
+ bpf_spin_lock(&fq_old_flows_lock);
+ node = bpf_list_pop_front(&fq_old_flows);
+ bpf_spin_unlock(&fq_old_flows_lock);
+ if (!node)
+ return 1;
+ }
+
+ flow = container_of(node, struct fq_flow_node, list_node);
+ bpf_obj_drop(flow);
+
+ return 0;
+}
+
+extern unsigned CONFIG_HZ __kconfig;
+
+/* limit number of collected flows per round */
+#define FQ_GC_MAX 8
+#define FQ_GC_AGE (3*CONFIG_HZ)
+
+static bool fq_gc_candidate(struct fq_flow_node *flow)
+{
+ u64 jiffies = bpf_jiffies64();
+
+ return fq_flow_is_detached(flow) &&
+ ((s64)(jiffies - (flow->age + FQ_GC_AGE)) > 0);
+}
+
+static int
+fq_remove_flows(struct bpf_map *flow_map, u64 *hash,
+ struct fq_stashed_flow *sflow, struct remove_flows_ctx *ctx)
+{
+ if (sflow->flow &&
+ (!ctx->gc_only || fq_gc_candidate(sflow->flow))) {
+ bpf_map_delete_elem(flow_map, hash);
+ ctx->reset_cnt++;
+ }
+
+ return ctx->reset_cnt < ctx->reset_max ? 0 : 1;
+}
+
+static void fq_gc(void)
+{
+ struct remove_flows_ctx cb_ctx = {
+ .gc_only = true,
+ .reset_cnt = 0,
+ .reset_max = FQ_GC_MAX,
+ };
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &cb_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_reset")
+void BPF_PROG(bpf_fq_reset, struct Qdisc *sch)
+{
+ struct unset_throttled_flows_ctx utf_ctx = {
+ .unset_all = true,
+ };
+ struct remove_flows_ctx rf_ctx = {
+ .gc_only = false,
+ .reset_cnt = 0,
+ .reset_max = NUM_QUEUE,
+ };
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ sch->q.qlen = 0;
+ sch->qstats.backlog = 0;
+
+ bpf_for_each_map_elem(&fq_nonprio_flows, fq_remove_flows, &rf_ctx, 0);
+
+ rf_ctx.reset_cnt = 0;
+ bpf_for_each_map_elem(&fq_prio_flows, fq_remove_flows, &rf_ctx, 0);
+ fq_new_flow(&fq_prio_flows, &sflow, hash);
+
+ bpf_loop(NUM_QUEUE, fq_remove_flows_in_list, NULL, 0);
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ bpf_loop(NUM_QUEUE, fq_unset_throttled_flows, &utf_ctx, 0);
+}
+
+SEC("struct_ops/bpf_fq_init")
+int BPF_PROG(bpf_fq_init, struct Qdisc *sch, struct nlattr *opt,
+ struct netlink_ext_ack *extack)
+{
+ struct net_device *dev = sch->dev_queue->dev;
+ u32 psched_mtu = dev->mtu + dev->hard_header_len;
+ struct fq_stashed_flow *sflow;
+ u64 hash = 0;
+
+ if (fq_new_flow(&fq_prio_flows, &sflow, hash) < 0)
+ return -ENOMEM;
+
+ sch->limit = 10000;
+ q.initial_quantum = 10 * psched_mtu;
+ q.quantum = 2 * psched_mtu;
+ q.flow_refill_delay = 40;
+ q.flow_plimit = 100;
+ q.horizon = 10ULL * NSEC_PER_SEC;
+ q.horizon_drop = 1;
+ q.orphan_mask = 1024 - 1;
+ q.timer_slack = 10 * NSEC_PER_USEC;
+ q.time_next_delayed_flow = ~0ULL;
+ q.unthrottle_latency_ns = 0ULL;
+ q.new_flow_cnt = 0;
+ q.old_flow_cnt = 0;
+
+ return 0;
+}
+
+SEC("struct_ops")
+void BPF_PROG(bpf_fq_destroy, struct Qdisc *sch)
+{
+}
+
+SEC(".struct_ops")
+struct Qdisc_ops fq = {
+ .enqueue = (void *)bpf_fq_enqueue,
+ .dequeue = (void *)bpf_fq_dequeue,
+ .reset = (void *)bpf_fq_reset,
+ .init = (void *)bpf_fq_init,
+ .destroy = (void *)bpf_fq_destroy,
+ .id = "bpf_fq",
+};
diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
index 659694162739..17db400f0e0d 100644
--- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
+++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h
@@ -128,6 +128,7 @@
#define sk_refcnt __sk_common.skc_refcnt
#define sk_state __sk_common.skc_state
#define sk_net __sk_common.skc_net
+#define sk_rcv_saddr __sk_common.skc_rcv_saddr
#define sk_v6_daddr __sk_common.skc_v6_daddr
#define sk_v6_rcv_saddr __sk_common.skc_v6_rcv_saddr
#define sk_flags __sk_common.skc_flags
diff --git a/tools/testing/selftests/bpf/progs/for_each_hash_modify.c b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
new file mode 100644
index 000000000000..82307166f789
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/for_each_hash_modify.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2025 Intel Corporation */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __uint(max_entries, 128);
+ __type(key, __u64);
+ __type(value, __u64);
+} hashmap SEC(".maps");
+
+static int cb(struct bpf_map *map, __u64 *key, __u64 *val, void *arg)
+{
+ bpf_map_delete_elem(map, key);
+ bpf_map_update_elem(map, key, val, 0);
+ return 0;
+}
+
+SEC("tc")
+int test_pkt_access(struct __sk_buff *skb)
+{
+ (void)skb;
+
+ bpf_for_each_map_elem(&hashmap, cb, NULL, 0);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/sock_iter_batch.c b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
index 96531b0d9d55..8f483337e103 100644
--- a/tools/testing/selftests/bpf/progs/sock_iter_batch.c
+++ b/tools/testing/selftests/bpf/progs/sock_iter_batch.c
@@ -17,6 +17,12 @@ static bool ipv6_addr_loopback(const struct in6_addr *a)
a->s6_addr32[2] | (a->s6_addr32[3] ^ bpf_htonl(1))) == 0;
}
+static bool ipv4_addr_loopback(__be32 a)
+{
+ return a == bpf_ntohl(0x7f000001);
+}
+
+volatile const unsigned int sf;
volatile const __u16 ports[2];
unsigned int bucket[2];
@@ -26,16 +32,20 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
struct sock *sk = (struct sock *)ctx->sk_common;
struct inet_hashinfo *hinfo;
unsigned int hash;
+ __u64 sock_cookie;
struct net *net;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
+ if (sk->sk_family != sf ||
sk->sk_state != TCP_LISTEN ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr))
return 0;
if (sk->sk_num == ports[0])
@@ -52,6 +62,7 @@ int iter_tcp_soreuse(struct bpf_iter__tcp *ctx)
hinfo = net->ipv4.tcp_death_row.hashinfo;
bucket[idx] = hash & hinfo->lhash2_mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
@@ -63,14 +74,18 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
{
struct sock *sk = (struct sock *)ctx->udp_sk;
struct udp_table *udptable;
+ __u64 sock_cookie;
int idx;
if (!sk)
return 0;
+ sock_cookie = bpf_get_socket_cookie(sk);
sk = bpf_core_cast(sk, struct sock);
- if (sk->sk_family != AF_INET6 ||
- !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr))
+ if (sk->sk_family != sf ||
+ sk->sk_family == AF_INET6 ?
+ !ipv6_addr_loopback(&sk->sk_v6_rcv_saddr) :
+ !ipv4_addr_loopback(sk->sk_rcv_saddr))
return 0;
if (sk->sk_num == ports[0])
@@ -84,6 +99,7 @@ int iter_udp_soreuse(struct bpf_iter__udp *ctx)
udptable = sk->sk_net.net->ipv4.udp_table;
bucket[idx] = udp_sk(sk)->udp_portaddr_hash & udptable->mask;
bpf_seq_write(ctx->meta->seq, &idx, sizeof(idx));
+ bpf_seq_write(ctx->meta->seq, &sock_cookie, sizeof(sock_cookie));
return 0;
}
diff --git a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
index ccde6a4c6319..683306db8594 100644
--- a/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
+++ b/tools/testing/selftests/bpf/progs/xsk_xdp_progs.c
@@ -4,6 +4,8 @@
#include <linux/bpf.h>
#include <bpf/bpf_helpers.h>
#include <linux/if_ether.h>
+#include <linux/ip.h>
+#include <linux/errno.h>
#include "xsk_xdp_common.h"
struct {
@@ -14,6 +16,7 @@ struct {
} xsk SEC(".maps");
static unsigned int idx;
+int adjust_value = 0;
int count = 0;
SEC("xdp.frags") int xsk_def_prog(struct xdp_md *xdp)
@@ -70,4 +73,51 @@ SEC("xdp") int xsk_xdp_shared_umem(struct xdp_md *xdp)
return bpf_redirect_map(&xsk, idx, XDP_DROP);
}
+SEC("xdp.frags") int xsk_xdp_adjust_tail(struct xdp_md *xdp)
+{
+ __u32 buff_len, curr_buff_len;
+ int ret;
+
+ buff_len = bpf_xdp_get_buff_len(xdp);
+ if (buff_len == 0)
+ return XDP_DROP;
+
+ ret = bpf_xdp_adjust_tail(xdp, adjust_value);
+ if (ret < 0) {
+ /* Handle unsupported cases */
+ if (ret == -EOPNOTSUPP) {
+ /* Set adjust_value to -EOPNOTSUPP to indicate to userspace that this case
+ * is unsupported
+ */
+ adjust_value = -EOPNOTSUPP;
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+ }
+
+ return XDP_DROP;
+ }
+
+ curr_buff_len = bpf_xdp_get_buff_len(xdp);
+ if (curr_buff_len != buff_len + adjust_value)
+ return XDP_DROP;
+
+ if (curr_buff_len > buff_len) {
+ __u32 *pkt_data = (void *)(long)xdp->data;
+ __u32 len, words_to_end, seq_num;
+
+ len = curr_buff_len - PKT_HDR_ALIGN;
+ words_to_end = len / sizeof(*pkt_data) - 1;
+ seq_num = words_to_end;
+
+ /* Convert sequence number to network byte order. Store this in the last 4 bytes of
+ * the packet. Use 'adjust_value' to determine the position at the end of the
+ * packet for storing the sequence number.
+ */
+ seq_num = __constant_htonl(words_to_end);
+ bpf_xdp_store_bytes(xdp, curr_buff_len - sizeof(seq_num), &seq_num,
+ sizeof(seq_num));
+ }
+
+ return bpf_redirect_map(&xsk, 0, XDP_DROP);
+}
+
char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/xsk_xdp_common.h b/tools/testing/selftests/bpf/xsk_xdp_common.h
index 5a6f36f07383..45810ff552da 100644
--- a/tools/testing/selftests/bpf/xsk_xdp_common.h
+++ b/tools/testing/selftests/bpf/xsk_xdp_common.h
@@ -4,6 +4,7 @@
#define XSK_XDP_COMMON_H_
#define MAX_SOCKETS 2
+#define PKT_HDR_ALIGN (sizeof(struct ethhdr) + 2) /* Just to align the data in the packet */
struct xdp_info {
__u64 count;
diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c
index 11f047b8af75..0ced4026ee44 100644
--- a/tools/testing/selftests/bpf/xskxceiver.c
+++ b/tools/testing/selftests/bpf/xskxceiver.c
@@ -524,6 +524,8 @@ static void __test_spec_init(struct test_spec *test, struct ifobject *ifobj_tx,
test->nb_sockets = 1;
test->fail = false;
test->set_ring = false;
+ test->adjust_tail = false;
+ test->adjust_tail_support = false;
test->mtu = MAX_ETH_PKT_SIZE;
test->xdp_prog_rx = ifobj_rx->xdp_progs->progs.xsk_def_prog;
test->xskmap_rx = ifobj_rx->xdp_progs->maps.xsk;
@@ -757,14 +759,15 @@ static struct pkt_stream *pkt_stream_clone(struct pkt_stream *pkt_stream)
return pkt_stream_generate(pkt_stream->nb_pkts, pkt_stream->pkts[0].len);
}
-static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+static void pkt_stream_replace_ifobject(struct ifobject *ifobj, u32 nb_pkts, u32 pkt_len)
{
- struct pkt_stream *pkt_stream;
+ ifobj->xsk->pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
+}
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_tx->xsk->pkt_stream = pkt_stream;
- pkt_stream = pkt_stream_generate(nb_pkts, pkt_len);
- test->ifobj_rx->xsk->pkt_stream = pkt_stream;
+static void pkt_stream_replace(struct test_spec *test, u32 nb_pkts, u32 pkt_len)
+{
+ pkt_stream_replace_ifobject(test->ifobj_tx, nb_pkts, pkt_len);
+ pkt_stream_replace_ifobject(test->ifobj_rx, nb_pkts, pkt_len);
}
static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len,
@@ -991,6 +994,31 @@ static bool is_metadata_correct(struct pkt *pkt, void *buffer, u64 addr)
return true;
}
+static bool is_adjust_tail_supported(struct xsk_xdp_progs *skel_rx)
+{
+ struct bpf_map *data_map;
+ int adjust_value = 0;
+ int key = 0;
+ int ret;
+
+ data_map = bpf_object__find_map_by_name(skel_rx->obj, "xsk_xdp_.bss");
+ if (!data_map || !bpf_map__is_internal(data_map)) {
+ ksft_print_msg("Error: could not find bss section of XDP program\n");
+ exit_with_error(errno);
+ }
+
+ ret = bpf_map_lookup_elem(bpf_map__fd(data_map), &key, &adjust_value);
+ if (ret) {
+ ksft_print_msg("Error: bpf_map_lookup_elem failed with error %d\n", ret);
+ exit_with_error(errno);
+ }
+
+ /* Set the 'adjust_value' variable to -EOPNOTSUPP in the XDP program if the adjust_tail
+ * helper is not supported. Skip the adjust_tail test case in this scenario.
+ */
+ return adjust_value != -EOPNOTSUPP;
+}
+
static bool is_frag_valid(struct xsk_umem_info *umem, u64 addr, u32 len, u32 expected_pkt_nb,
u32 bytes_processed)
{
@@ -1767,8 +1795,13 @@ static void *worker_testapp_validate_rx(void *arg)
if (!err && ifobject->validation_func)
err = ifobject->validation_func(ifobject);
- if (err)
- report_failure(test);
+
+ if (err) {
+ if (test->adjust_tail && !is_adjust_tail_supported(ifobject->xdp_progs))
+ test->adjust_tail_support = false;
+ else
+ report_failure(test);
+ }
pthread_exit(NULL);
}
@@ -2515,6 +2548,71 @@ static int testapp_hw_sw_max_ring_size(struct test_spec *test)
return testapp_validate_traffic(test);
}
+static int testapp_xdp_adjust_tail(struct test_spec *test, int adjust_value)
+{
+ struct xsk_xdp_progs *skel_rx = test->ifobj_rx->xdp_progs;
+ struct xsk_xdp_progs *skel_tx = test->ifobj_tx->xdp_progs;
+
+ test_spec_set_xdp_prog(test, skel_rx->progs.xsk_xdp_adjust_tail,
+ skel_tx->progs.xsk_xdp_adjust_tail,
+ skel_rx->maps.xsk, skel_tx->maps.xsk);
+
+ skel_rx->bss->adjust_value = adjust_value;
+
+ return testapp_validate_traffic(test);
+}
+
+static int testapp_adjust_tail(struct test_spec *test, u32 value, u32 pkt_len)
+{
+ int ret;
+
+ test->adjust_tail_support = true;
+ test->adjust_tail = true;
+ test->total_steps = 1;
+
+ pkt_stream_replace_ifobject(test->ifobj_tx, DEFAULT_BATCH_SIZE, pkt_len);
+ pkt_stream_replace_ifobject(test->ifobj_rx, DEFAULT_BATCH_SIZE, pkt_len + value);
+
+ ret = testapp_xdp_adjust_tail(test, value);
+ if (ret)
+ return ret;
+
+ if (!test->adjust_tail_support) {
+ ksft_test_result_skip("%s %sResize pkt with bpf_xdp_adjust_tail() not supported\n",
+ mode_string(test), busy_poll_string(test));
+ return TEST_SKIP;
+ }
+
+ return 0;
+}
+
+static int testapp_adjust_tail_shrink(struct test_spec *test)
+{
+ /* Shrink by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, -4, MIN_PKT_SIZE * 2);
+}
+
+static int testapp_adjust_tail_shrink_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Shrink by the frag size */
+ return testapp_adjust_tail(test, -XSK_UMEM__MAX_FRAME_SIZE, XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
+static int testapp_adjust_tail_grow(struct test_spec *test)
+{
+ /* Grow by 4 bytes for testing purpose */
+ return testapp_adjust_tail(test, 4, MIN_PKT_SIZE * 2);
+}
+
+static int testapp_adjust_tail_grow_mb(struct test_spec *test)
+{
+ test->mtu = MAX_ETH_JUMBO_SIZE;
+ /* Grow by (frag_size - last_frag_Size) - 1 to stay inside the last fragment */
+ return testapp_adjust_tail(test, (XSK_UMEM__MAX_FRAME_SIZE / 2) - 1,
+ XSK_UMEM__LARGE_FRAME_SIZE * 2);
+}
+
static void run_pkt_test(struct test_spec *test)
{
int ret;
@@ -2621,6 +2719,10 @@ static const struct test_spec tests[] = {
{.name = "TOO_MANY_FRAGS", .test_func = testapp_too_many_frags},
{.name = "HW_SW_MIN_RING_SIZE", .test_func = testapp_hw_sw_min_ring_size},
{.name = "HW_SW_MAX_RING_SIZE", .test_func = testapp_hw_sw_max_ring_size},
+ {.name = "XDP_ADJUST_TAIL_SHRINK", .test_func = testapp_adjust_tail_shrink},
+ {.name = "XDP_ADJUST_TAIL_SHRINK_MULTI_BUFF", .test_func = testapp_adjust_tail_shrink_mb},
+ {.name = "XDP_ADJUST_TAIL_GROW", .test_func = testapp_adjust_tail_grow},
+ {.name = "XDP_ADJUST_TAIL_GROW_MULTI_BUFF", .test_func = testapp_adjust_tail_grow_mb},
};
static void print_tests(void)
diff --git a/tools/testing/selftests/bpf/xskxceiver.h b/tools/testing/selftests/bpf/xskxceiver.h
index e46e823f6a1a..67fc44b2813b 100644
--- a/tools/testing/selftests/bpf/xskxceiver.h
+++ b/tools/testing/selftests/bpf/xskxceiver.h
@@ -173,6 +173,8 @@ struct test_spec {
u16 nb_sockets;
bool fail;
bool set_ring;
+ bool adjust_tail;
+ bool adjust_tail_support;
enum test_mode mode;
char name[MAX_TEST_NAME_SIZE];
};
diff --git a/tools/testing/selftests/drivers/net/.gitignore b/tools/testing/selftests/drivers/net/.gitignore
index ec746f374e85..d634d8395d90 100644
--- a/tools/testing/selftests/drivers/net/.gitignore
+++ b/tools/testing/selftests/drivers/net/.gitignore
@@ -1,2 +1,2 @@
# SPDX-License-Identifier: GPL-2.0-only
-xdp_helper
+napi_id_helper
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 0c95bd944d56..17db31aa58c9 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -6,9 +6,12 @@ TEST_INCLUDES := $(wildcard lib/py/*.py) \
../../net/net_helper.sh \
../../net/lib.sh \
-TEST_GEN_FILES := xdp_helper
+TEST_GEN_FILES := \
+ napi_id_helper \
+# end of TEST_GEN_FILES
TEST_PROGS := \
+ napi_id.py \
netcons_basic.sh \
netcons_fragmented_msg.sh \
netcons_overflow.sh \
diff --git a/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
new file mode 120000
index 000000000000..d16a65e7595d
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/dsa/tc_taprio.sh
@@ -0,0 +1 @@
+run_net_forwarding_test.sh \ No newline at end of file
diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile
index 07cddb19ba35..5447785c286e 100644
--- a/tools/testing/selftests/drivers/net/hw/Makefile
+++ b/tools/testing/selftests/drivers/net/hw/Makefile
@@ -21,6 +21,7 @@ TEST_PROGS = \
rss_ctx.py \
rss_input_xfrm.py \
tso.py \
+ xsk_reconfig.py \
#
TEST_FILES := \
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
index c26b4180eddd..62456df947bc 100644
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.c
@@ -37,8 +37,8 @@
#include <liburing.h>
-#define PAGE_SIZE (4096)
-#define AREA_SIZE (8192 * PAGE_SIZE)
+static long page_size;
+#define AREA_SIZE (8192 * page_size)
#define SEND_SIZE (512 * 4096)
#define min(a, b) \
({ \
@@ -66,7 +66,7 @@ static int cfg_oneshot_recvs;
static int cfg_send_size = SEND_SIZE;
static struct sockaddr_in6 cfg_addr;
-static char payload[SEND_SIZE] __attribute__((aligned(PAGE_SIZE)));
+static char *payload;
static void *area_ptr;
static void *ring_ptr;
static size_t ring_size;
@@ -114,8 +114,8 @@ static inline size_t get_refill_ring_size(unsigned int rq_entries)
ring_size = rq_entries * sizeof(struct io_uring_zcrx_rqe);
/* add space for the header (head/tail/etc.) */
- ring_size += PAGE_SIZE;
- return ALIGN_UP(ring_size, 4096);
+ ring_size += page_size;
+ return ALIGN_UP(ring_size, page_size);
}
static void setup_zcrx(struct io_uring *ring)
@@ -219,7 +219,7 @@ static void process_accept(struct io_uring *ring, struct io_uring_cqe *cqe)
connfd = cqe->res;
if (cfg_oneshot)
- add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ add_recvzc_oneshot(ring, connfd, page_size);
else
add_recvzc(ring, connfd);
}
@@ -245,7 +245,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
if (cfg_oneshot) {
if (cqe->res == 0 && cqe->flags == 0 && cfg_oneshot_recvs) {
- add_recvzc_oneshot(ring, connfd, PAGE_SIZE);
+ add_recvzc_oneshot(ring, connfd, page_size);
cfg_oneshot_recvs--;
}
} else if (!(cqe->flags & IORING_CQE_F_MORE)) {
@@ -260,7 +260,7 @@ static void process_recvzc(struct io_uring *ring, struct io_uring_cqe *cqe)
for (i = 0; i < n; i++) {
if (*(data + i) != payload[(received + i)])
- error(1, 0, "payload mismatch at ", i);
+ error(1, 0, "payload mismatch at %d", i);
}
received += n;
@@ -354,7 +354,7 @@ static void run_client(void)
chunk = min_t(ssize_t, cfg_payload_len, to_send);
res = send(fd, src, chunk, 0);
if (res < 0)
- error(1, 0, "send(): %d", sent);
+ error(1, 0, "send(): %zd", sent);
sent += res;
to_send -= res;
}
@@ -370,7 +370,7 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
- const int max_payload_len = sizeof(payload) -
+ const int max_payload_len = SEND_SIZE -
sizeof(struct ipv6hdr) -
sizeof(struct tcphdr) -
40 /* max tcp options */;
@@ -443,6 +443,13 @@ int main(int argc, char **argv)
const char *cfg_test = argv[argc - 1];
int i;
+ page_size = sysconf(_SC_PAGESIZE);
+ if (page_size < 0)
+ return 1;
+
+ if (posix_memalign((void **)&payload, page_size, SEND_SIZE))
+ return 1;
+
parse_opts(argc, argv);
for (i = 0; i < SEND_SIZE; i++)
diff --git a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
index 6a0378e06cab..9c03fd777f3d 100755
--- a/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
+++ b/tools/testing/selftests/drivers/net/hw/iou-zcrx.py
@@ -5,13 +5,12 @@ import re
from os import path
from lib.py import ksft_run, ksft_exit
from lib.py import NetDrvEpEnv
-from lib.py import bkg, cmd, ethtool, wait_port_listen
+from lib.py import bkg, cmd, defer, ethtool, rand_port, wait_port_listen
-def _get_rx_ring_entries(cfg):
- output = ethtool(f"-g {cfg.ifname}", host=cfg.remote).stdout
- values = re.findall(r'RX:\s+(\d+)', output)
- return int(values[1])
+def _get_current_settings(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True, host=cfg.remote)[0]
+ return (output['rx'], output['hds-thresh'])
def _get_combined_channels(cfg):
@@ -20,8 +19,21 @@ def _get_combined_channels(cfg):
return int(values[1])
-def _set_flow_rule(cfg, chan):
- output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port 9999 action {chan}", host=cfg.remote).stdout
+def _create_rss_ctx(cfg, chan):
+ output = ethtool(f"-X {cfg.ifname} context new start {chan} equal 1", host=cfg.remote).stdout
+ values = re.search(r'New RSS context is (\d+)', output).group(1)
+ ctx_id = int(values)
+ return (ctx_id, defer(ethtool, f"-X {cfg.ifname} delete context {ctx_id}", host=cfg.remote))
+
+
+def _set_flow_rule(cfg, port, chan):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} action {chan}", host=cfg.remote).stdout
+ values = re.search(r'ID (\d+)', output).group(1)
+ return int(values)
+
+
+def _set_flow_rule_rss(cfg, port, ctx_id):
+ output = ethtool(f"-N {cfg.ifname} flow-type tcp6 dst-port {port} context {ctx_id}", host=cfg.remote).stdout
values = re.search(r'ID (\d+)', output).group(1)
return int(values)
@@ -32,24 +44,29 @@ def test_zcrx(cfg) -> None:
combined_chans = _get_combined_channels(cfg)
if combined_chans < 2:
raise KsftSkipEx('at least 2 combined channels required')
- rx_ring = _get_rx_ring_entries(cfg)
-
- try:
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
-
- rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1}"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 12840"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(9999, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
- finally:
- ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(port, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
def test_zcrx_oneshot(cfg) -> None:
@@ -58,24 +75,61 @@ def test_zcrx_oneshot(cfg) -> None:
combined_chans = _get_combined_channels(cfg)
if combined_chans < 2:
raise KsftSkipEx('at least 2 combined channels required')
- rx_ring = _get_rx_ring_entries(cfg)
-
- try:
- ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
- flow_rule_id = _set_flow_rule(cfg, combined_chans - 1)
-
- rx_cmd = f"{cfg.bin_remote} -s -p 9999 -i {cfg.ifname} -q {combined_chans - 1} -o 4"
- tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p 9999 -l 4096 -z 16384"
- with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
- wait_port_listen(9999, proto="tcp", host=cfg.remote)
- cmd(tx_cmd)
- finally:
- ethtool(f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
- ethtool(f"-X {cfg.ifname} default", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
- ethtool(f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+ flow_rule_id = _set_flow_rule(cfg, port, combined_chans - 1)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1} -o 4"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 4096 -z 16384"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(port, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
+
+
+def test_zcrx_rss(cfg) -> None:
+ cfg.require_ipver('6')
+
+ combined_chans = _get_combined_channels(cfg)
+ if combined_chans < 2:
+ raise KsftSkipEx('at least 2 combined channels required')
+ (rx_ring, hds_thresh) = _get_current_settings(cfg)
+ port = rand_port()
+
+ ethtool(f"-G {cfg.ifname} tcp-data-split on", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} tcp-data-split auto", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} hds-thresh 0", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} hds-thresh {hds_thresh}", host=cfg.remote)
+
+ ethtool(f"-G {cfg.ifname} rx 64", host=cfg.remote)
+ defer(ethtool, f"-G {cfg.ifname} rx {rx_ring}", host=cfg.remote)
+
+ ethtool(f"-X {cfg.ifname} equal {combined_chans - 1}", host=cfg.remote)
+ defer(ethtool, f"-X {cfg.ifname} default", host=cfg.remote)
+
+ (ctx_id, delete_ctx) = _create_rss_ctx(cfg, combined_chans - 1)
+ flow_rule_id = _set_flow_rule_rss(cfg, port, ctx_id)
+ defer(ethtool, f"-N {cfg.ifname} delete {flow_rule_id}", host=cfg.remote)
+
+ rx_cmd = f"{cfg.bin_remote} -s -p {port} -i {cfg.ifname} -q {combined_chans - 1}"
+ tx_cmd = f"{cfg.bin_local} -c -h {cfg.remote_addr_v['6']} -p {port} -l 12840"
+ with bkg(rx_cmd, host=cfg.remote, exit_wait=True):
+ wait_port_listen(port, proto="tcp", host=cfg.remote)
+ cmd(tx_cmd)
def main() -> None:
diff --git a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
index 53bb08cc29ec..f439c434ba36 100755
--- a/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
+++ b/tools/testing/selftests/drivers/net/hw/rss_input_xfrm.py
@@ -32,6 +32,11 @@ def test_rss_input_xfrm(cfg, ipver):
if multiprocessing.cpu_count() < 2:
raise KsftSkipEx("Need at least two CPUs to test symmetric RSS hash")
+ cfg.require_cmd("socat", remote=True)
+
+ if not hasattr(socket, "SO_INCOMING_CPU"):
+ raise KsftSkipEx("socket.SO_INCOMING_CPU was added in Python 3.11")
+
input_xfrm = cfg.ethnl.rss_get(
{'header': {'dev-name': cfg.ifname}}).get('input_xfrm')
diff --git a/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
new file mode 100755
index 000000000000..d19d1d518208
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hw/xsk_reconfig.py
@@ -0,0 +1,60 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+# This is intended to be run on a virtio-net guest interface.
+# The test binds the XDP socket to the interface without setting
+# the fill ring to trigger delayed refill_work. This helps to
+# make it easier to reproduce the deadlock when XDP program,
+# XDP socket bind/unbind, rx ring resize race with refill_work on
+# the buggy kernel.
+#
+# The Qemu command to setup virtio-net
+# -netdev tap,id=hostnet1,vhost=on,script=no,downscript=no
+# -device virtio-net-pci,netdev=hostnet1,iommu_platform=on,disable-legacy=on
+
+from lib.py import ksft_exit, ksft_run
+from lib.py import KsftSkipEx, KsftFailEx
+from lib.py import NetDrvEnv
+from lib.py import bkg, ip, cmd, ethtool
+import time
+
+def _get_rx_ring_entries(cfg):
+ output = ethtool(f"-g {cfg.ifname}", json=True)
+ return output[0]["rx"]
+
+def setup_xsk(cfg, xdp_queue_id = 0) -> bkg:
+ # Probe for support
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
+ if xdp.ret == 255:
+ raise KsftSkipEx('AF_XDP unsupported')
+ elif xdp.ret > 0:
+ raise KsftFailEx('unable to create AF_XDP socket')
+
+ try:
+ return bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} ' \
+ '{xdp_queue_id} -z', ksft_wait=3)
+ except:
+ raise KsftSkipEx('Failed to bind XDP socket in zerocopy.\n' \
+ 'Please consider adding iommu_platform=on ' \
+ 'when setting up virtio-net-pci')
+
+def check_xdp_bind(cfg):
+ with setup_xsk(cfg):
+ ip(f"link set dev %s xdp obj %s sec xdp" %
+ (cfg.ifname, cfg.net_lib_dir / "xdp_dummy.bpf.o"))
+ ip(f"link set dev %s xdp off" % cfg.ifname)
+
+def check_rx_resize(cfg):
+ with setup_xsk(cfg):
+ rx_ring = _get_rx_ring_entries(cfg)
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring // 2))
+ ethtool(f"-G %s rx %d" % (cfg.ifname, rx_ring))
+
+def main():
+ with NetDrvEnv(__file__, nsim_test=False) as cfg:
+ ksft_run([check_xdp_bind, check_rx_resize],
+ args=(cfg, ))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/napi_id.py b/tools/testing/selftests/drivers/net/napi_id.py
new file mode 100755
index 000000000000..356bac46ba04
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id.py
@@ -0,0 +1,23 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+from lib.py import ksft_run, ksft_exit
+from lib.py import ksft_eq, NetDrvEpEnv
+from lib.py import bkg, cmd, rand_port, NetNSEnter
+
+def test_napi_id(cfg) -> None:
+ port = rand_port()
+ listen_cmd = f"{cfg.test_dir}/napi_id_helper {cfg.addr_v['4']} {port}"
+
+ with bkg(listen_cmd, ksft_wait=3) as server:
+ cmd(f"echo a | socat - TCP:{cfg.addr_v['4']}:{port}", host=cfg.remote, shell=True)
+
+ ksft_eq(0, server.ret)
+
+def main() -> None:
+ with NetDrvEpEnv(__file__) as cfg:
+ ksft_run([test_napi_id], args=(cfg,))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/napi_id_helper.c b/tools/testing/selftests/drivers/net/napi_id_helper.c
new file mode 100644
index 000000000000..eecd610c2109
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/napi_id_helper.c
@@ -0,0 +1,83 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#include "../../net/lib/ksft.h"
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in address;
+ unsigned int napi_id;
+ unsigned int port;
+ socklen_t optlen;
+ char buf[1024];
+ int opt = 1;
+ int server;
+ int client;
+ int ret;
+
+ server = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (server < 0) {
+ perror("socket creation failed");
+ if (errno == EAFNOSUPPORT)
+ return -1;
+ return 1;
+ }
+
+ port = atoi(argv[2]);
+
+ if (setsockopt(server, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt))) {
+ perror("setsockopt");
+ return 1;
+ }
+
+ address.sin_family = AF_INET;
+ inet_pton(AF_INET, argv[1], &address.sin_addr);
+ address.sin_port = htons(port);
+
+ if (bind(server, (struct sockaddr *)&address, sizeof(address)) < 0) {
+ perror("bind failed");
+ return 1;
+ }
+
+ if (listen(server, 1) < 0) {
+ perror("listen");
+ return 1;
+ }
+
+ ksft_ready();
+
+ client = accept(server, NULL, 0);
+ if (client < 0) {
+ perror("accept");
+ return 1;
+ }
+
+ optlen = sizeof(napi_id);
+ ret = getsockopt(client, SOL_SOCKET, SO_INCOMING_NAPI_ID, &napi_id,
+ &optlen);
+ if (ret != 0) {
+ perror("getsockopt");
+ return 1;
+ }
+
+ read(client, buf, 1024);
+
+ ksft_wait();
+
+ if (napi_id == 0) {
+ fprintf(stderr, "napi ID is 0\n");
+ return 1;
+ }
+
+ close(client);
+ close(server);
+
+ return 0;
+}
diff --git a/tools/testing/selftests/drivers/net/ocelot/psfp.sh b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
index bed748dde4b0..8972f42dfe03 100755
--- a/tools/testing/selftests/drivers/net/ocelot/psfp.sh
+++ b/tools/testing/selftests/drivers/net/ocelot/psfp.sh
@@ -266,18 +266,14 @@ run_test()
"${base_time}" \
"${CYCLE_TIME_NS}" \
"${SHIFT_TIME_NS}" \
+ "${GATE_DURATION_NS}" \
"${NUM_PKTS}" \
"${STREAM_VID}" \
"${STREAM_PRIO}" \
"" \
"${isochron_dat}"
- # Count all received packets by looking at the non-zero RX timestamps
- received=$(isochron report \
- --input-file "${isochron_dat}" \
- --printf-format "%u\n" --printf-args "R" | \
- grep -w -v '0' | wc -l)
-
+ received=$(isochron_report_num_received "${isochron_dat}")
if [ "${received}" = "${expected}" ]; then
RET=0
else
diff --git a/tools/testing/selftests/drivers/net/queues.py b/tools/testing/selftests/drivers/net/queues.py
index 06abd3f233e1..236005290a33 100755
--- a/tools/testing/selftests/drivers/net/queues.py
+++ b/tools/testing/selftests/drivers/net/queues.py
@@ -26,13 +26,13 @@ def nl_get_queues(cfg, nl, qtype='rx'):
def check_xsk(cfg, nl, xdp_queue_id=0) -> None:
# Probe for support
- xdp = cmd(f'{cfg.test_dir / "xdp_helper"} - -', fail=False)
+ xdp = cmd(f'{cfg.net_lib_dir / "xdp_helper"} - -', fail=False)
if xdp.ret == 255:
raise KsftSkipEx('AF_XDP unsupported')
elif xdp.ret > 0:
raise KsftFailEx('unable to create AF_XDP socket')
- with bkg(f'{cfg.test_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
+ with bkg(f'{cfg.net_lib_dir / "xdp_helper"} {cfg.ifindex} {xdp_queue_id}',
ksft_wait=3):
rx = tx = False
diff --git a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
index 4a2d5c454fd1..59a71f22fb11 100644
--- a/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
+++ b/tools/testing/selftests/filesystems/mount-notify/mount-notify_test.c
@@ -48,8 +48,16 @@ static uint64_t get_mnt_id(struct __test_metadata *const _metadata,
static const char root_mntpoint_templ[] = "/tmp/mount-notify_test_root.XXXXXX";
+static const int mark_cmds[] = {
+ FAN_MARK_ADD,
+ FAN_MARK_REMOVE,
+ FAN_MARK_FLUSH
+};
+
+#define NUM_FAN_FDS ARRAY_SIZE(mark_cmds)
+
FIXTURE(fanotify) {
- int fan_fd;
+ int fan_fd[NUM_FAN_FDS];
char buf[256];
unsigned int rem;
void *next;
@@ -61,7 +69,7 @@ FIXTURE(fanotify) {
FIXTURE_SETUP(fanotify)
{
- int ret;
+ int i, ret;
ASSERT_EQ(unshare(CLONE_NEWNS), 0);
@@ -89,20 +97,34 @@ FIXTURE_SETUP(fanotify)
self->root_id = get_mnt_id(_metadata, "/");
ASSERT_NE(self->root_id, 0);
- self->fan_fd = fanotify_init(FAN_REPORT_MNT, 0);
- ASSERT_GE(self->fan_fd, 0);
-
- ret = fanotify_mark(self->fan_fd, FAN_MARK_ADD | FAN_MARK_MNTNS,
- FAN_MNT_ATTACH | FAN_MNT_DETACH, self->ns_fd, NULL);
- ASSERT_EQ(ret, 0);
+ for (i = 0; i < NUM_FAN_FDS; i++) {
+ self->fan_fd[i] = fanotify_init(FAN_REPORT_MNT | FAN_NONBLOCK,
+ 0);
+ ASSERT_GE(self->fan_fd[i], 0);
+ ret = fanotify_mark(self->fan_fd[i], FAN_MARK_ADD |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ // On fd[0] we do an extra ADD that changes nothing.
+ // On fd[1]/fd[2] we REMOVE/FLUSH which removes the mark.
+ ret = fanotify_mark(self->fan_fd[i], mark_cmds[i] |
+ FAN_MARK_MNTNS,
+ FAN_MNT_ATTACH | FAN_MNT_DETACH,
+ self->ns_fd, NULL);
+ ASSERT_EQ(ret, 0);
+ }
self->rem = 0;
}
FIXTURE_TEARDOWN(fanotify)
{
+ int i;
+
ASSERT_EQ(self->rem, 0);
- close(self->fan_fd);
+ for (i = 0; i < NUM_FAN_FDS; i++)
+ close(self->fan_fd[i]);
ASSERT_EQ(fchdir(self->orig_root), 0);
@@ -123,8 +145,21 @@ static uint64_t expect_notify(struct __test_metadata *const _metadata,
unsigned int thislen;
if (!self->rem) {
- ssize_t len = read(self->fan_fd, self->buf, sizeof(self->buf));
- ASSERT_GT(len, 0);
+ ssize_t len;
+ int i;
+
+ for (i = NUM_FAN_FDS - 1; i >= 0; i--) {
+ len = read(self->fan_fd[i], self->buf,
+ sizeof(self->buf));
+ if (i > 0) {
+ // Groups 1,2 should get EAGAIN
+ ASSERT_EQ(len, -1);
+ ASSERT_EQ(errno, EAGAIN);
+ } else {
+ // Group 0 should get events
+ ASSERT_GT(len, 0);
+ }
+ }
self->rem = len;
self->next = (void *) self->buf;
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
index 6b94b678741a..f656bccb1a14 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/dynevent_limitations.tc
@@ -7,11 +7,32 @@
MAX_ARGS=128
EXCEED_ARGS=$((MAX_ARGS + 1))
+# bash and dash evaluate variables differently.
+# dash will evaluate '\\' every time it is read whereas bash does not.
+#
+# TEST_STRING="$TEST_STRING \\$i"
+# echo $TEST_STRING
+#
+# With i=123
+# On bash, that will print "\123"
+# but on dash, that will print the escape sequence of \123 as the \ will
+# be interpreted again in the echo.
+#
+# Set a variable "bs" to save a double backslash, then echo that
+# to "ts" to see if $ts changed or not. If it changed, it's dash,
+# if not, it's bash, and then bs can equal a single backslash.
+bs='\\'
+ts=`echo $bs`
+if [ "$ts" = '\\' ]; then
+ # this is bash
+ bs='\'
+fi
+
check_max_args() { # event_header
TEST_STRING=$1
# Acceptable
for i in `seq 1 $MAX_ARGS`; do
- TEST_STRING="$TEST_STRING \\$i"
+ TEST_STRING="$TEST_STRING $bs$i"
done
echo "$TEST_STRING" >> dynamic_events
echo > dynamic_events
diff --git a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
index 118247b8dd84..c62165fabd0c 100644
--- a/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
+++ b/tools/testing/selftests/ftrace/test.d/filter/event-filter-function.tc
@@ -80,6 +80,26 @@ if [ $misscnt -gt 0 ]; then
exit_fail
fi
+# Check strings too
+if [ -f events/syscalls/sys_enter_openat/filter ]; then
+ DIRNAME=`basename $TMPDIR`
+ echo "filename.ustring ~ \"*$DIRNAME*\"" > events/syscalls/sys_enter_openat/filter
+ echo 1 > events/syscalls/sys_enter_openat/enable
+ echo 1 > tracing_on
+ ls /bin/sh
+ nocnt=`grep openat trace | wc -l`
+ ls $TMPDIR
+ echo 0 > tracing_on
+ hitcnt=`grep openat trace | wc -l`;
+ echo 0 > events/syscalls/sys_enter_openat/enable
+ if [ $nocnt -gt 0 ]; then
+ exit_fail
+ fi
+ if [ $hitcnt -eq 0 ]; then
+ exit_fail
+ fi
+fi
+
reset_events_filter
exit 0
diff --git a/tools/testing/selftests/landlock/audit.h b/tools/testing/selftests/landlock/audit.h
index b9054086a0c9..18a6014920b5 100644
--- a/tools/testing/selftests/landlock/audit.h
+++ b/tools/testing/selftests/landlock/audit.h
@@ -300,15 +300,22 @@ out:
return err;
}
-static int __maybe_unused matches_log_domain_allocated(int audit_fd,
+static int __maybe_unused matches_log_domain_allocated(int audit_fd, pid_t pid,
__u64 *domain_id)
{
- return audit_match_record(
- audit_fd, AUDIT_LANDLOCK_DOMAIN,
- REGEX_LANDLOCK_PREFIX
- " status=allocated mode=enforcing pid=[0-9]\\+ uid=[0-9]\\+"
- " exe=\"[^\"]\\+\" comm=\".*_test\"$",
- domain_id);
+ static const char log_template[] = REGEX_LANDLOCK_PREFIX
+ " status=allocated mode=enforcing pid=%d uid=[0-9]\\+"
+ " exe=\"[^\"]\\+\" comm=\".*_test\"$";
+ char log_match[sizeof(log_template) + 10];
+ int log_match_len;
+
+ log_match_len =
+ snprintf(log_match, sizeof(log_match), log_template, pid);
+ if (log_match_len > sizeof(log_match))
+ return -E2BIG;
+
+ return audit_match_record(audit_fd, AUDIT_LANDLOCK_DOMAIN, log_match,
+ domain_id);
}
static int __maybe_unused matches_log_domain_deallocated(
diff --git a/tools/testing/selftests/landlock/audit_test.c b/tools/testing/selftests/landlock/audit_test.c
index a0643070c403..cfc571afd0eb 100644
--- a/tools/testing/selftests/landlock/audit_test.c
+++ b/tools/testing/selftests/landlock/audit_test.c
@@ -9,6 +9,7 @@
#include <errno.h>
#include <limits.h>
#include <linux/landlock.h>
+#include <pthread.h>
#include <stdlib.h>
#include <sys/mount.h>
#include <sys/prctl.h>
@@ -40,7 +41,6 @@ FIXTURE(audit)
{
struct audit_filter audit_filter;
int audit_fd;
- __u64(*domain_stack)[16];
};
FIXTURE_SETUP(audit)
@@ -60,18 +60,10 @@ FIXTURE_SETUP(audit)
TH_LOG("Failed to initialize audit: %s", error_msg);
}
clear_cap(_metadata, CAP_AUDIT_CONTROL);
-
- self->domain_stack = mmap(NULL, sizeof(*self->domain_stack),
- PROT_READ | PROT_WRITE,
- MAP_SHARED | MAP_ANONYMOUS, -1, 0);
- ASSERT_NE(MAP_FAILED, self->domain_stack);
- memset(self->domain_stack, 0, sizeof(*self->domain_stack));
}
FIXTURE_TEARDOWN(audit)
{
- EXPECT_EQ(0, munmap(self->domain_stack, sizeof(*self->domain_stack)));
-
set_cap(_metadata, CAP_AUDIT_CONTROL);
EXPECT_EQ(0, audit_cleanup(self->audit_fd, &self->audit_filter));
clear_cap(_metadata, CAP_AUDIT_CONTROL);
@@ -83,9 +75,15 @@ TEST_F(audit, layers)
.scoped = LANDLOCK_SCOPE_SIGNAL,
};
int status, ruleset_fd, i;
+ __u64(*domain_stack)[16];
__u64 prev_dom = 3;
pid_t child;
+ domain_stack = mmap(NULL, sizeof(*domain_stack), PROT_READ | PROT_WRITE,
+ MAP_SHARED | MAP_ANONYMOUS, -1, 0);
+ ASSERT_NE(MAP_FAILED, domain_stack);
+ memset(domain_stack, 0, sizeof(*domain_stack));
+
ruleset_fd =
landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
ASSERT_LE(0, ruleset_fd);
@@ -94,7 +92,7 @@ TEST_F(audit, layers)
child = fork();
ASSERT_LE(0, child);
if (child == 0) {
- for (i = 0; i < ARRAY_SIZE(*self->domain_stack); i++) {
+ for (i = 0; i < ARRAY_SIZE(*domain_stack); i++) {
__u64 denial_dom = 1;
__u64 allocated_dom = 2;
@@ -107,7 +105,8 @@ TEST_F(audit, layers)
matches_log_signal(_metadata, self->audit_fd,
getppid(), &denial_dom));
EXPECT_EQ(0, matches_log_domain_allocated(
- self->audit_fd, &allocated_dom));
+ self->audit_fd, getpid(),
+ &allocated_dom));
EXPECT_NE(denial_dom, 1);
EXPECT_NE(denial_dom, 0);
EXPECT_EQ(denial_dom, allocated_dom);
@@ -115,7 +114,7 @@ TEST_F(audit, layers)
/* Checks that the new domain is younger than the previous one. */
EXPECT_GT(allocated_dom, prev_dom);
prev_dom = allocated_dom;
- (*self->domain_stack)[i] = allocated_dom;
+ (*domain_stack)[i] = allocated_dom;
}
/* Checks that we reached the maximum number of layers. */
@@ -142,23 +141,143 @@ TEST_F(audit, layers)
/* Purges log from deallocated domains. */
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
&audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
- for (i = ARRAY_SIZE(*self->domain_stack) - 1; i >= 0; i--) {
+ for (i = ARRAY_SIZE(*domain_stack) - 1; i >= 0; i--) {
__u64 deallocated_dom = 2;
EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
&deallocated_dom));
- EXPECT_EQ((*self->domain_stack)[i], deallocated_dom)
+ EXPECT_EQ((*domain_stack)[i], deallocated_dom)
{
TH_LOG("Failed to match domain %llx (#%d)",
- (*self->domain_stack)[i], i);
+ (*domain_stack)[i], i);
}
}
+ EXPECT_EQ(0, munmap(domain_stack, sizeof(*domain_stack)));
EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
&audit_tv_default, sizeof(audit_tv_default)));
-
EXPECT_EQ(0, close(ruleset_fd));
}
+struct thread_data {
+ pid_t parent_pid;
+ int ruleset_fd, pipe_child, pipe_parent;
+};
+
+static void *thread_audit_test(void *arg)
+{
+ const struct thread_data *data = (struct thread_data *)arg;
+ uintptr_t err = 0;
+ char buffer;
+
+ /* TGID and TID are different for a second thread. */
+ if (getpid() == gettid()) {
+ err = 1;
+ goto out;
+ }
+
+ if (landlock_restrict_self(data->ruleset_fd, 0)) {
+ err = 2;
+ goto out;
+ }
+
+ if (close(data->ruleset_fd)) {
+ err = 3;
+ goto out;
+ }
+
+ /* Creates a denial to get the domain ID. */
+ if (kill(data->parent_pid, 0) != -1) {
+ err = 4;
+ goto out;
+ }
+
+ if (EPERM != errno) {
+ err = 5;
+ goto out;
+ }
+
+ /* Signals the parent to read denial logs. */
+ if (write(data->pipe_child, ".", 1) != 1) {
+ err = 6;
+ goto out;
+ }
+
+ /* Waits for the parent to update audit filters. */
+ if (read(data->pipe_parent, &buffer, 1) != 1) {
+ err = 7;
+ goto out;
+ }
+
+out:
+ close(data->pipe_child);
+ close(data->pipe_parent);
+ return (void *)err;
+}
+
+/* Checks that the PID tied to a domain is not a TID but the TGID. */
+TEST_F(audit, thread)
+{
+ const struct landlock_ruleset_attr ruleset_attr = {
+ .scoped = LANDLOCK_SCOPE_SIGNAL,
+ };
+ __u64 denial_dom = 1;
+ __u64 allocated_dom = 2;
+ __u64 deallocated_dom = 3;
+ pthread_t thread;
+ int pipe_child[2], pipe_parent[2];
+ char buffer;
+ struct thread_data child_data;
+
+ child_data.parent_pid = getppid();
+ ASSERT_EQ(0, pipe2(pipe_child, O_CLOEXEC));
+ child_data.pipe_child = pipe_child[1];
+ ASSERT_EQ(0, pipe2(pipe_parent, O_CLOEXEC));
+ child_data.pipe_parent = pipe_parent[0];
+ child_data.ruleset_fd =
+ landlock_create_ruleset(&ruleset_attr, sizeof(ruleset_attr), 0);
+ ASSERT_LE(0, child_data.ruleset_fd);
+
+ /* TGID and TID are the same for the initial thread . */
+ EXPECT_EQ(getpid(), gettid());
+ EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0));
+ ASSERT_EQ(0, pthread_create(&thread, NULL, thread_audit_test,
+ &child_data));
+
+ /* Waits for the child to generate a denial. */
+ ASSERT_EQ(1, read(pipe_child[0], &buffer, 1));
+ EXPECT_EQ(0, close(pipe_child[0]));
+
+ /* Matches the signal log to get the domain ID. */
+ EXPECT_EQ(0, matches_log_signal(_metadata, self->audit_fd,
+ child_data.parent_pid, &denial_dom));
+ EXPECT_NE(denial_dom, 1);
+ EXPECT_NE(denial_dom, 0);
+
+ EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, getpid(),
+ &allocated_dom));
+ EXPECT_EQ(denial_dom, allocated_dom);
+
+ /* Updates filter rules to match the drop record. */
+ set_cap(_metadata, CAP_AUDIT_CONTROL);
+ EXPECT_EQ(0, audit_filter_drop(self->audit_fd, AUDIT_ADD_RULE));
+ EXPECT_EQ(0, audit_filter_exe(self->audit_fd, &self->audit_filter,
+ AUDIT_DEL_RULE));
+ clear_cap(_metadata, CAP_AUDIT_CONTROL);
+
+ /* Signals the thread to exit, which will generate a domain deallocation. */
+ ASSERT_EQ(1, write(pipe_parent[1], ".", 1));
+ EXPECT_EQ(0, close(pipe_parent[1]));
+ ASSERT_EQ(0, pthread_join(thread, NULL));
+
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_dom_drop, sizeof(audit_tv_dom_drop)));
+ EXPECT_EQ(0, matches_log_domain_deallocated(self->audit_fd, 1,
+ &deallocated_dom));
+ EXPECT_EQ(denial_dom, deallocated_dom);
+ EXPECT_EQ(0, setsockopt(self->audit_fd, SOL_SOCKET, SO_RCVTIMEO,
+ &audit_tv_default, sizeof(audit_tv_default)));
+}
+
FIXTURE(audit_flags)
{
struct audit_filter audit_filter;
@@ -273,7 +392,8 @@ TEST_F(audit_flags, signal)
/* Checks domain information records. */
EXPECT_EQ(0, matches_log_domain_allocated(
- self->audit_fd, &allocated_dom));
+ self->audit_fd, getpid(),
+ &allocated_dom));
EXPECT_NE(*self->domain_id, 1);
EXPECT_NE(*self->domain_id, 0);
EXPECT_EQ(*self->domain_id, allocated_dom);
diff --git a/tools/testing/selftests/landlock/fs_test.c b/tools/testing/selftests/landlock/fs_test.c
index f819011a8798..73729382d40f 100644
--- a/tools/testing/selftests/landlock/fs_test.c
+++ b/tools/testing/selftests/landlock/fs_test.c
@@ -5964,7 +5964,8 @@ TEST_F(audit_layout1, refer_handled)
EXPECT_EQ(EXDEV, errno);
EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
dir_s1d1));
- EXPECT_EQ(0, matches_log_domain_allocated(self->audit_fd, NULL));
+ EXPECT_EQ(0,
+ matches_log_domain_allocated(self->audit_fd, getpid(), NULL));
EXPECT_EQ(0, matches_log_fs(_metadata, self->audit_fd, "fs\\.refer",
dir_s1d3));
diff --git a/tools/testing/selftests/lib/config b/tools/testing/selftests/lib/config
index 81a1f64a22e8..377b3699ff31 100644
--- a/tools/testing/selftests/lib/config
+++ b/tools/testing/selftests/lib/config
@@ -1,2 +1,3 @@
CONFIG_TEST_BITMAP=m
+CONFIG_PRIME_NUMBERS=m
CONFIG_TEST_BITOPS=m
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 130d532b7e67..3cfef5153823 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -33,7 +33,6 @@ CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
CONFIG_IPV6_MROUTE=y
CONFIG_IPV6_SIT=y
-CONFIG_IP_DCCP=m
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES=m
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 3ea6f886a210..c58dc4ac2810 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -11,7 +11,7 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
- ipv4_mpath_list ipv6_mpath_list"
+ ipv4_mpath_list ipv6_mpath_list ipv4_mpath_balance ipv6_mpath_balance"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -1085,6 +1085,35 @@ route_setup()
set +e
}
+forwarding_cleanup()
+{
+ cleanup_ns $ns3
+
+ route_cleanup
+}
+
+# extend route_setup with an ns3 reachable through ns2 over both devices
+forwarding_setup()
+{
+ forwarding_cleanup
+
+ route_setup
+
+ setup_ns ns3
+
+ ip link add veth5 netns $ns3 type veth peer name veth6 netns $ns2
+ ip -netns $ns3 link set veth5 up
+ ip -netns $ns2 link set veth6 up
+
+ ip -netns $ns3 -4 addr add dev veth5 172.16.105.1/24
+ ip -netns $ns2 -4 addr add dev veth6 172.16.105.2/24
+ ip -netns $ns3 -4 route add 172.16.100.0/22 via 172.16.105.2
+
+ ip -netns $ns3 -6 addr add dev veth5 2001:db8:105::1/64 nodad
+ ip -netns $ns2 -6 addr add dev veth6 2001:db8:105::2/64 nodad
+ ip -netns $ns3 -6 route add 2001:db8:101::/33 via 2001:db8:105::2
+}
+
# assumption is that basic add of a single path route works
# otherwise just adding an address on an interface is broken
ipv6_rt_add()
@@ -2600,6 +2629,93 @@ ipv6_mpath_list_test()
route_cleanup
}
+tc_set_flower_counter__saddr_syn() {
+ tc_set_flower_counter $1 $2 $3 "src_ip $4 ip_proto tcp tcp_flags 0x2"
+}
+
+ip_mpath_balance_dep_check()
+{
+ if [ ! -x "$(command -v socat)" ]; then
+ echo "socat command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "jq command not found. Skipping test"
+ return 1
+ fi
+}
+
+ip_mpath_balance() {
+ local -r ipver=$1
+ local -r daddr=$2
+ local -r num_conn=20
+
+ for i in $(seq 1 $num_conn); do
+ ip netns exec $ns3 socat $ipver TCP-LISTEN:8000 STDIO >/dev/null &
+ sleep 0.02
+ echo -n a | ip netns exec $ns1 socat $ipver STDIO TCP:$daddr:8000
+ done
+
+ local -r syn0="$(tc_get_flower_counter $ns1 veth1)"
+ local -r syn1="$(tc_get_flower_counter $ns1 veth3)"
+ local -r syns=$((syn0+syn1))
+
+ [ "$VERBOSE" = "1" ] && echo "multipath: syns seen: ($syn0,$syn1)"
+
+ [[ $syns -ge $num_conn ]] && [[ $syn0 -gt 0 ]] && [[ $syn1 -gt 0 ]]
+}
+
+ipv4_mpath_balance_test()
+{
+ echo
+ echo "IPv4 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 172.16.105.1 \
+ nexthop via 172.16.101.2 \
+ nexthop via 172.16.103.2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 4 veth1 172.16.101.1
+ tc_set_flower_counter__saddr_syn $ns1 4 veth3 172.16.103.1
+
+ ip_mpath_balance -4 172.16.105.1
+
+ log_test $? 0 "IPv4 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
+ipv6_mpath_balance_test()
+{
+ echo
+ echo "IPv6 multipath load balance test"
+
+ ip_mpath_balance_dep_check || return 1
+ forwarding_setup
+
+ $IP route add 2001:db8:105::1\
+ nexthop via 2001:db8:101::2 \
+ nexthop via 2001:db8:103::2
+
+ ip netns exec $ns1 \
+ sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+
+ tc_set_flower_counter__saddr_syn $ns1 6 veth1 2001:db8:101::1
+ tc_set_flower_counter__saddr_syn $ns1 6 veth3 2001:db8:103::1
+
+ ip_mpath_balance -6 "[2001:db8:105::1]"
+
+ log_test $? 0 "IPv6 multipath loadbalance"
+
+ forwarding_cleanup
+}
+
################################################################################
# usage
@@ -2683,6 +2799,8 @@ do
fib6_gc_test|ipv6_gc) fib6_gc_test;;
ipv4_mpath_list) ipv4_mpath_list_test;;
ipv6_mpath_list) ipv6_mpath_list_test;;
+ ipv4_mpath_balance) ipv4_mpath_balance_test;;
+ ipv6_mpath_balance) ipv6_mpath_balance_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac
diff --git a/tools/testing/selftests/net/forwarding/bridge_igmp.sh b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
index e6a3e04fd83f..d4e7dd659354 100755
--- a/tools/testing/selftests/net/forwarding/bridge_igmp.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_igmp.sh
@@ -1,10 +1,24 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="v2reportleave_test v3include_test v3inc_allow_test v3inc_is_include_test \
- v3inc_is_exclude_test v3inc_to_exclude_test v3exc_allow_test v3exc_is_include_test \
- v3exc_is_exclude_test v3exc_to_exclude_test v3inc_block_test v3exc_block_test \
- v3exc_timeout_test v3star_ex_auto_add_test"
+ALL_TESTS="
+ v2reportleave_test
+ v3include_test
+ v3inc_allow_test
+ v3inc_is_include_test
+ v3inc_is_exclude_test
+ v3inc_to_exclude_test
+ v3exc_allow_test
+ v3exc_is_include_test
+ v3exc_is_exclude_test
+ v3exc_to_exclude_test
+ v3inc_block_test
+ v3exc_block_test
+ v3exc_timeout_test
+ v3star_ex_auto_add_test
+ v2per_vlan_snooping_port_stp_test
+ v2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="239.10.10.10"
@@ -554,6 +568,64 @@ v3star_ex_auto_add_test()
v3cleanup $swp2 $TEST_GROUP
}
+v2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_igmp_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["igmp_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No IGMP queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+v2per_vlan_snooping_port_stp_test()
+{
+ v2per_vlan_snooping_stp_test 1
+}
+
+v2per_vlan_snooping_vlan_stp_test()
+{
+ v2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_mld.sh b/tools/testing/selftests/net/forwarding/bridge_mld.sh
index f84ab2e65754..4cacef5a813a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mld.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mld.sh
@@ -1,10 +1,23 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="mldv2include_test mldv2inc_allow_test mldv2inc_is_include_test mldv2inc_is_exclude_test \
- mldv2inc_to_exclude_test mldv2exc_allow_test mldv2exc_is_include_test \
- mldv2exc_is_exclude_test mldv2exc_to_exclude_test mldv2inc_block_test \
- mldv2exc_block_test mldv2exc_timeout_test mldv2star_ex_auto_add_test"
+ALL_TESTS="
+ mldv2include_test
+ mldv2inc_allow_test
+ mldv2inc_is_include_test
+ mldv2inc_is_exclude_test
+ mldv2inc_to_exclude_test
+ mldv2exc_allow_test
+ mldv2exc_is_include_test
+ mldv2exc_is_exclude_test
+ mldv2exc_to_exclude_test
+ mldv2inc_block_test
+ mldv2exc_block_test
+ mldv2exc_timeout_test
+ mldv2star_ex_auto_add_test
+ mldv2per_vlan_snooping_port_stp_test
+ mldv2per_vlan_snooping_vlan_stp_test
+"
NUM_NETIFS=4
CHECK_TC="yes"
TEST_GROUP="ff02::cc"
@@ -554,6 +567,66 @@ mldv2star_ex_auto_add_test()
mldv2cleanup $swp2
}
+mldv2per_vlan_snooping_stp_test()
+{
+ local is_port=$1
+
+ local msg="port"
+ [[ $is_port -ne 1 ]] && msg="vlan"
+
+ ip link set br0 up type bridge vlan_filtering 1 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_vlan_snooping 1 \
+ mcast_querier 1 \
+ mcast_stats_enabled 1
+ bridge vlan global set vid 1 dev br0 \
+ mcast_mld_version 2 \
+ mcast_snooping 1 \
+ mcast_querier 1 \
+ mcast_query_interval 100 \
+ mcast_startup_query_count 0
+
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 0
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 4
+ sleep 5
+ local tx_s=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+ [[ $is_port -eq 1 ]] && bridge link set dev $swp1 state 3
+ [[ $is_port -ne 1 ]] && bridge vlan set vid 1 dev $swp1 state 3
+ sleep 5
+ local tx_e=$(ip -j -p stats show dev $swp1 \
+ group xstats_slave subgroup bridge suite mcast \
+ | jq '.[]["multicast"]["mld_queries"]["tx_v2"]')
+
+ RET=0
+ local tx=$(expr $tx_e - $tx_s)
+ test $tx -gt 0
+ check_err $? "No MLD queries after STP state becomes forwarding"
+ log_test "per vlan snooping with $msg stp state change"
+
+ # restore settings
+ bridge vlan global set vid 1 dev br0 \
+ mcast_querier 0 \
+ mcast_query_interval 12500 \
+ mcast_startup_query_count 2 \
+ mcast_mld_version 1
+ ip link set br0 up type bridge vlan_filtering 0 \
+ mcast_vlan_snooping 0 \
+ mcast_stats_enabled 0
+}
+
+mldv2per_vlan_snooping_port_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 1
+}
+
+mldv2per_vlan_snooping_vlan_stp_test()
+{
+ mldv2per_vlan_snooping_stp_test 0
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 90f8a244ea90..e59fba366a0a 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding vlan_deletion extern_learn other_tpid 8021p drop_untagged"
NUM_NETIFS=4
CHECK_TC="yes"
source lib.sh
@@ -194,6 +194,100 @@ other_tpid()
tc qdisc del dev $h2 clsact
}
+8021p_do()
+{
+ local should_fail=$1; shift
+ local mac=de:ad:be:ef:13:37
+
+ tc filter add dev $h2 ingress protocol all pref 1 handle 101 \
+ flower dst_mac $mac action drop
+
+ $MZ -q $h1 -c 1 -b $mac -a own "81:00 00:00 08:00 aa-aa-aa-aa-aa-aa-aa-aa-aa"
+ sleep 1
+
+ tc -j -s filter show dev $h2 ingress \
+ | jq -e ".[] | select(.options.handle == 101) \
+ | select(.options.actions[0].stats.packets == 1)" &> /dev/null
+ check_err_fail $should_fail $? "802.1p-tagged reception"
+
+ tc filter del dev $h2 ingress pref 1
+}
+
+8021p()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with the default_pvid, 1, packets tagged with VID 0 are
+ # accepted.
+ 8021p_do 0
+
+ # Test that packets tagged with VID 0 are still accepted after changing
+ # the default_pvid.
+ ip link set br0 type bridge vlan_default_pvid 10
+ 8021p_do 0
+
+ log_test "Reception of 802.1p-tagged traffic"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
+send_untagged_and_8021p()
+{
+ ping_do $h1 192.0.2.2
+ check_fail $?
+
+ 8021p_do 1
+}
+
+drop_untagged()
+{
+ RET=0
+
+ tc qdisc add dev $h2 clsact
+ ip link set $h2 promisc on
+
+ # Test that with no PVID, untagged and 802.1p-tagged traffic is
+ # dropped.
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ # First we reconfigure the default_pvid, 1, as a non-PVID VLAN.
+ bridge vlan add dev $swp1 vid 1 untagged
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Next we try to delete VID 1 altogether
+ bridge vlan del dev $swp1 vid 1
+ send_untagged_and_8021p
+ bridge vlan add dev $swp1 vid 1 pvid untagged
+
+ # Set up the bridge without a default_pvid, then check that the 8021q
+ # module, when the bridge port goes down and then up again, does not
+ # accidentally re-enable untagged packet reception.
+ ip link set br0 type bridge vlan_default_pvid 0
+ ip link set $swp1 down
+ ip link set $swp1 up
+ setup_wait
+ send_untagged_and_8021p
+
+ # Remove swp1 as a bridge port and let it rejoin the bridge while it
+ # has no default_pvid.
+ ip link set $swp1 nomaster
+ ip link set $swp1 master br0
+ send_untagged_and_8021p
+
+ # Restore settings
+ ip link set br0 type bridge vlan_default_pvid 1
+
+ log_test "Dropping of untagged and 802.1p-tagged traffic with no PVID"
+
+ ip link set $h2 promisc off
+ tc qdisc del dev $h2 clsact
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 8d7a1a004b7c..18fd69d8d937 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -1,6 +1,7 @@
CONFIG_BRIDGE=m
CONFIG_VLAN_8021Q=m
CONFIG_BRIDGE_VLAN_FILTERING=y
+CONFIG_BRIDGE_IGMP_SNOOPING=y
CONFIG_NET_L3_MASTER_DEV=y
CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_VRF=m
diff --git a/tools/testing/selftests/net/forwarding/tc_taprio.sh b/tools/testing/selftests/net/forwarding/tc_taprio.sh
new file mode 100755
index 000000000000..8992aeabfe0b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/tc_taprio.sh
@@ -0,0 +1,421 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ALL_TESTS=" \
+ test_clock_jump_backward \
+ test_taprio_after_ptp \
+ test_max_sdu \
+ test_clock_jump_backward_forward \
+"
+NUM_NETIFS=4
+source tc_common.sh
+source lib.sh
+source tsn_lib.sh
+
+require_command python3
+
+# The test assumes the usual topology from the README, where h1 is connected to
+# swp1, h2 to swp2, and swp1 and swp2 are together in a bridge.
+# Additional assumption: h1 and h2 use the same PHC, and so do swp1 and swp2.
+# By synchronizing h1 to swp1 via PTP, h2 is also implicitly synchronized to
+# swp1 (and both to CLOCK_REALTIME).
+h1=${NETIFS[p1]}
+swp1=${NETIFS[p2]}
+swp2=${NETIFS[p3]}
+h2=${NETIFS[p4]}
+
+UDS_ADDRESS_H1="/var/run/ptp4l_h1"
+UDS_ADDRESS_SWP1="/var/run/ptp4l_swp1"
+
+H1_IPV4="192.0.2.1"
+H2_IPV4="192.0.2.2"
+H1_IPV6="2001:db8:1::1"
+H2_IPV6="2001:db8:1::2"
+
+# Tunables
+NUM_PKTS=100
+STREAM_VID=10
+STREAM_PRIO_1=6
+STREAM_PRIO_2=5
+STREAM_PRIO_3=4
+# PTP uses TC 0
+ALL_GATES=$((1 << 0 | 1 << STREAM_PRIO_1 | 1 << STREAM_PRIO_2))
+# Use a conservative cycle of 10 ms to allow the test to still pass when the
+# kernel has some extra overhead like lockdep etc
+CYCLE_TIME_NS=10000000
+# Create two Gate Control List entries, one OPEN and one CLOSE, of equal
+# durations
+GATE_DURATION_NS=$((CYCLE_TIME_NS / 2))
+# Give 2/3 of the cycle time to user space and 1/3 to the kernel
+FUDGE_FACTOR=$((CYCLE_TIME_NS / 3))
+# Shift the isochron base time by half the gate time, so that packets are
+# always received by swp1 close to the middle of the time slot, to minimize
+# inaccuracies due to network sync
+SHIFT_TIME_NS=$((GATE_DURATION_NS / 2))
+
+path_delay=
+
+h1_create()
+{
+ simple_if_init $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h1_destroy()
+{
+ simple_if_fini $h1 $H1_IPV4/24 $H1_IPV6/64
+}
+
+h2_create()
+{
+ simple_if_init $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+h2_destroy()
+{
+ simple_if_fini $h2 $H2_IPV4/24 $H2_IPV6/64
+}
+
+switch_create()
+{
+ local h2_mac_addr=$(mac_get $h2)
+
+ ip link set $swp1 up
+ ip link set $swp2 up
+
+ ip link add br0 type bridge vlan_filtering 1
+ ip link set $swp1 master br0
+ ip link set $swp2 master br0
+ ip link set br0 up
+
+ bridge vlan add dev $swp2 vid $STREAM_VID
+ bridge vlan add dev $swp1 vid $STREAM_VID
+ bridge fdb add dev $swp2 \
+ $h2_mac_addr vlan $STREAM_VID static master
+}
+
+switch_destroy()
+{
+ ip link del br0
+}
+
+ptp_setup()
+{
+ # Set up swp1 as a master PHC for h1, synchronized to the local
+ # CLOCK_REALTIME.
+ phc2sys_start $UDS_ADDRESS_SWP1
+ ptp4l_start $h1 true $UDS_ADDRESS_H1
+ ptp4l_start $swp1 false $UDS_ADDRESS_SWP1
+}
+
+ptp_cleanup()
+{
+ ptp4l_stop $swp1
+ ptp4l_stop $h1
+ phc2sys_stop
+}
+
+txtime_setup()
+{
+ local if_name=$1
+
+ tc qdisc add dev $if_name clsact
+ # Classify PTP on TC 7 and isochron on TC 6
+ tc filter add dev $if_name egress protocol 0x88f7 \
+ flower action skbedit priority 7
+ tc filter add dev $if_name egress protocol 802.1Q \
+ flower vlan_ethtype 0xdead action skbedit priority 6
+ tc qdisc add dev $if_name handle 100: parent root mqprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ hw 1
+ # Set up TC 5, 6, 7 for SO_TXTIME. tc-mqprio queues count from 1.
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_1 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_2 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+ tc qdisc replace dev $if_name parent 100:$((STREAM_PRIO_3 + 1)) etf \
+ clockid CLOCK_TAI offload delta $FUDGE_FACTOR
+}
+
+txtime_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name clsact
+ tc qdisc del dev $if_name root
+}
+
+taprio_replace()
+{
+ local if_name="$1"; shift
+ local extra_args="$1"; shift
+
+ # STREAM_PRIO_1 always has an open gate.
+ # STREAM_PRIO_2 has a gate open for GATE_DURATION_NS (half the cycle time)
+ # STREAM_PRIO_3 always has a closed gate.
+ tc qdisc replace dev $if_name root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+ map 0 1 2 3 4 5 6 7 \
+ sched-entry S $(printf "%x" $ALL_GATES) $GATE_DURATION_NS \
+ sched-entry S $(printf "%x" $((ALL_GATES & ~(1 << STREAM_PRIO_2)))) $GATE_DURATION_NS \
+ base-time 0 flags 0x2 $extra_args
+ taprio_wait_for_admin $if_name
+}
+
+taprio_cleanup()
+{
+ local if_name=$1
+
+ tc qdisc del dev $if_name root
+}
+
+probe_path_delay()
+{
+ local isochron_dat="$(mktemp)"
+ local received
+
+ log_info "Probing path delay"
+
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" 0 \
+ "$CYCLE_TIME_NS" "" "" "$NUM_PKTS" \
+ "$STREAM_VID" "$STREAM_PRIO_1" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ if [ "$received" != "$NUM_PKTS" ]; then
+ echo "Cannot establish basic data path between $h1 and $h2"
+ exit $ksft_fail
+ fi
+
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(np.max(w)))
+ EOF
+ path_delay=$(python3 ./isochron_postprocess.py)
+
+ log_info "Path delay from $h1 to $h2 estimated at $path_delay ns"
+
+ if [ "$path_delay" -gt "$GATE_DURATION_NS" ]; then
+ echo "Path delay larger than gate duration, aborting"
+ exit $ksft_fail
+ fi
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+setup_prepare()
+{
+ vrf_prepare
+
+ h1_create
+ h2_create
+ switch_create
+
+ txtime_setup $h1
+
+ # Temporarily set up PTP just to probe the end-to-end path delay.
+ ptp_setup
+ probe_path_delay
+ ptp_cleanup
+}
+
+cleanup()
+{
+ pre_cleanup
+
+ isochron_recv_stop
+ txtime_cleanup $h1
+
+ switch_destroy
+ h2_destroy
+ h1_destroy
+
+ vrf_cleanup
+}
+
+run_test()
+{
+ local base_time=$1; shift
+ local stream_prio=$1; shift
+ local expected_delay=$1; shift
+ local should_fail=$1; shift
+ local test_name=$1; shift
+ local isochron_dat="$(mktemp)"
+ local received
+ local median_delay
+
+ RET=0
+
+ # Set the shift time equal to the cycle time, which effectively
+ # cancels the default advance time. Packets won't be sent early in
+ # software, which ensures that they won't prematurely enter through
+ # the open gate in __test_out_of_band(). Also, the gate is open for
+ # long enough that this won't cause a problem in __test_in_band().
+ isochron_do "$h1" "$h2" "$UDS_ADDRESS_H1" "" "$base_time" \
+ "$CYCLE_TIME_NS" "$SHIFT_TIME_NS" "$GATE_DURATION_NS" \
+ "$NUM_PKTS" "$STREAM_VID" "$stream_prio" "" "$isochron_dat"
+
+ received=$(isochron_report_num_received "$isochron_dat")
+ [ "$received" = "$NUM_PKTS" ]
+ check_err_fail $should_fail $? "Reception of $NUM_PKTS packets"
+
+ if [ $should_fail = 0 ] && [ "$received" = "$NUM_PKTS" ]; then
+ printf "pdelay = {}\n" > isochron_data.py
+ isochron report --input-file "$isochron_dat" \
+ --printf-format "pdelay[%u] = %d - %d\n" \
+ --printf-args "qRT" \
+ >> isochron_data.py
+ cat <<-'EOF' > isochron_postprocess.py
+ #!/usr/bin/env python3
+
+ from isochron_data import pdelay
+ import numpy as np
+
+ w = np.array(list(pdelay.values()))
+ print("{}".format(int(np.median(w))))
+ EOF
+ median_delay=$(python3 ./isochron_postprocess.py)
+
+ # If the condition below is true, packets were delayed by a closed gate
+ [ "$median_delay" -gt $((path_delay + expected_delay)) ]
+ check_fail $? "Median delay $median_delay is greater than expected delay $expected_delay plus path delay $path_delay"
+
+ # If the condition below is true, packets were sent expecting them to
+ # hit a closed gate in the switch, but were not delayed
+ [ "$expected_delay" -gt 0 ] && [ "$median_delay" -lt "$expected_delay" ]
+ check_fail $? "Median delay $median_delay is less than expected delay $expected_delay"
+ fi
+
+ log_test "$test_name"
+
+ rm -f ./isochron_data.py 2> /dev/null
+ rm -f ./isochron_postprocess.py 2> /dev/null
+ rm -f "$isochron_dat" 2> /dev/null
+}
+
+__test_always_open()
+{
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Gate always open"
+}
+
+__test_always_closed()
+{
+ run_test 0.000000000 $STREAM_PRIO_3 0 1 "Gate always closed"
+}
+
+__test_in_band()
+{
+ # Send packets in-band with the OPEN gate entry
+ run_test 0.000000000 $STREAM_PRIO_2 0 0 "In band with gate"
+}
+
+__test_out_of_band()
+{
+ # Send packets in-band with the CLOSE gate entry
+ run_test 0.005000000 $STREAM_PRIO_2 \
+ $((GATE_DURATION_NS - SHIFT_TIME_NS)) 0 \
+ "Out of band with gate"
+}
+
+run_subtests()
+{
+ __test_always_open
+ __test_always_closed
+ __test_in_band
+ __test_out_of_band
+}
+
+test_taprio_after_ptp()
+{
+ log_info "Setting up taprio after PTP"
+ ptp_setup
+ taprio_replace $swp2
+ run_subtests
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+__test_under_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 100 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 0 "Under maximum SDU"
+}
+
+__test_over_max_sdu()
+{
+ # Limit max-sdu for STREAM_PRIO_1
+ taprio_replace "$swp2" "max-sdu 0 0 0 0 0 0 20 0"
+ run_test 0.000000000 $STREAM_PRIO_1 0 1 "Over maximum SDU"
+}
+
+test_max_sdu()
+{
+ ptp_setup
+ __test_under_max_sdu
+ __test_over_max_sdu
+ taprio_cleanup $swp2
+ ptp_cleanup
+}
+
+# Perform a clock jump in the past without synchronization running, so that the
+# time base remains where it was set by phc_ctl.
+test_clock_jump_backward()
+{
+ # This is a more complex schedule specifically crafted in a way that
+ # has been problematic on NXP LS1028A. Not much to test with it other
+ # than the fact that it passes traffic.
+ tc qdisc replace dev $swp2 root stab overhead 24 taprio num_tc 8 \
+ queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 map 0 1 2 3 4 5 6 7 \
+ base-time 0 sched-entry S 20 300000 sched-entry S 10 200000 \
+ sched-entry S 20 300000 sched-entry S 48 200000 \
+ sched-entry S 20 300000 sched-entry S 83 200000 \
+ sched-entry S 40 300000 sched-entry S 00 200000 flags 2
+
+ log_info "Forcing a backward clock jump"
+ phc_ctl $swp1 set 0
+
+ ping_test $h1 192.0.2.2
+ taprio_cleanup $swp2
+}
+
+# Test that taprio tolerates clock jumps.
+# Since ptp4l and phc2sys are running, it is expected for the time to
+# eventually recover (through yet another clock jump). Isochron waits
+# until that is the case.
+test_clock_jump_backward_forward()
+{
+ log_info "Forcing a backward and a forward clock jump"
+ taprio_replace $swp2
+ phc_ctl $swp1 set 0
+ ptp_setup
+ ping_test $h1 192.0.2.2
+ run_subtests
+ ptp_cleanup
+ taprio_cleanup $swp2
+}
+
+tc_offload_check
+if [[ $? -ne 0 ]]; then
+ log_test_skip "Could not test offloaded functionality"
+ exit $EXIT_STATUS
+fi
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tsn_lib.sh b/tools/testing/selftests/net/forwarding/tsn_lib.sh
index b91bcd8008a9..08c044ff6689 100644
--- a/tools/testing/selftests/net/forwarding/tsn_lib.sh
+++ b/tools/testing/selftests/net/forwarding/tsn_lib.sh
@@ -2,6 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright 2021-2022 NXP
+tc_testing_scripts_dir=$(dirname $0)/../../tc-testing/scripts
+
REQUIRE_ISOCHRON=${REQUIRE_ISOCHRON:=yes}
REQUIRE_LINUXPTP=${REQUIRE_LINUXPTP:=yes}
@@ -18,6 +20,7 @@ fi
if [[ "$REQUIRE_LINUXPTP" = "yes" ]]; then
require_command phc2sys
require_command ptp4l
+ require_command phc_ctl
fi
phc2sys_start()
@@ -182,6 +185,7 @@ isochron_do()
local base_time=$1; shift
local cycle_time=$1; shift
local shift_time=$1; shift
+ local window_size=$1; shift
local num_pkts=$1; shift
local vid=$1; shift
local priority=$1; shift
@@ -212,6 +216,10 @@ isochron_do()
extra_args="${extra_args} --shift-time=${shift_time}"
fi
+ if ! [ -z "${window_size}" ]; then
+ extra_args="${extra_args} --window-size=${window_size}"
+ fi
+
if [ "${use_l2}" = "true" ]; then
extra_args="${extra_args} --l2 --etype=0xdead ${vid}"
receiver_extra_args="--l2 --etype=0xdead"
@@ -247,3 +255,21 @@ isochron_do()
cpufreq_restore ${ISOCHRON_CPU}
}
+
+isochron_report_num_received()
+{
+ local isochron_dat=$1; shift
+
+ # Count all received packets by looking at the non-zero RX timestamps
+ isochron report \
+ --input-file "${isochron_dat}" \
+ --printf-format "%u\n" --printf-args "R" | \
+ grep -w -v '0' | wc -l
+}
+
+taprio_wait_for_admin()
+{
+ local if_name="$1"; shift
+
+ "$tc_testing_scripts_dir/taprio_wait_for_admin.sh" "$(which tc)" "$if_name"
+}
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 701905eeff66..7e1e56318625 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -270,6 +270,30 @@ tc_rule_handle_stats_get()
.options.actions[0].stats$selector"
}
+# attach a qdisc with two children match/no-match and a flower filter to match
+tc_set_flower_counter() {
+ local -r ns=$1
+ local -r ipver=$2
+ local -r dev=$3
+ local -r flower_expr=$4
+
+ tc -n $ns qdisc add dev $dev root handle 1: prio bands 2 \
+ priomap 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0
+
+ tc -n $ns qdisc add dev $dev parent 1:1 handle 11: pfifo
+ tc -n $ns qdisc add dev $dev parent 1:2 handle 12: pfifo
+
+ tc -n $ns filter add dev $dev parent 1: protocol ipv$ipver \
+ flower $flower_expr classid 1:2
+}
+
+tc_get_flower_counter() {
+ local -r ns=$1
+ local -r dev=$2
+
+ tc -n $ns -j -s qdisc show dev $dev handle 12: | jq .[0].packets
+}
+
ret_set_ksft_status()
{
local ksft_status=$1; shift
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
index 1ebc6187f421..bbc97d6bf556 100644
--- a/tools/testing/selftests/net/lib/.gitignore
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -1,2 +1,3 @@
# SPDX-License-Identifier: GPL-2.0-only
csum
+xdp_helper
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
index c22623b9a2a5..88c4bc461459 100644
--- a/tools/testing/selftests/net/lib/Makefile
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -10,6 +10,7 @@ TEST_FILES += ../../../../net/ynl
TEST_GEN_FILES += csum
TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+TEST_GEN_FILES += xdp_helper
TEST_INCLUDES := $(wildcard py/*.py sh/*.sh)
diff --git a/tools/testing/selftests/net/lib/ksft.h b/tools/testing/selftests/net/lib/ksft.h
new file mode 100644
index 000000000000..17dc34a612c6
--- /dev/null
+++ b/tools/testing/selftests/net/lib/ksft.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#if !defined(__NET_KSFT_H__)
+#define __NET_KSFT_H__
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+
+static inline void ksft_ready(void)
+{
+ const char msg[7] = "ready\n";
+ char *env_str;
+ int fd;
+
+ env_str = getenv("KSFT_READY_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ fd = STDOUT_FILENO;
+ }
+
+ write(fd, msg, sizeof(msg));
+ if (fd != STDOUT_FILENO)
+ close(fd);
+}
+
+static inline void ksft_wait(void)
+{
+ char *env_str;
+ char byte;
+ int fd;
+
+ env_str = getenv("KSFT_WAIT_FD");
+ if (env_str) {
+ fd = atoi(env_str);
+ if (!fd) {
+ fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
+ env_str);
+ return;
+ }
+ } else {
+ /* Not running in KSFT env, wait for input from STDIN instead */
+ fd = STDIN_FILENO;
+ }
+
+ read(fd, &byte, sizeof(byte));
+ if (fd != STDIN_FILENO)
+ close(fd);
+}
+
+#endif
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 3cfad0fd4570..61287c203b6e 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -3,6 +3,7 @@
import builtins
import functools
import inspect
+import signal
import sys
import time
import traceback
@@ -26,6 +27,10 @@ class KsftXfailEx(Exception):
pass
+class KsftTerminate(KeyboardInterrupt):
+ pass
+
+
def ksft_pr(*objs, **kwargs):
print("#", *objs, **kwargs)
@@ -193,6 +198,17 @@ def ksft_setup(env):
return env
+def _ksft_intr(signum, frame):
+ # ksft runner.sh sends 2 SIGTERMs in a row on a timeout
+ # if we don't ignore the second one it will stop us from handling cleanup
+ global term_cnt
+ term_cnt += 1
+ if term_cnt == 1:
+ raise KsftTerminate()
+ else:
+ ksft_pr(f"Ignoring SIGTERM (cnt: {term_cnt}), already exiting...")
+
+
def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases = cases or []
@@ -205,6 +221,10 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
cases.append(value)
break
+ global term_cnt
+ term_cnt = 0
+ prev_sigterm = signal.signal(signal.SIGTERM, _ksft_intr)
+
totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
print("TAP version 13")
@@ -233,7 +253,7 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
for line in tb.strip().split('\n'):
ksft_pr("Exception|", line)
if stop:
- ksft_pr("Stopping tests due to KeyboardInterrupt.")
+ ksft_pr(f"Stopping tests due to {type(e).__name__}.")
KSFT_RESULT = False
cnt_key = 'fail'
@@ -248,6 +268,8 @@ def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
if stop:
break
+ signal.signal(signal.SIGTERM, prev_sigterm)
+
print(
f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
)
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 8986c584cb37..6329ae805abf 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -39,12 +39,12 @@ class EthtoolFamily(YnlFamily):
class RtnlFamily(YnlFamily):
def __init__(self, recv_size=0):
- super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+ super().__init__((SPEC_PATH / Path('rt-link.yaml')).as_posix(),
schema='', recv_size=recv_size)
class RtnlAddrFamily(YnlFamily):
def __init__(self, recv_size=0):
- super().__init__((SPEC_PATH / Path('rt_addr.yaml')).as_posix(),
+ super().__init__((SPEC_PATH / Path('rt-addr.yaml')).as_posix(),
schema='', recv_size=recv_size)
class NetdevFamily(YnlFamily):
diff --git a/tools/testing/selftests/drivers/net/xdp_helper.c b/tools/testing/selftests/net/lib/xdp_helper.c
index aeed25914104..eb025a9f35b1 100644
--- a/tools/testing/selftests/drivers/net/xdp_helper.c
+++ b/tools/testing/selftests/net/lib/xdp_helper.c
@@ -11,55 +11,16 @@
#include <net/if.h>
#include <inttypes.h>
+#include "ksft.h"
+
#define UMEM_SZ (1U << 16)
#define NUM_DESC (UMEM_SZ / 2048)
-/* Move this to a common header when reused! */
-static void ksft_ready(void)
-{
- const char msg[7] = "ready\n";
- char *env_str;
- int fd;
-
- env_str = getenv("KSFT_READY_FD");
- if (env_str) {
- fd = atoi(env_str);
- if (!fd) {
- fprintf(stderr, "invalid KSFT_READY_FD = '%s'\n",
- env_str);
- return;
- }
- } else {
- fd = STDOUT_FILENO;
- }
-
- write(fd, msg, sizeof(msg));
- if (fd != STDOUT_FILENO)
- close(fd);
-}
-static void ksft_wait(void)
+static void print_usage(const char *bin)
{
- char *env_str;
- char byte;
- int fd;
-
- env_str = getenv("KSFT_WAIT_FD");
- if (env_str) {
- fd = atoi(env_str);
- if (!fd) {
- fprintf(stderr, "invalid KSFT_WAIT_FD = '%s'\n",
- env_str);
- return;
- }
- } else {
- /* Not running in KSFT env, wait for input from STDIN instead */
- fd = STDIN_FILENO;
- }
-
- read(fd, &byte, sizeof(byte));
- if (fd != STDIN_FILENO)
- close(fd);
+ fprintf(stderr, "Usage: %s ifindex queue_id [-z]\n\n"
+ "where:\n\t-z: force zerocopy mode", bin);
}
/* this is a simple helper program that creates an XDP socket and does the
@@ -77,12 +38,13 @@ int main(int argc, char **argv)
struct sockaddr_xdp sxdp = { 0 };
int num_desc = NUM_DESC;
void *umem_area;
+ int retry = 0;
int ifindex;
int sock_fd;
int queue;
- if (argc != 3) {
- fprintf(stderr, "Usage: %s ifindex queue_id\n", argv[0]);
+ if (argc != 3 && argc != 4) {
+ print_usage(argv[0]);
return 1;
}
@@ -132,11 +94,29 @@ int main(int argc, char **argv)
sxdp.sxdp_queue_id = queue;
sxdp.sxdp_flags = 0;
- if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) != 0) {
- munmap(umem_area, UMEM_SZ);
- perror("bind failed");
- close(sock_fd);
- return 1;
+ if (argc > 3) {
+ if (!strcmp(argv[3], "-z")) {
+ sxdp.sxdp_flags = XDP_ZEROCOPY;
+ } else {
+ print_usage(argv[0]);
+ return 1;
+ }
+ }
+
+ while (1) {
+ if (bind(sock_fd, (struct sockaddr *)&sxdp, sizeof(sxdp)) == 0)
+ break;
+
+ if (errno == EBUSY && retry < 3) {
+ retry++;
+ sleep(1);
+ continue;
+ } else {
+ perror("bind failed");
+ munmap(umem_area, UMEM_SZ);
+ close(sock_fd);
+ return 1;
+ }
}
ksft_ready();
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 4f55477ffe08..7a3cb4c09e45 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -206,9 +206,8 @@ chk_dump_one()
local token
local msg
- ss_token="$(ss -inmHMN $ns | grep 'token:' |\
- head -n 1 |\
- sed 's/.*token:\([0-9a-f]*\).*/\1/')"
+ ss_token="$(ss -inmHMN $ns |
+ mptcp_lib_get_info_value "token" "token")"
token="$(ip netns exec $ns ./mptcp_diag -t $ss_token |\
awk -F':[ \t]+' '/^token/ {print $2}')"
@@ -226,6 +225,37 @@ chk_dump_one()
fi
}
+chk_dump_subflow()
+{
+ local inet_diag_token
+ local subflow_line
+ local ss_output
+ local ss_token
+ local msg
+
+ ss_output=$(ss -tniN $ns)
+
+ subflow_line=$(echo "$ss_output" | \
+ grep -m1 -Eo '[0-9.]+:[0-9].+ +[0-9.]+:[0-9.]+')
+
+ ss_token=$(echo "$ss_output" | grep -m1 -Eo 'token:[^ ]+')
+
+ inet_diag_token=$(ip netns exec $ns ./mptcp_diag -s "$subflow_line" | \
+ grep -Eo 'token:[^ ]+')
+
+ msg="....chk dump_subflow"
+
+ mptcp_lib_print_title "$msg"
+ if [ -n "$ss_token" ] && [ "$ss_token" = "$inet_diag_token" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "expected $ss_token found $inet_diag_token"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
msk_info_get_value()
{
local port="${1}"
@@ -317,6 +347,7 @@ chk_msk_fallback_nr 0 "....chk no fallback"
chk_msk_inuse 2
chk_msk_cestab 2
chk_dump_one
+chk_dump_subflow
flush_pids
chk_msk_inuse 0 "2->0"
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index c83a8b47bbdf..ac1349c4b9e5 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -180,13 +180,26 @@ static void xgetnameinfo(const struct sockaddr *addr, socklen_t addrlen,
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
+again:
int err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ /* glibc starts to support MPTCP since v2.42.
+ * For older versions, use IPPROTO_TCP to resolve,
+ * and use TCP/MPTCP to create socket.
+ * Link: https://sourceware.org/git/?p=glibc.git;a=commit;h=a8e9022e0f82
+ */
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -292,7 +305,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -356,7 +369,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
int infd, struct wstate *winfo)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_diag.c b/tools/testing/selftests/net/mptcp/mptcp_diag.c
index 284286c524cf..e084796e804d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_diag.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_diag.c
@@ -8,6 +8,7 @@
#include <sys/socket.h>
#include <netinet/in.h>
#include <linux/tcp.h>
+#include <arpa/inet.h>
#include <unistd.h>
#include <stdlib.h>
@@ -19,6 +20,15 @@
#define IPPROTO_MPTCP 262
#endif
+#define parse_rtattr_nested(tb, max, rta) \
+ (parse_rtattr_flags((tb), (max), RTA_DATA(rta), RTA_PAYLOAD(rta), \
+ NLA_F_NESTED))
+
+struct params {
+ __u32 target_token;
+ char subflow_addrs[1024];
+};
+
struct mptcp_info {
__u8 mptcpi_subflows;
__u8 mptcpi_add_addr_signal;
@@ -46,6 +56,37 @@ struct mptcp_info {
__u32 mptcpi_last_ack_recv;
};
+enum {
+ MPTCP_SUBFLOW_ATTR_UNSPEC,
+ MPTCP_SUBFLOW_ATTR_TOKEN_REM,
+ MPTCP_SUBFLOW_ATTR_TOKEN_LOC,
+ MPTCP_SUBFLOW_ATTR_RELWRITE_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SEQ,
+ MPTCP_SUBFLOW_ATTR_MAP_SFSEQ,
+ MPTCP_SUBFLOW_ATTR_SSN_OFFSET,
+ MPTCP_SUBFLOW_ATTR_MAP_DATALEN,
+ MPTCP_SUBFLOW_ATTR_FLAGS,
+ MPTCP_SUBFLOW_ATTR_ID_REM,
+ MPTCP_SUBFLOW_ATTR_ID_LOC,
+ MPTCP_SUBFLOW_ATTR_PAD,
+
+ __MPTCP_SUBFLOW_ATTR_MAX
+};
+
+#define MPTCP_SUBFLOW_ATTR_MAX (__MPTCP_SUBFLOW_ATTR_MAX - 1)
+
+#define MPTCP_SUBFLOW_FLAG_MCAP_REM _BITUL(0)
+#define MPTCP_SUBFLOW_FLAG_MCAP_LOC _BITUL(1)
+#define MPTCP_SUBFLOW_FLAG_JOIN_REM _BITUL(2)
+#define MPTCP_SUBFLOW_FLAG_JOIN_LOC _BITUL(3)
+#define MPTCP_SUBFLOW_FLAG_BKUP_REM _BITUL(4)
+#define MPTCP_SUBFLOW_FLAG_BKUP_LOC _BITUL(5)
+#define MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED _BITUL(6)
+#define MPTCP_SUBFLOW_FLAG_CONNECTED _BITUL(7)
+#define MPTCP_SUBFLOW_FLAG_MAPVALID _BITUL(8)
+
+#define rta_getattr(type, value) (*(type *)RTA_DATA(value))
+
static void die_perror(const char *msg)
{
perror(msg);
@@ -54,11 +95,13 @@ static void die_perror(const char *msg)
static void die_usage(int r)
{
- fprintf(stderr, "Usage: mptcp_diag -t\n");
+ fprintf(stderr, "Usage:\n"
+ "mptcp_diag -t <token>\n"
+ "mptcp_diag -s \"<saddr>:<sport> <daddr>:<dport>\"\n");
exit(r);
}
-static void send_query(int fd, __u32 token)
+static void send_query(int fd, struct inet_diag_req_v2 *r, __u32 proto)
{
struct sockaddr_nl nladdr = {
.nl_family = AF_NETLINK
@@ -72,31 +115,26 @@ static void send_query(int fd, __u32 token)
.nlmsg_type = SOCK_DIAG_BY_FAMILY,
.nlmsg_flags = NLM_F_REQUEST
},
- .r = {
- .sdiag_family = AF_INET,
- /* Real proto is set via INET_DIAG_REQ_PROTOCOL */
- .sdiag_protocol = IPPROTO_TCP,
- .id.idiag_cookie[0] = token,
- }
+ .r = *r
};
struct rtattr rta_proto;
struct iovec iov[6];
- int iovlen = 1;
- __u32 proto;
-
- req.r.idiag_ext |= (1 << (INET_DIAG_INFO - 1));
- proto = IPPROTO_MPTCP;
- rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
- rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+ int iovlen = 0;
- iov[0] = (struct iovec) {
+ iov[iovlen++] = (struct iovec) {
.iov_base = &req,
.iov_len = sizeof(req)
};
- iov[iovlen] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
- iov[iovlen + 1] = (struct iovec){ &proto, sizeof(proto)};
- req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
- iovlen += 2;
+
+ if (proto == IPPROTO_MPTCP) {
+ rta_proto.rta_type = INET_DIAG_REQ_PROTOCOL;
+ rta_proto.rta_len = RTA_LENGTH(sizeof(proto));
+
+ iov[iovlen++] = (struct iovec){ &rta_proto, sizeof(rta_proto)};
+ iov[iovlen++] = (struct iovec){ &proto, sizeof(proto)};
+ req.nlh.nlmsg_len += RTA_LENGTH(sizeof(proto));
+ }
+
struct msghdr msg = {
.msg_name = &nladdr,
.msg_namelen = sizeof(nladdr),
@@ -160,7 +198,67 @@ static void print_info_msg(struct mptcp_info *info)
printf("bytes_acked: %llu\n", info->mptcpi_bytes_acked);
}
-static void parse_nlmsg(struct nlmsghdr *nlh)
+/*
+ * 'print_subflow_info' is from 'mptcp_subflow_info'
+ * which is a function in 'misc/ss.c' of iproute2.
+ */
+static void print_subflow_info(struct rtattr *tb[])
+{
+ u_int32_t flags = 0;
+
+ printf("It's a mptcp subflow, the subflow info:\n");
+ if (tb[MPTCP_SUBFLOW_ATTR_FLAGS]) {
+ char caps[32 + 1] = { 0 }, *cap = &caps[0];
+
+ flags = rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_FLAGS]);
+
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_REM)
+ *cap++ = 'M';
+ if (flags & MPTCP_SUBFLOW_FLAG_MCAP_LOC)
+ *cap++ = 'm';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_REM)
+ *cap++ = 'J';
+ if (flags & MPTCP_SUBFLOW_FLAG_JOIN_LOC)
+ *cap++ = 'j';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_REM)
+ *cap++ = 'B';
+ if (flags & MPTCP_SUBFLOW_FLAG_BKUP_LOC)
+ *cap++ = 'b';
+ if (flags & MPTCP_SUBFLOW_FLAG_FULLY_ESTABLISHED)
+ *cap++ = 'e';
+ if (flags & MPTCP_SUBFLOW_FLAG_CONNECTED)
+ *cap++ = 'c';
+ if (flags & MPTCP_SUBFLOW_FLAG_MAPVALID)
+ *cap++ = 'v';
+
+ if (flags)
+ printf(" flags:%s", caps);
+ }
+ if (tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_REM] &&
+ tb[MPTCP_SUBFLOW_ATTR_ID_LOC])
+ printf(" token:%04x(id:%u)/%04x(id:%u)",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_REM]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_REM]),
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_TOKEN_LOC]),
+ rta_getattr(__u8, tb[MPTCP_SUBFLOW_ATTR_ID_LOC]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ])
+ printf(" seq:%llu",
+ rta_getattr(__u64, tb[MPTCP_SUBFLOW_ATTR_MAP_SEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ])
+ printf(" sfseq:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_SFSEQ]));
+ if (tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET])
+ printf(" ssnoff:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_SSN_OFFSET]));
+ if (tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN])
+ printf(" maplen:%u",
+ rta_getattr(__u32, tb[MPTCP_SUBFLOW_ATTR_MAP_DATALEN]));
+ printf("\n");
+}
+
+static void parse_nlmsg(struct nlmsghdr *nlh, __u32 proto)
{
struct inet_diag_msg *r = NLMSG_DATA(nlh);
struct rtattr *tb[INET_DIAG_MAX + 1];
@@ -169,7 +267,7 @@ static void parse_nlmsg(struct nlmsghdr *nlh)
nlh->nlmsg_len - NLMSG_LENGTH(sizeof(*r)),
NLA_F_NESTED);
- if (tb[INET_DIAG_INFO]) {
+ if (proto == IPPROTO_MPTCP && tb[INET_DIAG_INFO]) {
int len = RTA_PAYLOAD(tb[INET_DIAG_INFO]);
struct mptcp_info *info;
@@ -183,11 +281,28 @@ static void parse_nlmsg(struct nlmsghdr *nlh)
}
print_info_msg(info);
}
+ if (proto == IPPROTO_TCP && tb[INET_DIAG_ULP_INFO]) {
+ struct rtattr *ulpinfo[INET_ULP_INFO_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(ulpinfo, INET_ULP_INFO_MAX,
+ tb[INET_DIAG_ULP_INFO]);
+
+ if (ulpinfo[INET_ULP_INFO_MPTCP]) {
+ struct rtattr *sfinfo[MPTCP_SUBFLOW_ATTR_MAX + 1] = { 0 };
+
+ parse_rtattr_nested(sfinfo, MPTCP_SUBFLOW_ATTR_MAX,
+ ulpinfo[INET_ULP_INFO_MPTCP]);
+ print_subflow_info(sfinfo);
+ } else {
+ printf("It's a normal TCP!\n");
+ }
+ }
}
-static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
+static void recv_nlmsg(int fd, __u32 proto)
{
char rcv_buff[8192];
+ struct nlmsghdr *nlh = (struct nlmsghdr *)rcv_buff;
struct sockaddr_nl rcv_nladdr = {
.nl_family = AF_NETLINK
};
@@ -204,7 +319,6 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
int len;
len = recvmsg(fd, &rcv_msg, 0);
- nlh = (struct nlmsghdr *)rcv_buff;
while (NLMSG_OK(nlh, len)) {
if (nlh->nlmsg_type == NLMSG_DONE) {
@@ -218,40 +332,84 @@ static void recv_nlmsg(int fd, struct nlmsghdr *nlh)
-(err->error), strerror(-(err->error)));
break;
}
- parse_nlmsg(nlh);
+ parse_nlmsg(nlh, proto);
nlh = NLMSG_NEXT(nlh, len);
}
}
static void get_mptcpinfo(__u32 token)
{
- struct nlmsghdr *nlh = NULL;
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ /* Real proto is set via INET_DIAG_REQ_PROTOCOL */
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = token,
+ };
+ __u32 proto = IPPROTO_MPTCP;
int fd;
fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
if (fd < 0)
die_perror("Netlink socket");
- send_query(fd, token);
- recv_nlmsg(fd, nlh);
+ send_query(fd, &r, proto);
+ recv_nlmsg(fd, proto);
close(fd);
}
-static void parse_opts(int argc, char **argv, __u32 *target_token)
+static void get_subflow_info(char *subflow_addrs)
+{
+ struct inet_diag_req_v2 r = {
+ .sdiag_family = AF_INET,
+ .sdiag_protocol = IPPROTO_TCP,
+ .idiag_ext = 1 << (INET_DIAG_INFO - 1),
+ .id.idiag_cookie[0] = INET_DIAG_NOCOOKIE,
+ .id.idiag_cookie[1] = INET_DIAG_NOCOOKIE,
+ };
+ char saddr[64], daddr[64];
+ int sport, dport;
+ int ret;
+ int fd;
+
+ ret = sscanf(subflow_addrs, "%[^:]:%d %[^:]:%d", saddr, &sport, daddr, &dport);
+ if (ret != 4)
+ die_perror("IP PORT Pairs has style problems!");
+
+ printf("%s:%d -> %s:%d\n", saddr, sport, daddr, dport);
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_SOCK_DIAG);
+ if (fd < 0)
+ die_perror("Netlink socket");
+
+ r.id.idiag_sport = htons(sport);
+ r.id.idiag_dport = htons(dport);
+
+ inet_pton(AF_INET, saddr, &r.id.idiag_src);
+ inet_pton(AF_INET, daddr, &r.id.idiag_dst);
+ send_query(fd, &r, IPPROTO_TCP);
+ recv_nlmsg(fd, IPPROTO_TCP);
+}
+
+static void parse_opts(int argc, char **argv, struct params *p)
{
int c;
if (argc < 2)
die_usage(1);
- while ((c = getopt(argc, argv, "ht:")) != -1) {
+ while ((c = getopt(argc, argv, "ht:s:")) != -1) {
switch (c) {
case 'h':
die_usage(0);
break;
case 't':
- sscanf(optarg, "%x", target_token);
+ sscanf(optarg, "%x", &p->target_token);
+ break;
+ case 's':
+ strncpy(p->subflow_addrs, optarg,
+ sizeof(p->subflow_addrs) - 1);
break;
default:
die_usage(1);
@@ -262,10 +420,15 @@ static void parse_opts(int argc, char **argv, __u32 *target_token)
int main(int argc, char *argv[])
{
- __u32 target_token;
+ struct params p = { 0 };
+
+ parse_opts(argc, argv, &p);
+
+ if (p.target_token)
+ get_mptcpinfo(p.target_token);
- parse_opts(argc, argv, &target_token);
- get_mptcpinfo(target_token);
+ if (p.subflow_addrs[0] != '\0')
+ get_subflow_info(p.subflow_addrs);
return 0;
}
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 218aac467321..3cf1e2a612ce 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -72,13 +72,21 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
+again:
int err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -91,7 +99,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -136,7 +144,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index befa66f5a366..b8af65373b3a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -62,6 +62,7 @@ unset sflags
unset fastclose
unset fullmesh
unset speed
+unset join_syn_rej
unset join_csum_ns1
unset join_csum_ns2
unset join_fail_nr
@@ -1403,6 +1404,7 @@ chk_join_nr()
local syn_nr=$1
local syn_ack_nr=$2
local ack_nr=$3
+ local syn_rej=${join_syn_rej:-0}
local csum_ns1=${join_csum_ns1:-0}
local csum_ns2=${join_csum_ns2:-0}
local fail_nr=${join_fail_nr:-0}
@@ -1468,6 +1470,15 @@ chk_join_nr()
fail_test "got $count JOIN[s] ack HMAC failure expected 0"
fi
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinRejected")
+ if [ -z "$count" ]; then
+ rc=${KSFT_SKIP}
+ elif [ "$count" != "$syn_rej" ]; then
+ rc=${KSFT_FAIL}
+ print_check "syn rejected"
+ fail_test "got $count JOIN[s] syn rejected expected $syn_rej"
+ fi
+
print_results "join Rx" ${rc}
join_syn_tx="${join_syn_tx:-${syn_nr}}" \
@@ -1963,7 +1974,8 @@ subflows_tests()
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# subflow
@@ -1992,7 +2004,8 @@ subflows_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 2 1
+ join_syn_rej=1 \
+ chk_join_nr 2 2 1
fi
# single subflow, dev
@@ -3061,7 +3074,8 @@ syncookies_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
pm_nl_add_endpoint $ns2 10.0.2.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 2 1 1
+ join_syn_rej=1 \
+ chk_join_nr 2 1 1
fi
# test signal address with cookies
@@ -3545,7 +3559,8 @@ userspace_tests()
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
fi
# userspace pm type does not send join
@@ -3568,7 +3583,8 @@ userspace_tests()
pm_nl_add_endpoint $ns2 10.0.3.2 flags subflow
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 1 1 0
+ join_syn_rej=1 \
+ chk_join_nr 1 1 0
chk_prio_nr 0 0 0 0
fi
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 051e289d7967..99c87cd6e255 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -331,12 +331,15 @@ mptcp_lib_result_print_all_tap() {
# get the value of keyword $1 in the line marked by keyword $2
mptcp_lib_get_info_value() {
- grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ grep "${2}" 2>/dev/null |
+ sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+ # the ';q' at the end limits to the first matched entry.
}
# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
mptcp_lib_evts_get_info() {
- grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+ grep "${4:-}" "${2}" 2>/dev/null |
+ mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
}
# $1: PID
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
index 926b0be87c99..9934a68df237 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.c
@@ -159,13 +159,21 @@ static const char *getxinfo_strerr(int err)
}
static void xgetaddrinfo(const char *node, const char *service,
- const struct addrinfo *hints,
+ struct addrinfo *hints,
struct addrinfo **res)
{
+again:
int err = getaddrinfo(node, service, hints, res);
if (err) {
- const char *errstr = getxinfo_strerr(err);
+ const char *errstr;
+
+ if (err == EAI_SOCKTYPE) {
+ hints->ai_protocol = IPPROTO_TCP;
+ goto again;
+ }
+
+ errstr = getxinfo_strerr(err);
fprintf(stderr, "Fatal: getaddrinfo(%s:%s): %s\n",
node ? node : "", service ? service : "", errstr);
@@ -178,7 +186,7 @@ static int sock_listen_mptcp(const char * const listenaddr,
{
int sock = -1;
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
.ai_flags = AI_PASSIVE | AI_NUMERICHOST
};
@@ -223,7 +231,7 @@ static int sock_connect_mptcp(const char * const remoteaddr,
const char * const port, int proto)
{
struct addrinfo hints = {
- .ai_protocol = IPPROTO_TCP,
+ .ai_protocol = IPPROTO_MPTCP,
.ai_socktype = SOCK_STREAM,
};
struct addrinfo *a, *addr;
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
index ffe161fac8b5..3bdcbbdba925 100644
--- a/tools/testing/selftests/net/netfilter/Makefile
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -12,6 +12,7 @@ TEST_PROGS += conntrack_dump_flush.sh
TEST_PROGS += conntrack_icmp_related.sh
TEST_PROGS += conntrack_ipip_mtu.sh
TEST_PROGS += conntrack_tcp_unreplied.sh
+TEST_PROGS += conntrack_resize.sh
TEST_PROGS += conntrack_sctp_collision.sh
TEST_PROGS += conntrack_vrf.sh
TEST_PROGS += conntrack_reverse_clash.sh
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
index 43d8b500d391..363646f4fefe 100644
--- a/tools/testing/selftests/net/netfilter/config
+++ b/tools/testing/selftests/net/netfilter/config
@@ -46,6 +46,7 @@ CONFIG_NETFILTER_XT_MATCH_STATE=m
CONFIG_NETFILTER_XT_MATCH_STRING=m
CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_PROCFS=y
CONFIG_NF_CONNTRACK_EVENTS=y
CONFIG_NF_CONNTRACK_FTP=m
CONFIG_NF_CONNTRACK_MARK=y
diff --git a/tools/testing/selftests/net/netfilter/conntrack_resize.sh b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
new file mode 100755
index 000000000000..aabc7c51181e
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_resize.sh
@@ -0,0 +1,406 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft tool"
+
+init_net_max=0
+ct_buckets=0
+tmpfile=""
+ret=0
+
+modprobe -q nf_conntrack
+if ! sysctl -q net.netfilter.nf_conntrack_max >/dev/null;then
+ echo "SKIP: conntrack sysctls not available"
+ exit $KSFT_SKIP
+fi
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max) || exit 1
+ct_buckets=$(sysctl -n net.netfilter.nf_conntrack_buckets) || exit 1
+
+cleanup() {
+ cleanup_all_ns
+
+ rm -f "$tmpfile"
+
+ # restore original sysctl setting
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+ sysctl -q net.netfilter.nf_conntrack_buckets=$ct_buckets
+}
+trap cleanup EXIT
+
+check_max_alias()
+{
+ local expected="$1"
+ # old name, expected to alias to the first, i.e. changing one
+ # changes the other as well.
+ local lv=$(sysctl -n net.nf_conntrack_max)
+
+ if [ $expected -ne "$lv" ];then
+ echo "nf_conntrack_max sysctls should have identical values"
+ exit 1
+ fi
+}
+
+insert_ctnetlink() {
+ local ns="$1"
+ local count="$2"
+ local i=0
+ local bulk=16
+
+ while [ $i -lt $count ] ;do
+ ip netns exec "$ns" bash -c "for i in \$(seq 1 $bulk); do \
+ if ! conntrack -I -s \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ -d \$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%256)).\$((\$RANDOM%255+1)) \
+ --protonum 17 --timeout 120 --status ASSURED,SEEN_REPLY --sport \$RANDOM --dport 53; then \
+ return;\
+ fi & \
+ done ; wait" 2>/dev/null
+
+ i=$((i+bulk))
+ done
+}
+
+check_ctcount() {
+ local ns="$1"
+ local count="$2"
+ local msg="$3"
+
+ local now=$(ip netns exec "$ns" conntrack -C)
+
+ if [ $now -ne "$count" ] ;then
+ echo "expected $count entries in $ns, not $now: $msg"
+ exit 1
+ fi
+
+ echo "PASS: got $count connections: $msg"
+}
+
+ctresize() {
+ local duration="$1"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ while [ $now -lt $end ]; do
+ sysctl -q net.netfilter.nf_conntrack_buckets=$RANDOM
+ now=$(date +%s)
+ done
+}
+
+do_rsleep() {
+ local limit="$1"
+ local r=$RANDOM
+
+ r=$((r%limit))
+ sleep "$r"
+}
+
+ct_flush_once() {
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -F 2>/dev/null
+}
+
+ctflush() {
+ local ns="$1"
+ local duration="$2"
+ local now=$(date +%s)
+ local end=$((now + duration))
+
+ do_rsleep "$duration"
+
+ while [ $now -lt $end ]; do
+ ct_flush_once "$ns"
+ do_rsleep "$duration"
+ now=$(date +%s)
+ done
+}
+
+ctflood()
+{
+ local ns="$1"
+ local duration="$2"
+ local msg="$3"
+ local now=$(date +%s)
+ local end=$((now + duration))
+ local j=0
+ local k=0
+
+ while [ $now -lt $end ]; do
+ j=$((j%256))
+ k=$((k%256))
+
+ ip netns exec "$ns" bash -c \
+ "j=$j k=$k; for i in \$(seq 1 254); do ping -q -c 1 127.\$k.\$j.\$i & done; wait" >/dev/null 2>&1
+
+ j=$((j+1))
+
+ if [ $j -eq 256 ];then
+ k=$((k+1))
+ fi
+
+ now=$(date +%s)
+ done
+
+ wait
+}
+
+# dump to /dev/null. We don't want dumps to cause infinite loops
+# or use-after-free even when conntrack table is altered while dumps
+# are in progress.
+ct_nulldump()
+{
+ local ns="$1"
+
+ ip netns exec "$ns" conntrack -L > /dev/null 2>&1 &
+
+ # Don't require /proc support in conntrack
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack" > /dev/null &
+ fi
+
+ wait
+}
+
+check_taint()
+{
+ local tainted_then="$1"
+ local msg="$2"
+
+ local tainted_now=0
+
+ if [ "$tainted_then" -ne 0 ];then
+ return
+ fi
+
+ read tainted_now < /proc/sys/kernel/tainted
+
+ if [ "$tainted_now" -eq 0 ];then
+ echo "PASS: $msg"
+ else
+ echo "TAINT: $msg"
+ dmesg
+ exit 1
+ fi
+}
+
+insert_flood()
+{
+ local n="$1"
+ local r=0
+
+ r=$((RANDOM%2000))
+
+ ctflood "$n" "$timeout" "floodresize" &
+ insert_ctnetlink "$n" "$r" &
+ ctflush "$n" "$timeout" &
+ ct_nulldump "$n" &
+
+ wait
+}
+
+test_floodresize_all()
+{
+ local timeout=20
+ local n=""
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ for n in "$nsclient1" "$nsclient2";do
+ insert_flood "$n" &
+ done
+
+ # resize table constantly while flood/insert/dump/flushs
+ # are happening in parallel.
+ ctresize "$timeout"
+
+ # wait for subshells to complete, everything is limited
+ # by $timeout.
+ wait
+
+ check_taint "$tainted_then" "resize+flood"
+}
+
+check_dump()
+{
+ local ns="$1"
+ local protoname="$2"
+ local c=0
+ local proto=0
+ local proc=0
+ local unique=""
+
+ c=$(ip netns exec "$ns" conntrack -C)
+
+ # NOTE: assumes timeouts are large enough to not have
+ # expirations in all following tests.
+ l=$(ip netns exec "$ns" conntrack -L 2>/dev/null | tee "$tmpfile" | wc -l)
+
+ if [ "$c" -ne "$l" ]; then
+ echo "FAIL: count inconsistency for $ns: $c != $l"
+ ret=1
+ fi
+
+ # check the dump we retrieved is free of duplicated entries.
+ unique=$(sort "$tmpfile" | uniq | wc -l)
+ if [ "$l" -ne "$unique" ]; then
+ echo "FAIL: count identical but listing contained redundant entries: $l != $unique"
+ ret=1
+ fi
+
+ # we either inserted icmp or only udp, hence, --proto should return same entry count as without filter.
+ proto=$(ip netns exec "$ns" conntrack -L --proto $protoname 2>/dev/null | wc -l)
+ if [ "$l" -ne "$proto" ]; then
+ echo "FAIL: dump inconsistency for $ns: $l != $proto"
+ ret=1
+ fi
+
+ if [ -r /proc/self/net/nf_conntrack ] ; then
+ proc=$(ip netns exec "$ns" bash -c "wc -l < /proc/self/net/nf_conntrack")
+
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency for $ns: $l != $proc"
+ ret=1
+ fi
+
+ proc=$(ip netns exec "$ns" bash -c "sort < /proc/self/net/nf_conntrack | uniq | wc -l")
+
+ if [ "$l" -ne "$proc" ]; then
+ echo "FAIL: proc inconsistency after uniq filter for $ns: $l != $proc"
+ ret=1
+ fi
+ fi
+
+ echo "PASS: dump in netns had same entry count (-C $c, -L $l, -p $proto, /proc $proc)"
+}
+
+test_dump_all()
+{
+ local timeout=3
+ local tainted_then=""
+
+ read tainted_then < /proc/sys/kernel/tainted
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ctflood "$nsclient1" $timeout "dumpall" &
+ insert_ctnetlink "$nsclient2" 2000
+
+ wait
+
+ check_dump "$nsclient1" "icmp"
+ check_dump "$nsclient2" "udp"
+
+ check_taint "$tainted_then" "test parallel conntrack dumps"
+}
+
+check_sysctl_immutable()
+{
+ local ns="$1"
+ local name="$2"
+ local failhard="$3"
+ local o=0
+ local n=0
+
+ o=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+ n=$((o+1))
+
+ # return value isn't reliable, need to read it back
+ ip netns exec "$ns" sysctl -q "$name"=$n 2>/dev/null >/dev/null
+
+ n=$(ip netns exec "$ns" sysctl -n "$name" 2>/dev/null)
+
+ [ -z "$n" ] && return 1
+
+ if [ $o -ne $n ]; then
+ if [ $failhard -gt 0 ] ;then
+ echo "FAIL: net.$name should not be changeable from namespace (now $n)"
+ ret=1
+ fi
+ return 0
+ fi
+
+ return 1
+}
+
+test_conntrack_max_limit()
+{
+ sysctl -q net.netfilter.nf_conntrack_max=100
+ insert_ctnetlink "$nsclient1" 101
+
+ # check netns is clamped by init_net, i.e., either netns follows
+ # init_net value, or a higher pernet limit (compared to init_net) is ignored.
+ check_ctcount "$nsclient1" 100 "netns conntrack_max is init_net bound"
+
+ sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+}
+
+test_conntrack_disable()
+{
+ local timeout=2
+
+ # disable conntrack pickups
+ ip netns exec "$nsclient1" nft flush table ip test_ct
+
+ ct_flush_once "$nsclient1"
+ ct_flush_once "$nsclient2"
+
+ ctflood "$nsclient1" "$timeout" "conntrack disable"
+ ip netns exec "$nsclient2" ping -q -c 1 127.0.0.1 >/dev/null 2>&1
+
+ # Disabled, should not have picked up any connection.
+ check_ctcount "$nsclient1" 0 "conntrack disabled"
+
+ # This one is still active, expect 1 connection.
+ check_ctcount "$nsclient2" 1 "conntrack enabled"
+}
+
+init_net_max=$(sysctl -n net.netfilter.nf_conntrack_max)
+
+check_max_alias $init_net_max
+
+sysctl -q net.netfilter.nf_conntrack_max="262000"
+check_max_alias 262000
+
+setup_ns nsclient1 nsclient2
+
+# check this only works from init_net
+for n in netfilter.nf_conntrack_buckets netfilter.nf_conntrack_expect_max net.nf_conntrack_max;do
+ check_sysctl_immutable "$nsclient1" "net.$n" 1
+done
+
+# won't work on older kernels. If it works, check that the netns obeys the limit
+if check_sysctl_immutable "$nsclient1" net.netfilter.nf_conntrack_max 0;then
+ # subtest: if pernet is changeable, check that reducing it in pernet
+ # limits the pernet entries. Inverse, pernet clamped by a lower init_net
+ # setting, is already checked by "test_conntrack_max_limit" test.
+
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=1
+ insert_ctnetlink "$nsclient1" 2
+ check_ctcount "$nsclient1" 1 "netns conntrack_max is pernet bound"
+ ip netns exec "$nsclient1" sysctl -q net.netfilter.nf_conntrack_max=$init_net_max
+fi
+
+for n in "$nsclient1" "$nsclient2";do
+# enable conntrack in both namespaces
+ip netns exec "$n" nft -f - <<EOF
+table ip test_ct {
+ chain input {
+ type filter hook input priority 0
+ ct state new counter
+ }
+}
+EOF
+done
+
+tmpfile=$(mktemp)
+test_conntrack_max_limit
+test_dump_all
+test_floodresize_all
+test_conntrack_disable
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
index ce1451c275fd..ea47dd246a08 100755
--- a/tools/testing/selftests/net/netfilter/nft_fib.sh
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -45,6 +45,19 @@ table inet filter {
EOF
}
+load_input_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain input {
+ type filter hook input priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
load_pbr_ruleset() {
local netns=$1
@@ -165,6 +178,16 @@ check_drops || exit 1
echo "PASS: fib expression did not cause unwanted packet drops"
+load_input_ruleset "$ns1"
+
+test_ping 127.0.0.1 ::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.1.99 dead:1::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not discard loopback packets"
+
ip netns exec "$nsrouter" nft flush table inet filter
ip -net "$ns1" route del default
diff --git a/tools/testing/selftests/net/ovpn/.gitignore b/tools/testing/selftests/net/ovpn/.gitignore
new file mode 100644
index 000000000000..ee44c081ca7c
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0+
+ovpn-cli
diff --git a/tools/testing/selftests/net/ovpn/Makefile b/tools/testing/selftests/net/ovpn/Makefile
new file mode 100644
index 000000000000..2d102878cb6d
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/Makefile
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+CFLAGS = -pedantic -Wextra -Wall -Wl,--no-as-needed -g -O0 -ggdb $(KHDR_INCLUDES)
+VAR_CFLAGS = $(shell pkg-config --cflags libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_CFLAGS),)
+VAR_CFLAGS = -I/usr/include/libnl3
+endif
+CFLAGS += $(VAR_CFLAGS)
+
+
+LDLIBS = -lmbedtls -lmbedcrypto
+VAR_LDLIBS = $(shell pkg-config --libs libnl-3.0 libnl-genl-3.0 2>/dev/null)
+ifeq ($(VAR_LDLIBS),)
+VAR_LDLIBS = -lnl-genl-3 -lnl-3
+endif
+LDLIBS += $(VAR_LDLIBS)
+
+
+TEST_FILES = common.sh
+
+TEST_PROGS = test.sh \
+ test-chachapoly.sh \
+ test-tcp.sh \
+ test-float.sh \
+ test-close-socket.sh \
+ test-close-socket-tcp.sh
+
+TEST_GEN_FILES := ovpn-cli
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/ovpn/common.sh b/tools/testing/selftests/net/ovpn/common.sh
new file mode 100644
index 000000000000..7502292a1ee0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/common.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+UDP_PEERS_FILE=${UDP_PEERS_FILE:-udp_peers.txt}
+TCP_PEERS_FILE=${TCP_PEERS_FILE:-tcp_peers.txt}
+OVPN_CLI=${OVPN_CLI:-./ovpn-cli}
+ALG=${ALG:-aes}
+PROTO=${PROTO:-UDP}
+FLOAT=${FLOAT:-0}
+
+create_ns() {
+ ip netns add peer${1}
+}
+
+setup_ns() {
+ MODE="P2P"
+
+ if [ ${1} -eq 0 ]; then
+ MODE="MP"
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip link add veth${p} netns peer0 type veth peer name veth${p} netns peer${p}
+
+ ip -n peer0 addr add 10.10.${p}.1/24 dev veth${p}
+ ip -n peer0 link set veth${p} up
+
+ ip -n peer${p} addr add 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} link set veth${p} up
+ done
+ fi
+
+ ip netns exec peer${1} ${OVPN_CLI} new_iface tun${1} $MODE
+ ip -n peer${1} addr add ${2} dev tun${1}
+ ip -n peer${1} link set tun${1} up
+}
+
+add_peer() {
+ if [ "${PROTO}" == "UDP" ]; then
+ if [ ${1} -eq 0 ]; then
+ ip netns exec peer0 ${OVPN_CLI} new_multi_peer tun0 1 ${UDP_PEERS_FILE}
+
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 ${ALG} 0 \
+ data64.key
+ done
+ else
+ ip netns exec peer${1} ${OVPN_CLI} new_peer tun${1} ${1} 1 10.10.${1}.1 1
+ ip netns exec peer${1} ${OVPN_CLI} new_key tun${1} ${1} 1 0 ${ALG} 1 \
+ data64.key
+ fi
+ else
+ if [ ${1} -eq 0 ]; then
+ (ip netns exec peer0 ${OVPN_CLI} listen tun0 1 ${TCP_PEERS_FILE} && {
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 1 0 \
+ ${ALG} 0 data64.key
+ done
+ }) &
+ sleep 5
+ else
+ ip netns exec peer${1} ${OVPN_CLI} connect tun${1} ${1} 10.10.${1}.1 1 \
+ data64.key
+ fi
+ fi
+}
+
+cleanup() {
+ # some ovpn-cli processes sleep in background so they need manual poking
+ killall $(basename ${OVPN_CLI}) 2>/dev/null || true
+
+ # netns peer0 is deleted without erasing ifaces first
+ for p in $(seq 1 10); do
+ ip -n peer${p} link set tun${p} down 2>/dev/null || true
+ ip netns exec peer${p} ${OVPN_CLI} del_iface tun${p} 2>/dev/null || true
+ done
+ for p in $(seq 1 10); do
+ ip -n peer0 link del veth${p} 2>/dev/null || true
+ done
+ for p in $(seq 0 10); do
+ ip netns del peer${p} 2>/dev/null || true
+ done
+}
+
+if [ "${PROTO}" == "UDP" ]; then
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${UDP_PEERS_FILE} | awk '{print $1}')}
+else
+ NUM_PEERS=${NUM_PEERS:-$(wc -l ${TCP_PEERS_FILE} | awk '{print $1}')}
+fi
+
+
diff --git a/tools/testing/selftests/net/ovpn/config b/tools/testing/selftests/net/ovpn/config
new file mode 100644
index 000000000000..71946ba9fa17
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/config
@@ -0,0 +1,10 @@
+CONFIG_NET=y
+CONFIG_INET=y
+CONFIG_STREAM_PARSER=y
+CONFIG_NET_UDP_TUNNEL=y
+CONFIG_DST_CACHE=y
+CONFIG_CRYPTO=y
+CONFIG_CRYPTO_AES=y
+CONFIG_CRYPTO_GCM=y
+CONFIG_CRYPTO_CHACHA20POLY1305=y
+CONFIG_OVPN=m
diff --git a/tools/testing/selftests/net/ovpn/data64.key b/tools/testing/selftests/net/ovpn/data64.key
new file mode 100644
index 000000000000..a99e88c4e290
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/data64.key
@@ -0,0 +1,5 @@
+jRqMACN7d7/aFQNT8S7jkrBD8uwrgHbG5OQZP2eu4R1Y7tfpS2bf5RHv06Vi163CGoaIiTX99R3B
+ia9ycAH8Wz1+9PWv51dnBLur9jbShlgZ2QHLtUc4a/gfT7zZwULXuuxdLnvR21DDeMBaTbkgbai9
+uvAa7ne1liIgGFzbv+Bas4HDVrygxIxuAnP5Qgc3648IJkZ0QEXPF+O9f0n5+QIvGCxkAUVx+5K6
+KIs+SoeWXnAopELmoGSjUpFtJbagXK82HfdqpuUxT2Tnuef0/14SzVE/vNleBNu2ZbyrSAaah8tE
+BofkPJUBFY+YQcfZNM5Dgrw3i+Bpmpq/gpdg5w==
diff --git a/tools/testing/selftests/net/ovpn/ovpn-cli.c b/tools/testing/selftests/net/ovpn/ovpn-cli.c
new file mode 100644
index 000000000000..69e41fc07fbc
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/ovpn-cli.c
@@ -0,0 +1,2376 @@
+// SPDX-License-Identifier: GPL-2.0
+/* OpenVPN data channel accelerator
+ *
+ * Copyright (C) 2020-2025 OpenVPN, Inc.
+ *
+ * Author: Antonio Quartulli <antonio@openvpn.net>
+ */
+
+#include <stdio.h>
+#include <inttypes.h>
+#include <stdbool.h>
+#include <string.h>
+#include <errno.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+#include <net/if.h>
+#include <netinet/in.h>
+#include <time.h>
+
+#include <linux/ovpn.h>
+#include <linux/types.h>
+#include <linux/netlink.h>
+
+#include <netlink/socket.h>
+#include <netlink/netlink.h>
+#include <netlink/genl/genl.h>
+#include <netlink/genl/family.h>
+#include <netlink/genl/ctrl.h>
+
+#include <mbedtls/base64.h>
+#include <mbedtls/error.h>
+
+#include <sys/socket.h>
+
+/* defines to make checkpatch happy */
+#define strscpy strncpy
+#define __always_unused __attribute__((__unused__))
+
+/* libnl < 3.5.0 does not set the NLA_F_NESTED on its own, therefore we
+ * have to explicitly do it to prevent the kernel from failing upon
+ * parsing of the message
+ */
+#define nla_nest_start(_msg, _type) \
+ nla_nest_start(_msg, (_type) | NLA_F_NESTED)
+
+/* libnl < 3.11.0 does not implement nla_get_uint() */
+uint64_t ovpn_nla_get_uint(struct nlattr *attr)
+{
+ if (nla_len(attr) == sizeof(uint32_t))
+ return nla_get_u32(attr);
+ else
+ return nla_get_u64(attr);
+}
+
+typedef int (*ovpn_nl_cb)(struct nl_msg *msg, void *arg);
+
+enum ovpn_key_direction {
+ KEY_DIR_IN = 0,
+ KEY_DIR_OUT,
+};
+
+#define KEY_LEN (256 / 8)
+#define NONCE_LEN 8
+
+#define PEER_ID_UNDEF 0x00FFFFFF
+#define MAX_PEERS 10
+
+struct nl_ctx {
+ struct nl_sock *nl_sock;
+ struct nl_msg *nl_msg;
+ struct nl_cb *nl_cb;
+
+ int ovpn_dco_id;
+};
+
+enum ovpn_cmd {
+ CMD_INVALID,
+ CMD_NEW_IFACE,
+ CMD_DEL_IFACE,
+ CMD_LISTEN,
+ CMD_CONNECT,
+ CMD_NEW_PEER,
+ CMD_NEW_MULTI_PEER,
+ CMD_SET_PEER,
+ CMD_DEL_PEER,
+ CMD_GET_PEER,
+ CMD_NEW_KEY,
+ CMD_DEL_KEY,
+ CMD_GET_KEY,
+ CMD_SWAP_KEYS,
+ CMD_LISTEN_MCAST,
+};
+
+struct ovpn_ctx {
+ enum ovpn_cmd cmd;
+
+ __u8 key_enc[KEY_LEN];
+ __u8 key_dec[KEY_LEN];
+ __u8 nonce[NONCE_LEN];
+
+ enum ovpn_cipher_alg cipher;
+
+ sa_family_t sa_family;
+
+ unsigned long peer_id;
+ unsigned long lport;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } remote;
+
+ union {
+ struct sockaddr_in in4;
+ struct sockaddr_in6 in6;
+ } peer_ip;
+
+ bool peer_ip_set;
+
+ unsigned int ifindex;
+ char ifname[IFNAMSIZ];
+ enum ovpn_mode mode;
+ bool mode_set;
+
+ int socket;
+ int cli_sockets[MAX_PEERS];
+
+ __u32 keepalive_interval;
+ __u32 keepalive_timeout;
+
+ enum ovpn_key_direction key_dir;
+ enum ovpn_key_slot key_slot;
+ int key_id;
+
+ const char *peers_file;
+};
+
+static int ovpn_nl_recvmsgs(struct nl_ctx *ctx)
+{
+ int ret;
+
+ ret = nl_recvmsgs(ctx->nl_sock, ctx->nl_cb);
+
+ switch (ret) {
+ case -NLE_INTR:
+ fprintf(stderr,
+ "netlink received interrupt due to signal - ignoring\n");
+ break;
+ case -NLE_NOMEM:
+ fprintf(stderr, "netlink out of memory error\n");
+ break;
+ case -NLE_AGAIN:
+ fprintf(stderr,
+ "netlink reports blocking read - aborting wait\n");
+ break;
+ default:
+ if (ret)
+ fprintf(stderr, "netlink reports error (%d): %s\n",
+ ret, nl_geterror(-ret));
+ break;
+ }
+
+ return ret;
+}
+
+static struct nl_ctx *nl_ctx_alloc_flags(struct ovpn_ctx *ovpn, int cmd,
+ int flags)
+{
+ struct nl_ctx *ctx;
+ int err, ret;
+
+ ctx = calloc(1, sizeof(*ctx));
+ if (!ctx)
+ return NULL;
+
+ ctx->nl_sock = nl_socket_alloc();
+ if (!ctx->nl_sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(ctx->nl_sock, 8192, 8192);
+
+ ret = genl_connect(ctx->nl_sock);
+ if (ret) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_sock;
+ }
+
+ /* enable Extended ACK for detailed error reporting */
+ err = 1;
+ setsockopt(nl_socket_get_fd(ctx->nl_sock), SOL_NETLINK, NETLINK_EXT_ACK,
+ &err, sizeof(err));
+
+ ctx->ovpn_dco_id = genl_ctrl_resolve(ctx->nl_sock, OVPN_FAMILY_NAME);
+ if (ctx->ovpn_dco_id < 0) {
+ fprintf(stderr, "cannot find ovpn_dco netlink component: %d\n",
+ ctx->ovpn_dco_id);
+ goto err_free;
+ }
+
+ ctx->nl_msg = nlmsg_alloc();
+ if (!ctx->nl_msg) {
+ fprintf(stderr, "cannot allocate netlink message\n");
+ goto err_sock;
+ }
+
+ ctx->nl_cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!ctx->nl_cb) {
+ fprintf(stderr, "failed to allocate netlink callback\n");
+ goto err_msg;
+ }
+
+ nl_socket_set_cb(ctx->nl_sock, ctx->nl_cb);
+
+ genlmsg_put(ctx->nl_msg, 0, 0, ctx->ovpn_dco_id, 0, flags, cmd, 0);
+
+ if (ovpn->ifindex > 0)
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_IFINDEX, ovpn->ifindex);
+
+ return ctx;
+nla_put_failure:
+err_msg:
+ nlmsg_free(ctx->nl_msg);
+err_sock:
+ nl_socket_free(ctx->nl_sock);
+err_free:
+ free(ctx);
+ return NULL;
+}
+
+static struct nl_ctx *nl_ctx_alloc(struct ovpn_ctx *ovpn, int cmd)
+{
+ return nl_ctx_alloc_flags(ovpn, cmd, 0);
+}
+
+static void nl_ctx_free(struct nl_ctx *ctx)
+{
+ if (!ctx)
+ return;
+
+ nl_socket_free(ctx->nl_sock);
+ nlmsg_free(ctx->nl_msg);
+ nl_cb_put(ctx->nl_cb);
+ free(ctx);
+}
+
+static int ovpn_nl_cb_error(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ struct nlmsghdr *nlh = (struct nlmsghdr *)err - 1;
+ struct nlattr *tb_msg[NLMSGERR_ATTR_MAX + 1];
+ int len = nlh->nlmsg_len;
+ struct nlattr *attrs;
+ int *ret = arg;
+ int ack_len = sizeof(*nlh) + sizeof(int) + sizeof(*nlh);
+
+ *ret = err->error;
+
+ if (!(nlh->nlmsg_flags & NLM_F_ACK_TLVS))
+ return NL_STOP;
+
+ if (!(nlh->nlmsg_flags & NLM_F_CAPPED))
+ ack_len += err->msg.nlmsg_len - sizeof(*nlh);
+
+ if (len <= ack_len)
+ return NL_STOP;
+
+ attrs = (void *)((uint8_t *)nlh + ack_len);
+ len -= ack_len;
+
+ nla_parse(tb_msg, NLMSGERR_ATTR_MAX, attrs, len, NULL);
+ if (tb_msg[NLMSGERR_ATTR_MSG]) {
+ len = strnlen((char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]),
+ nla_len(tb_msg[NLMSGERR_ATTR_MSG]));
+ fprintf(stderr, "kernel error: %*s\n", len,
+ (char *)nla_data(tb_msg[NLMSGERR_ATTR_MSG]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_NEST]) {
+ fprintf(stderr, "missing required nesting type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_NEST]));
+ }
+
+ if (tb_msg[NLMSGERR_ATTR_MISS_TYPE]) {
+ fprintf(stderr, "missing required attribute type %u\n",
+ nla_get_u32(tb_msg[NLMSGERR_ATTR_MISS_TYPE]));
+ }
+
+ return NL_STOP;
+}
+
+static int ovpn_nl_cb_finish(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_SKIP;
+}
+
+static int ovpn_nl_cb_ack(struct nl_msg (*msg)__always_unused,
+ void *arg)
+{
+ int *status = arg;
+
+ *status = 0;
+ return NL_STOP;
+}
+
+static int ovpn_nl_msg_send(struct nl_ctx *ctx, ovpn_nl_cb cb)
+{
+ int status = 1;
+
+ nl_cb_err(ctx->nl_cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_FINISH, NL_CB_CUSTOM, ovpn_nl_cb_finish,
+ &status);
+ nl_cb_set(ctx->nl_cb, NL_CB_ACK, NL_CB_CUSTOM, ovpn_nl_cb_ack, &status);
+
+ if (cb)
+ nl_cb_set(ctx->nl_cb, NL_CB_VALID, NL_CB_CUSTOM, cb, ctx);
+
+ nl_send_auto_complete(ctx->nl_sock, ctx->nl_msg);
+
+ while (status == 1)
+ ovpn_nl_recvmsgs(ctx);
+
+ if (status < 0)
+ fprintf(stderr, "failed to send netlink message: %s (%d)\n",
+ strerror(-status), status);
+
+ return status;
+}
+
+static int ovpn_parse_key(const char *file, struct ovpn_ctx *ctx)
+{
+ int idx_enc, idx_dec, ret = -1;
+ unsigned char *ckey = NULL;
+ __u8 *bkey = NULL;
+ size_t olen = 0;
+ long ckey_len;
+ FILE *fp;
+
+ fp = fopen(file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open: %s\n", file);
+ return -1;
+ }
+
+ /* get file size */
+ fseek(fp, 0L, SEEK_END);
+ ckey_len = ftell(fp);
+ rewind(fp);
+
+ /* if the file is longer, let's just read a portion */
+ if (ckey_len > 256)
+ ckey_len = 256;
+
+ ckey = malloc(ckey_len);
+ if (!ckey)
+ goto err;
+
+ ret = fread(ckey, 1, ckey_len, fp);
+ if (ret != ckey_len) {
+ fprintf(stderr,
+ "couldn't read enough data from key file: %dbytes read\n",
+ ret);
+ goto err;
+ }
+
+ olen = 0;
+ ret = mbedtls_base64_decode(NULL, 0, &olen, ckey, ckey_len);
+ if (ret != MBEDTLS_ERR_BASE64_BUFFER_TOO_SMALL) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error1: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ bkey = malloc(olen);
+ if (!bkey) {
+ fprintf(stderr, "cannot allocate binary key buffer\n");
+ goto err;
+ }
+
+ ret = mbedtls_base64_decode(bkey, olen, &olen, ckey, ckey_len);
+ if (ret) {
+ char buf[256];
+
+ mbedtls_strerror(ret, buf, sizeof(buf));
+ fprintf(stderr, "unexpected base64 error2: %s (%d)\n", buf,
+ ret);
+
+ goto err;
+ }
+
+ if (olen < 2 * KEY_LEN + NONCE_LEN) {
+ fprintf(stderr,
+ "not enough data in key file, found %zdB but needs %dB\n",
+ olen, 2 * KEY_LEN + NONCE_LEN);
+ goto err;
+ }
+
+ switch (ctx->key_dir) {
+ case KEY_DIR_IN:
+ idx_enc = 0;
+ idx_dec = 1;
+ break;
+ case KEY_DIR_OUT:
+ idx_enc = 1;
+ idx_dec = 0;
+ break;
+ default:
+ goto err;
+ }
+
+ memcpy(ctx->key_enc, bkey + KEY_LEN * idx_enc, KEY_LEN);
+ memcpy(ctx->key_dec, bkey + KEY_LEN * idx_dec, KEY_LEN);
+ memcpy(ctx->nonce, bkey + 2 * KEY_LEN, NONCE_LEN);
+
+ ret = 0;
+
+err:
+ fclose(fp);
+ free(bkey);
+ free(ckey);
+
+ return ret;
+}
+
+static int ovpn_parse_cipher(const char *cipher, struct ovpn_ctx *ctx)
+{
+ if (strcmp(cipher, "aes") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ else if (strcmp(cipher, "chachapoly") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_CHACHA20_POLY1305;
+ else if (strcmp(cipher, "none") == 0)
+ ctx->cipher = OVPN_CIPHER_ALG_NONE;
+ else
+ return -ENOTSUP;
+
+ return 0;
+}
+
+static int ovpn_parse_key_direction(const char *dir, struct ovpn_ctx *ctx)
+{
+ int in_dir;
+
+ in_dir = strtoll(dir, NULL, 10);
+ switch (in_dir) {
+ case KEY_DIR_IN:
+ case KEY_DIR_OUT:
+ ctx->key_dir = in_dir;
+ break;
+ default:
+ fprintf(stderr,
+ "invalid key direction provided. Can be 0 or 1 only\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_socket(struct ovpn_ctx *ctx, sa_family_t family, int proto)
+{
+ struct sockaddr_storage local_sock = { 0 };
+ struct sockaddr_in6 *in6;
+ struct sockaddr_in *in;
+ int ret, s, sock_type;
+ size_t sock_len;
+
+ if (proto == IPPROTO_UDP)
+ sock_type = SOCK_DGRAM;
+ else if (proto == IPPROTO_TCP)
+ sock_type = SOCK_STREAM;
+ else
+ return -EINVAL;
+
+ s = socket(family, sock_type, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (family) {
+ case AF_INET:
+ in = (struct sockaddr_in *)&local_sock;
+ in->sin_family = family;
+ in->sin_port = htons(ctx->lport);
+ in->sin_addr.s_addr = htonl(INADDR_ANY);
+ sock_len = sizeof(*in);
+ break;
+ case AF_INET6:
+ in6 = (struct sockaddr_in6 *)&local_sock;
+ in6->sin6_family = family;
+ in6->sin6_port = htons(ctx->lport);
+ in6->sin6_addr = in6addr_any;
+ sock_len = sizeof(*in6);
+ break;
+ default:
+ return -1;
+ }
+
+ int opt = 1;
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &opt, sizeof(opt));
+
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEADDR");
+ return ret;
+ }
+
+ ret = setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &opt, sizeof(opt));
+ if (ret < 0) {
+ perror("setsockopt for SO_REUSEPORT");
+ return ret;
+ }
+
+ if (family == AF_INET6) {
+ opt = 0;
+ if (setsockopt(s, IPPROTO_IPV6, IPV6_V6ONLY, &opt,
+ sizeof(opt))) {
+ perror("failed to set IPV6_V6ONLY");
+ return -1;
+ }
+ }
+
+ ret = bind(s, (struct sockaddr *)&local_sock, sock_len);
+ if (ret < 0) {
+ perror("cannot bind socket");
+ goto err_socket;
+ }
+
+ ctx->socket = s;
+ ctx->sa_family = family;
+ return 0;
+
+err_socket:
+ close(s);
+ return -1;
+}
+
+static int ovpn_udp_socket(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ return ovpn_socket(ctx, family, IPPROTO_UDP);
+}
+
+static int ovpn_listen(struct ovpn_ctx *ctx, sa_family_t family)
+{
+ int ret;
+
+ ret = ovpn_socket(ctx, family, IPPROTO_TCP);
+ if (ret < 0)
+ return ret;
+
+ ret = listen(ctx->socket, 10);
+ if (ret < 0) {
+ perror("listen");
+ close(ctx->socket);
+ return -1;
+ }
+
+ return 0;
+}
+
+static int ovpn_accept(struct ovpn_ctx *ctx)
+{
+ socklen_t socklen;
+ int ret;
+
+ socklen = sizeof(ctx->remote);
+ ret = accept(ctx->socket, (struct sockaddr *)&ctx->remote, &socklen);
+ if (ret < 0) {
+ perror("accept");
+ goto err;
+ }
+
+ fprintf(stderr, "Connection received!\n");
+
+ switch (socklen) {
+ case sizeof(struct sockaddr_in):
+ case sizeof(struct sockaddr_in6):
+ break;
+ default:
+ fprintf(stderr, "error: expecting IPv4 or IPv6 connection\n");
+ close(ret);
+ ret = -EINVAL;
+ goto err;
+ }
+
+ return ret;
+err:
+ close(ctx->socket);
+ return ret;
+}
+
+static int ovpn_connect(struct ovpn_ctx *ovpn)
+{
+ socklen_t socklen;
+ int s, ret;
+
+ s = socket(ovpn->remote.in4.sin_family, SOCK_STREAM, 0);
+ if (s < 0) {
+ perror("cannot create socket");
+ return -1;
+ }
+
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ socklen = sizeof(struct sockaddr_in);
+ break;
+ case AF_INET6:
+ socklen = sizeof(struct sockaddr_in6);
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
+ ret = connect(s, (struct sockaddr *)&ovpn->remote, socklen);
+ if (ret < 0) {
+ perror("connect");
+ goto err;
+ }
+
+ fprintf(stderr, "connected\n");
+
+ ovpn->socket = s;
+
+ return 0;
+err:
+ close(s);
+ return ret;
+}
+
+static int ovpn_new_peer(struct ovpn_ctx *ovpn, bool is_tcp)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_SOCKET, ovpn->socket);
+
+ if (!is_tcp) {
+ switch (ovpn->remote.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV4,
+ ovpn->remote.in4.sin_addr.s_addr);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in4.sin_port);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_REMOTE_IPV6,
+ sizeof(ovpn->remote.in6.sin6_addr),
+ &ovpn->remote.in6.sin6_addr);
+ NLA_PUT_U32(ctx->nl_msg,
+ OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID,
+ ovpn->remote.in6.sin6_scope_id);
+ NLA_PUT_U16(ctx->nl_msg, OVPN_A_PEER_REMOTE_PORT,
+ ovpn->remote.in6.sin6_port);
+ break;
+ default:
+ fprintf(stderr,
+ "Invalid family for remote socket address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ if (ovpn->peer_ip_set) {
+ switch (ovpn->peer_ip.in4.sin_family) {
+ case AF_INET:
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_VPN_IPV4,
+ ovpn->peer_ip.in4.sin_addr.s_addr);
+ break;
+ case AF_INET6:
+ NLA_PUT(ctx->nl_msg, OVPN_A_PEER_VPN_IPV6,
+ sizeof(struct in6_addr),
+ &ovpn->peer_ip.in6.sin6_addr);
+ break;
+ default:
+ fprintf(stderr, "Invalid family for peer address\n");
+ goto nla_put_failure;
+ }
+ }
+
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_set_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_SET);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_INTERVAL,
+ ovpn->keepalive_interval);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_KEEPALIVE_TIMEOUT,
+ ovpn->keepalive_timeout);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_peer(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_PEER_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_peer(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *pattrs[OVPN_A_PEER_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ __u16 rport = 0, lport = 0;
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_PEER]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(pattrs, OVPN_A_PEER_MAX, nla_data(attrs[OVPN_A_PEER]),
+ nla_len(attrs[OVPN_A_PEER]), NULL);
+
+ if (pattrs[OVPN_A_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_ID]));
+
+ if (pattrs[OVPN_A_PEER_SOCKET_NETNSID])
+ fprintf(stderr, "\tsocket NetNS ID: %d\n",
+ nla_get_s32(pattrs[OVPN_A_PEER_SOCKET_NETNSID]));
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV4]) {
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(pattrs[OVPN_A_PEER_VPN_IPV4]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv4: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_VPN_IPV6]) {
+ char buf[INET6_ADDRSTRLEN];
+
+ inet_ntop(AF_INET6, nla_data(pattrs[OVPN_A_PEER_VPN_IPV6]),
+ buf, sizeof(buf));
+ fprintf(stderr, "\tVPN IPv6: %s\n", buf);
+ }
+
+ if (pattrs[OVPN_A_PEER_LOCAL_PORT])
+ lport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_LOCAL_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_PORT])
+ rport = ntohs(nla_get_u16(pattrs[OVPN_A_PEER_REMOTE_PORT]));
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV6];
+ char buf[INET6_ADDRSTRLEN];
+ int scope_id = -1;
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID]) {
+ void *p = pattrs[OVPN_A_PEER_REMOTE_IPV6_SCOPE_ID];
+
+ scope_id = nla_get_u32(p);
+ }
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu (scope-id: %u)\n", buf, rport,
+ scope_id);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV6]) {
+ void *ip = pattrs[OVPN_A_PEER_LOCAL_IPV6];
+
+ inet_ntop(AF_INET6, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_REMOTE_IPV4]) {
+ void *ip = pattrs[OVPN_A_PEER_REMOTE_IPV4];
+ char buf[INET_ADDRSTRLEN];
+
+ inet_ntop(AF_INET, nla_data(ip), buf, sizeof(buf));
+ fprintf(stderr, "\tRemote: %s:%hu\n", buf, rport);
+
+ if (pattrs[OVPN_A_PEER_LOCAL_IPV4]) {
+ void *p = pattrs[OVPN_A_PEER_LOCAL_IPV4];
+
+ inet_ntop(AF_INET, nla_data(p), buf, sizeof(buf));
+ fprintf(stderr, "\tLocal: %s:%hu\n", buf, lport);
+ }
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL]) {
+ void *p = pattrs[OVPN_A_PEER_KEEPALIVE_INTERVAL];
+
+ fprintf(stderr, "\tKeepalive interval: %u sec\n",
+ nla_get_u32(p));
+ }
+
+ if (pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT])
+ fprintf(stderr, "\tKeepalive timeout: %u sec\n",
+ nla_get_u32(pattrs[OVPN_A_PEER_KEEPALIVE_TIMEOUT]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_BYTES])
+ fprintf(stderr, "\tVPN RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_BYTES])
+ fprintf(stderr, "\tVPN TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_VPN_RX_PACKETS])
+ fprintf(stderr, "\tVPN RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_VPN_TX_PACKETS])
+ fprintf(stderr, "\tVPN TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_VPN_TX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_BYTES])
+ fprintf(stderr, "\tLINK RX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_BYTES])
+ fprintf(stderr, "\tLINK TX bytes: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_BYTES]));
+
+ if (pattrs[OVPN_A_PEER_LINK_RX_PACKETS])
+ fprintf(stderr, "\tLINK RX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_RX_PACKETS]));
+
+ if (pattrs[OVPN_A_PEER_LINK_TX_PACKETS])
+ fprintf(stderr, "\tLINK TX packets: %" PRIu64 "\n",
+ ovpn_nla_get_uint(pattrs[OVPN_A_PEER_LINK_TX_PACKETS]));
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_peer(struct ovpn_ctx *ovpn)
+{
+ int flags = 0, ret = -1;
+ struct nlattr *attr;
+ struct nl_ctx *ctx;
+
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ flags = NLM_F_DUMP;
+
+ ctx = nl_ctx_alloc_flags(ovpn, OVPN_CMD_PEER_GET, flags);
+ if (!ctx)
+ return -ENOMEM;
+
+ if (ovpn->peer_id != PEER_ID_UNDEF) {
+ attr = nla_nest_start(ctx->nl_msg, OVPN_A_PEER);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, attr);
+ }
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_peer);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_new_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf, *key_dir;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_NEW);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_KEY_ID, ovpn->key_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_CIPHER_ALG, ovpn->cipher);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_ENCRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_enc);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ key_dir = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF_DECRYPT_DIR);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_CIPHER_KEY, KEY_LEN, ovpn->key_dec);
+ NLA_PUT(ctx->nl_msg, OVPN_A_KEYDIR_NONCE_TAIL, NONCE_LEN, ovpn->nonce);
+ nla_nest_end(ctx->nl_msg, key_dir);
+
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_del_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_DEL);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_handle_key(struct nl_msg *msg, void (*arg)__always_unused)
+{
+ struct nlattr *kattrs[OVPN_A_KEYCONF_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+
+ nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!attrs[OVPN_A_KEYCONF]) {
+ fprintf(stderr, "no packet content in netlink message\n");
+ return NL_SKIP;
+ }
+
+ nla_parse(kattrs, OVPN_A_KEYCONF_MAX, nla_data(attrs[OVPN_A_KEYCONF]),
+ nla_len(attrs[OVPN_A_KEYCONF]), NULL);
+
+ if (kattrs[OVPN_A_KEYCONF_PEER_ID])
+ fprintf(stderr, "* Peer %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_PEER_ID]));
+ if (kattrs[OVPN_A_KEYCONF_SLOT]) {
+ fprintf(stderr, "\t- Slot: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT])) {
+ case OVPN_KEY_SLOT_PRIMARY:
+ fprintf(stderr, "primary\n");
+ break;
+ case OVPN_KEY_SLOT_SECONDARY:
+ fprintf(stderr, "secondary\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_SLOT]));
+ break;
+ }
+ }
+ if (kattrs[OVPN_A_KEYCONF_KEY_ID])
+ fprintf(stderr, "\t- Key ID: %u\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_KEY_ID]));
+ if (kattrs[OVPN_A_KEYCONF_CIPHER_ALG]) {
+ fprintf(stderr, "\t- Cipher: ");
+ switch (nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG])) {
+ case OVPN_CIPHER_ALG_NONE:
+ fprintf(stderr, "none\n");
+ break;
+ case OVPN_CIPHER_ALG_AES_GCM:
+ fprintf(stderr, "aes-gcm\n");
+ break;
+ case OVPN_CIPHER_ALG_CHACHA20_POLY1305:
+ fprintf(stderr, "chacha20poly1305\n");
+ break;
+ default:
+ fprintf(stderr, "invalid (%u)\n",
+ nla_get_u32(kattrs[OVPN_A_KEYCONF_CIPHER_ALG]));
+ break;
+ }
+ }
+
+ return NL_SKIP;
+}
+
+static int ovpn_get_key(struct ovpn_ctx *ovpn)
+{
+ struct nlattr *keyconf;
+ struct nl_ctx *ctx;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_GET);
+ if (!ctx)
+ return -ENOMEM;
+
+ keyconf = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_SLOT, ovpn->key_slot);
+ nla_nest_end(ctx->nl_msg, keyconf);
+
+ ret = ovpn_nl_msg_send(ctx, ovpn_handle_key);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+static int ovpn_swap_keys(struct ovpn_ctx *ovpn)
+{
+ struct nl_ctx *ctx;
+ struct nlattr *kc;
+ int ret = -1;
+
+ ctx = nl_ctx_alloc(ovpn, OVPN_CMD_KEY_SWAP);
+ if (!ctx)
+ return -ENOMEM;
+
+ kc = nla_nest_start(ctx->nl_msg, OVPN_A_KEYCONF);
+ NLA_PUT_U32(ctx->nl_msg, OVPN_A_KEYCONF_PEER_ID, ovpn->peer_id);
+ nla_nest_end(ctx->nl_msg, kc);
+
+ ret = ovpn_nl_msg_send(ctx, NULL);
+nla_put_failure:
+ nl_ctx_free(ctx);
+ return ret;
+}
+
+/* Helper function used to easily add attributes to a rtnl message */
+static int ovpn_addattr(struct nlmsghdr *n, int maxlen, int type,
+ const void *data, int alen)
+{
+ int len = RTA_LENGTH(alen);
+ struct rtattr *rta;
+
+ if ((int)(NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len)) > maxlen) {
+ fprintf(stderr, "%s: rtnl: message exceeded bound of %d\n",
+ __func__, maxlen);
+ return -EMSGSIZE;
+ }
+
+ rta = nlmsg_tail(n);
+ rta->rta_type = type;
+ rta->rta_len = len;
+
+ if (!data)
+ memset(RTA_DATA(rta), 0, alen);
+ else
+ memcpy(RTA_DATA(rta), data, alen);
+
+ n->nlmsg_len = NLMSG_ALIGN(n->nlmsg_len) + RTA_ALIGN(len);
+
+ return 0;
+}
+
+static struct rtattr *ovpn_nest_start(struct nlmsghdr *msg, size_t max_size,
+ int attr)
+{
+ struct rtattr *nest = nlmsg_tail(msg);
+
+ if (ovpn_addattr(msg, max_size, attr, NULL, 0) < 0)
+ return NULL;
+
+ return nest;
+}
+
+static void ovpn_nest_end(struct nlmsghdr *msg, struct rtattr *nest)
+{
+ nest->rta_len = (uint8_t *)nlmsg_tail(msg) - (uint8_t *)nest;
+}
+
+#define RT_SNDBUF_SIZE (1024 * 2)
+#define RT_RCVBUF_SIZE (1024 * 4)
+
+/* Open RTNL socket */
+static int ovpn_rt_socket(void)
+{
+ int sndbuf = RT_SNDBUF_SIZE, rcvbuf = RT_RCVBUF_SIZE, fd;
+
+ fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+ if (fd < 0) {
+ fprintf(stderr, "%s: cannot open netlink socket\n", __func__);
+ return fd;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_SNDBUF, &sndbuf,
+ sizeof(sndbuf)) < 0) {
+ fprintf(stderr, "%s: SO_SNDBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &rcvbuf,
+ sizeof(rcvbuf)) < 0) {
+ fprintf(stderr, "%s: SO_RCVBUF\n", __func__);
+ close(fd);
+ return -1;
+ }
+
+ return fd;
+}
+
+/* Bind socket to Netlink subsystem */
+static int ovpn_rt_bind(int fd, uint32_t groups)
+{
+ struct sockaddr_nl local = { 0 };
+ socklen_t addr_len;
+
+ local.nl_family = AF_NETLINK;
+ local.nl_groups = groups;
+
+ if (bind(fd, (struct sockaddr *)&local, sizeof(local)) < 0) {
+ fprintf(stderr, "%s: cannot bind netlink socket: %d\n",
+ __func__, errno);
+ return -errno;
+ }
+
+ addr_len = sizeof(local);
+ if (getsockname(fd, (struct sockaddr *)&local, &addr_len) < 0) {
+ fprintf(stderr, "%s: cannot getsockname: %d\n", __func__,
+ errno);
+ return -errno;
+ }
+
+ if (addr_len != sizeof(local)) {
+ fprintf(stderr, "%s: wrong address length %d\n", __func__,
+ addr_len);
+ return -EINVAL;
+ }
+
+ if (local.nl_family != AF_NETLINK) {
+ fprintf(stderr, "%s: wrong address family %d\n", __func__,
+ local.nl_family);
+ return -EINVAL;
+ }
+
+ return 0;
+}
+
+typedef int (*ovpn_parse_reply_cb)(struct nlmsghdr *msg, void *arg);
+
+/* Send Netlink message and run callback on reply (if specified) */
+static int ovpn_rt_send(struct nlmsghdr *payload, pid_t peer,
+ unsigned int groups, ovpn_parse_reply_cb cb,
+ void *arg_cb)
+{
+ int len, rem_len, fd, ret, rcv_len;
+ struct sockaddr_nl nladdr = { 0 };
+ struct nlmsgerr *err;
+ struct nlmsghdr *h;
+ char buf[1024 * 16];
+ struct iovec iov = {
+ .iov_base = payload,
+ .iov_len = payload->nlmsg_len,
+ };
+ struct msghdr nlmsg = {
+ .msg_name = &nladdr,
+ .msg_namelen = sizeof(nladdr),
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ };
+
+ nladdr.nl_family = AF_NETLINK;
+ nladdr.nl_pid = peer;
+ nladdr.nl_groups = groups;
+
+ payload->nlmsg_seq = time(NULL);
+
+ /* no need to send reply */
+ if (!cb)
+ payload->nlmsg_flags |= NLM_F_ACK;
+
+ fd = ovpn_rt_socket();
+ if (fd < 0) {
+ fprintf(stderr, "%s: can't open rtnl socket\n", __func__);
+ return -errno;
+ }
+
+ ret = ovpn_rt_bind(fd, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: can't bind rtnl socket\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ ret = sendmsg(fd, &nlmsg, 0);
+ if (ret < 0) {
+ fprintf(stderr, "%s: rtnl: error on sendmsg()\n", __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ /* prepare buffer to store RTNL replies */
+ memset(buf, 0, sizeof(buf));
+ iov.iov_base = buf;
+
+ while (1) {
+ /*
+ * iov_len is modified by recvmsg(), therefore has to be initialized before
+ * using it again
+ */
+ iov.iov_len = sizeof(buf);
+ rcv_len = recvmsg(fd, &nlmsg, 0);
+ if (rcv_len < 0) {
+ if (errno == EINTR || errno == EAGAIN) {
+ fprintf(stderr, "%s: interrupted call\n",
+ __func__);
+ continue;
+ }
+ fprintf(stderr, "%s: rtnl: error on recvmsg()\n",
+ __func__);
+ ret = -errno;
+ goto out;
+ }
+
+ if (rcv_len == 0) {
+ fprintf(stderr,
+ "%s: rtnl: socket reached unexpected EOF\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (nlmsg.msg_namelen != sizeof(nladdr)) {
+ fprintf(stderr,
+ "%s: sender address length: %u (expected %zu)\n",
+ __func__, nlmsg.msg_namelen, sizeof(nladdr));
+ ret = -EIO;
+ goto out;
+ }
+
+ h = (struct nlmsghdr *)buf;
+ while (rcv_len >= (int)sizeof(*h)) {
+ len = h->nlmsg_len;
+ rem_len = len - sizeof(*h);
+
+ if (rem_len < 0 || len > rcv_len) {
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: truncated message\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+ fprintf(stderr, "%s: malformed message: len=%d\n",
+ __func__, len);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_DONE) {
+ ret = 0;
+ goto out;
+ }
+
+ if (h->nlmsg_type == NLMSG_ERROR) {
+ err = (struct nlmsgerr *)NLMSG_DATA(h);
+ if (rem_len < (int)sizeof(struct nlmsgerr)) {
+ fprintf(stderr, "%s: ERROR truncated\n",
+ __func__);
+ ret = -EIO;
+ goto out;
+ }
+
+ if (err->error) {
+ fprintf(stderr, "%s: (%d) %s\n",
+ __func__, err->error,
+ strerror(-err->error));
+ ret = err->error;
+ goto out;
+ }
+
+ ret = 0;
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0)
+ ret = r;
+ }
+ goto out;
+ }
+
+ if (cb) {
+ int r = cb(h, arg_cb);
+
+ if (r <= 0) {
+ ret = r;
+ goto out;
+ }
+ } else {
+ fprintf(stderr, "%s: RTNL: unexpected reply\n",
+ __func__);
+ }
+
+ rcv_len -= NLMSG_ALIGN(len);
+ h = (struct nlmsghdr *)((uint8_t *)h +
+ NLMSG_ALIGN(len));
+ }
+
+ if (nlmsg.msg_flags & MSG_TRUNC) {
+ fprintf(stderr, "%s: message truncated\n", __func__);
+ continue;
+ }
+
+ if (rcv_len) {
+ fprintf(stderr, "%s: rtnl: %d not parsed bytes\n",
+ __func__, rcv_len);
+ ret = -1;
+ goto out;
+ }
+ }
+out:
+ close(fd);
+
+ return ret;
+}
+
+struct ovpn_link_req {
+ struct nlmsghdr n;
+ struct ifinfomsg i;
+ char buf[256];
+};
+
+static int ovpn_new_iface(struct ovpn_ctx *ovpn)
+{
+ struct rtattr *linkinfo, *data;
+ struct ovpn_link_req req = { 0 };
+ int ret = -1;
+
+ fprintf(stdout, "Creating interface %s with mode %u\n", ovpn->ifname,
+ ovpn->mode);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE | NLM_F_EXCL;
+ req.n.nlmsg_type = RTM_NEWLINK;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_IFNAME, ovpn->ifname,
+ strlen(ovpn->ifname) + 1) < 0)
+ goto err;
+
+ linkinfo = ovpn_nest_start(&req.n, sizeof(req), IFLA_LINKINFO);
+ if (!linkinfo)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_INFO_KIND, OVPN_FAMILY_NAME,
+ strlen(OVPN_FAMILY_NAME) + 1) < 0)
+ goto err;
+
+ if (ovpn->mode_set) {
+ data = ovpn_nest_start(&req.n, sizeof(req), IFLA_INFO_DATA);
+ if (!data)
+ goto err;
+
+ if (ovpn_addattr(&req.n, sizeof(req), IFLA_OVPN_MODE,
+ &ovpn->mode, sizeof(uint8_t)) < 0)
+ goto err;
+
+ ovpn_nest_end(&req.n, data);
+ }
+
+ ovpn_nest_end(&req.n, linkinfo);
+
+ req.i.ifi_family = AF_PACKET;
+
+ ret = ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+err:
+ return ret;
+}
+
+static int ovpn_del_iface(struct ovpn_ctx *ovpn)
+{
+ struct ovpn_link_req req = { 0 };
+
+ fprintf(stdout, "Deleting interface %s ifindex %u\n", ovpn->ifname,
+ ovpn->ifindex);
+
+ req.n.nlmsg_len = NLMSG_LENGTH(sizeof(req.i));
+ req.n.nlmsg_flags = NLM_F_REQUEST;
+ req.n.nlmsg_type = RTM_DELLINK;
+
+ req.i.ifi_family = AF_PACKET;
+ req.i.ifi_index = ovpn->ifindex;
+
+ return ovpn_rt_send(&req.n, 0, 0, NULL, NULL);
+}
+
+static int nl_seq_check(struct nl_msg (*msg)__always_unused,
+ void (*arg)__always_unused)
+{
+ return NL_OK;
+}
+
+struct mcast_handler_args {
+ const char *group;
+ int id;
+};
+
+static int mcast_family_handler(struct nl_msg *msg, void *arg)
+{
+ struct mcast_handler_args *grp = arg;
+ struct nlattr *tb[CTRL_ATTR_MAX + 1];
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *mcgrp;
+ int rem_mcgrp;
+
+ nla_parse(tb, CTRL_ATTR_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL);
+
+ if (!tb[CTRL_ATTR_MCAST_GROUPS])
+ return NL_SKIP;
+
+ nla_for_each_nested(mcgrp, tb[CTRL_ATTR_MCAST_GROUPS], rem_mcgrp) {
+ struct nlattr *tb_mcgrp[CTRL_ATTR_MCAST_GRP_MAX + 1];
+
+ nla_parse(tb_mcgrp, CTRL_ATTR_MCAST_GRP_MAX,
+ nla_data(mcgrp), nla_len(mcgrp), NULL);
+
+ if (!tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME] ||
+ !tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID])
+ continue;
+ if (strncmp(nla_data(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME]),
+ grp->group, nla_len(tb_mcgrp[CTRL_ATTR_MCAST_GRP_NAME])))
+ continue;
+ grp->id = nla_get_u32(tb_mcgrp[CTRL_ATTR_MCAST_GRP_ID]);
+ break;
+ }
+
+ return NL_SKIP;
+}
+
+static int mcast_error_handler(struct sockaddr_nl (*nla)__always_unused,
+ struct nlmsgerr *err, void *arg)
+{
+ int *ret = arg;
+
+ *ret = err->error;
+ return NL_STOP;
+}
+
+static int mcast_ack_handler(struct nl_msg (*msg)__always_unused, void *arg)
+{
+ int *ret = arg;
+
+ *ret = 0;
+ return NL_STOP;
+}
+
+static int ovpn_handle_msg(struct nl_msg *msg, void *arg)
+{
+ struct genlmsghdr *gnlh = nlmsg_data(nlmsg_hdr(msg));
+ struct nlattr *attrs[OVPN_A_MAX + 1];
+ struct nlmsghdr *nlh = nlmsg_hdr(msg);
+ char ifname[IF_NAMESIZE];
+ int *ret = arg;
+ __u32 ifindex;
+
+ fprintf(stderr, "received message from ovpn-dco\n");
+
+ *ret = -1;
+
+ if (!genlmsg_valid_hdr(nlh, 0)) {
+ fprintf(stderr, "invalid header\n");
+ return NL_STOP;
+ }
+
+ if (nla_parse(attrs, OVPN_A_MAX, genlmsg_attrdata(gnlh, 0),
+ genlmsg_attrlen(gnlh, 0), NULL)) {
+ fprintf(stderr, "received bogus data from ovpn-dco\n");
+ return NL_STOP;
+ }
+
+ if (!attrs[OVPN_A_IFINDEX]) {
+ fprintf(stderr, "no ifindex in this message\n");
+ return NL_STOP;
+ }
+
+ ifindex = nla_get_u32(attrs[OVPN_A_IFINDEX]);
+ if (!if_indextoname(ifindex, ifname)) {
+ fprintf(stderr, "cannot resolve ifname for ifindex: %u\n",
+ ifindex);
+ return NL_STOP;
+ }
+
+ switch (gnlh->cmd) {
+ case OVPN_CMD_PEER_DEL_NTF:
+ fprintf(stdout, "received CMD_PEER_DEL_NTF\n");
+ break;
+ case OVPN_CMD_KEY_SWAP_NTF:
+ fprintf(stdout, "received CMD_KEY_SWAP_NTF\n");
+ break;
+ default:
+ fprintf(stderr, "received unknown command: %d\n", gnlh->cmd);
+ return NL_STOP;
+ }
+
+ *ret = 0;
+ return NL_OK;
+}
+
+static int ovpn_get_mcast_id(struct nl_sock *sock, const char *family,
+ const char *group)
+{
+ struct nl_msg *msg;
+ struct nl_cb *cb;
+ int ret, ctrlid;
+ struct mcast_handler_args grp = {
+ .group = group,
+ .id = -ENOENT,
+ };
+
+ msg = nlmsg_alloc();
+ if (!msg)
+ return -ENOMEM;
+
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ if (!cb) {
+ ret = -ENOMEM;
+ goto out_fail_cb;
+ }
+
+ ctrlid = genl_ctrl_resolve(sock, "nlctrl");
+
+ genlmsg_put(msg, 0, 0, ctrlid, 0, 0, CTRL_CMD_GETFAMILY, 0);
+
+ ret = -ENOBUFS;
+ NLA_PUT_STRING(msg, CTRL_ATTR_FAMILY_NAME, family);
+
+ ret = nl_send_auto_complete(sock, msg);
+ if (ret < 0)
+ goto nla_put_failure;
+
+ ret = 1;
+
+ nl_cb_err(cb, NL_CB_CUSTOM, mcast_error_handler, &ret);
+ nl_cb_set(cb, NL_CB_ACK, NL_CB_CUSTOM, mcast_ack_handler, &ret);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, mcast_family_handler, &grp);
+
+ while (ret > 0)
+ nl_recvmsgs(sock, cb);
+
+ if (ret == 0)
+ ret = grp.id;
+ nla_put_failure:
+ nl_cb_put(cb);
+ out_fail_cb:
+ nlmsg_free(msg);
+ return ret;
+}
+
+static int ovpn_listen_mcast(void)
+{
+ struct nl_sock *sock;
+ struct nl_cb *cb;
+ int mcid, ret;
+
+ sock = nl_socket_alloc();
+ if (!sock) {
+ fprintf(stderr, "cannot allocate netlink socket\n");
+ goto err_free;
+ }
+
+ nl_socket_set_buffer_size(sock, 8192, 8192);
+
+ ret = genl_connect(sock);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect to generic netlink: %s\n",
+ nl_geterror(ret));
+ goto err_free;
+ }
+
+ mcid = ovpn_get_mcast_id(sock, OVPN_FAMILY_NAME, OVPN_MCGRP_PEERS);
+ if (mcid < 0) {
+ fprintf(stderr, "cannot get mcast group: %s\n",
+ nl_geterror(mcid));
+ goto err_free;
+ }
+
+ ret = nl_socket_add_membership(sock, mcid);
+ if (ret) {
+ fprintf(stderr, "failed to join mcast group: %d\n", ret);
+ goto err_free;
+ }
+
+ ret = 1;
+ cb = nl_cb_alloc(NL_CB_DEFAULT);
+ nl_cb_set(cb, NL_CB_SEQ_CHECK, NL_CB_CUSTOM, nl_seq_check, NULL);
+ nl_cb_set(cb, NL_CB_VALID, NL_CB_CUSTOM, ovpn_handle_msg, &ret);
+ nl_cb_err(cb, NL_CB_CUSTOM, ovpn_nl_cb_error, &ret);
+
+ while (ret == 1) {
+ int err = nl_recvmsgs(sock, cb);
+
+ if (err < 0) {
+ fprintf(stderr,
+ "cannot receive netlink message: (%d) %s\n",
+ err, nl_geterror(-err));
+ ret = -1;
+ break;
+ }
+ }
+
+ nl_cb_put(cb);
+err_free:
+ nl_socket_free(sock);
+ return ret;
+}
+
+static void usage(const char *cmd)
+{
+ fprintf(stderr,
+ "Usage %s <command> <iface> [arguments..]\n",
+ cmd);
+ fprintf(stderr, "where <command> can be one of the following\n\n");
+
+ fprintf(stderr, "* new_iface <iface> [mode]: create new ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tmode:\n");
+ fprintf(stderr, "\t\t- P2P for peer-to-peer mode (i.e. client)\n");
+ fprintf(stderr, "\t\t- MP for multi-peer mode (i.e. server)\n");
+
+ fprintf(stderr, "* del_iface <iface>: delete ovpn interface\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+
+ fprintf(stderr,
+ "* listen <iface> <lport> <peers_file> [ipv6]: listen for incoming peer TCP connections\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: TCP port to listen to\n");
+ fprintf(stderr,
+ "\tpeers_file: file containing one peer per line: Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <vpnaddr>\n");
+ fprintf(stderr,
+ "\tipv6: whether the socket should listen to the IPv6 wildcard address\n");
+
+ fprintf(stderr,
+ "* connect <iface> <peer_id> <raddr> <rport> [key_file]: start connecting peer of TCP-based VPN session\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the connecting peer\n");
+ fprintf(stderr, "\traddr: peer IP address to connect to\n");
+ fprintf(stderr, "\trport: peer TCP port to connect to\n");
+ fprintf(stderr,
+ "\tkey_file: file containing the symmetric key for encryption\n");
+
+ fprintf(stderr,
+ "* new_peer <iface> <peer_id> <lport> <raddr> <rport> [vpnaddr]: add new peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID to be used in data packets to/from this peer\n");
+ fprintf(stderr, "\traddr: peer IP address\n");
+ fprintf(stderr, "\trport: peer UDP port\n");
+ fprintf(stderr, "\tvpnaddr: peer VPN IP\n");
+
+ fprintf(stderr,
+ "* new_multi_peer <iface> <lport> <peers_file>: add multiple peers as listed in the file\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tlport: local UDP port to bind to\n");
+ fprintf(stderr,
+ "\tpeers_file: text file containing one peer per line. Line format:\n");
+ fprintf(stderr, "\t\t<peer_id> <raddr> <rport> <vpnaddr>\n");
+
+ fprintf(stderr,
+ "* set_peer <iface> <peer_id> <keepalive_interval> <keepalive_timeout>: set peer attributes\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr,
+ "\tkeepalive_interval: interval for sending ping messages\n");
+ fprintf(stderr,
+ "\tkeepalive_timeout: time after which a peer is timed out\n");
+
+ fprintf(stderr, "* del_peer <iface> <peer_id>: delete peer\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to delete\n");
+
+ fprintf(stderr, "* get_peer <iface> [peer_id]: retrieve peer(s) status\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to query. All peers are returned if omitted\n");
+
+ fprintf(stderr,
+ "* new_key <iface> <peer_id> <slot> <key_id> <cipher> <key_dir> <key_file>: set data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr,
+ "\tpeer_id: peer ID of the peer to configure the key for\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+ fprintf(stderr, "\tkey_id: an ID from 0 to 7\n");
+ fprintf(stderr,
+ "\tcipher: cipher to use, supported: aes (AES-GCM), chachapoly (CHACHA20POLY1305)\n");
+ fprintf(stderr,
+ "\tkey_dir: key direction, must 0 on one host and 1 on the other\n");
+ fprintf(stderr, "\tkey_file: file containing the pre-shared key\n");
+
+ fprintf(stderr,
+ "* del_key <iface> <peer_id> [slot]: erase existing data channel key\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+ fprintf(stderr, "\tslot: slot to erase. PRIMARY if omitted\n");
+
+ fprintf(stderr,
+ "* get_key <iface> <peer_id> <slot>: retrieve non sensible key data\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to query\n");
+ fprintf(stderr, "\tslot: either 1 (primary) or 2 (secondary)\n");
+
+ fprintf(stderr,
+ "* swap_keys <iface> <peer_id>: swap content of primary and secondary key slots\n");
+ fprintf(stderr, "\tiface: ovpn interface name\n");
+ fprintf(stderr, "\tpeer_id: peer ID of the peer to modify\n");
+
+ fprintf(stderr,
+ "* listen_mcast: listen to ovpn netlink multicast messages\n");
+}
+
+static int ovpn_parse_remote(struct ovpn_ctx *ovpn, const char *host,
+ const char *service, const char *vpnip)
+{
+ int ret;
+ struct addrinfo *result;
+ struct addrinfo hints = {
+ .ai_family = ovpn->sa_family,
+ .ai_socktype = SOCK_DGRAM,
+ .ai_protocol = IPPROTO_UDP
+ };
+
+ if (host) {
+ ret = getaddrinfo(host, service, &hints, &result);
+ if (ret == EAI_NONAME || ret == EAI_FAIL)
+ return -1;
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->remote, result->ai_addr, result->ai_addrlen);
+ }
+
+ if (vpnip) {
+ ret = getaddrinfo(vpnip, NULL, &hints, &result);
+ if (ret == EAI_NONAME || ret == EAI_FAIL)
+ return -1;
+
+ if (!(result->ai_family == AF_INET &&
+ result->ai_addrlen == sizeof(struct sockaddr_in)) &&
+ !(result->ai_family == AF_INET6 &&
+ result->ai_addrlen == sizeof(struct sockaddr_in6))) {
+ ret = -EINVAL;
+ goto out;
+ }
+
+ memcpy(&ovpn->peer_ip, result->ai_addr, result->ai_addrlen);
+ ovpn->sa_family = result->ai_family;
+
+ ovpn->peer_ip_set = true;
+ }
+
+ ret = 0;
+out:
+ freeaddrinfo(result);
+ return ret;
+}
+
+static int ovpn_parse_new_peer(struct ovpn_ctx *ovpn, const char *peer_id,
+ const char *raddr, const char *rport,
+ const char *vpnip)
+{
+ ovpn->peer_id = strtoul(peer_id, NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ return ovpn_parse_remote(ovpn, raddr, rport, vpnip);
+}
+
+static int ovpn_parse_key_slot(const char *arg, struct ovpn_ctx *ovpn)
+{
+ int slot = strtoul(arg, NULL, 10);
+
+ if (errno == ERANGE || slot < 1 || slot > 2) {
+ fprintf(stderr, "key slot out of range\n");
+ return -1;
+ }
+
+ switch (slot) {
+ case 1:
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ break;
+ case 2:
+ ovpn->key_slot = OVPN_KEY_SLOT_SECONDARY;
+ break;
+ }
+
+ return 0;
+}
+
+static int ovpn_send_tcp_data(int socket)
+{
+ uint16_t len = htons(1000);
+ uint8_t buf[1002];
+ int ret;
+
+ memcpy(buf, &len, sizeof(len));
+ memset(buf + sizeof(len), 0x86, sizeof(buf) - sizeof(len));
+
+ ret = send(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ fprintf(stdout, "Sent %u bytes over TCP socket\n", ret);
+
+ return ret > 0 ? 0 : ret;
+}
+
+static int ovpn_recv_tcp_data(int socket)
+{
+ uint8_t buf[1002];
+ uint16_t len;
+ int ret;
+
+ ret = recv(socket, buf, sizeof(buf), MSG_NOSIGNAL);
+
+ if (ret < 2) {
+ fprintf(stderr, ">>>> Error while reading TCP data: %d\n", ret);
+ return ret;
+ }
+
+ memcpy(&len, buf, sizeof(len));
+ len = ntohs(len);
+
+ fprintf(stdout, ">>>> Received %u bytes over TCP socket, header: %u\n",
+ ret, len);
+
+ return 0;
+}
+
+static enum ovpn_cmd ovpn_parse_cmd(const char *cmd)
+{
+ if (!strcmp(cmd, "new_iface"))
+ return CMD_NEW_IFACE;
+
+ if (!strcmp(cmd, "del_iface"))
+ return CMD_DEL_IFACE;
+
+ if (!strcmp(cmd, "listen"))
+ return CMD_LISTEN;
+
+ if (!strcmp(cmd, "connect"))
+ return CMD_CONNECT;
+
+ if (!strcmp(cmd, "new_peer"))
+ return CMD_NEW_PEER;
+
+ if (!strcmp(cmd, "new_multi_peer"))
+ return CMD_NEW_MULTI_PEER;
+
+ if (!strcmp(cmd, "set_peer"))
+ return CMD_SET_PEER;
+
+ if (!strcmp(cmd, "del_peer"))
+ return CMD_DEL_PEER;
+
+ if (!strcmp(cmd, "get_peer"))
+ return CMD_GET_PEER;
+
+ if (!strcmp(cmd, "new_key"))
+ return CMD_NEW_KEY;
+
+ if (!strcmp(cmd, "del_key"))
+ return CMD_DEL_KEY;
+
+ if (!strcmp(cmd, "get_key"))
+ return CMD_GET_KEY;
+
+ if (!strcmp(cmd, "swap_keys"))
+ return CMD_SWAP_KEYS;
+
+ if (!strcmp(cmd, "listen_mcast"))
+ return CMD_LISTEN_MCAST;
+
+ return CMD_INVALID;
+}
+
+/* Send process to background and waits for signal.
+ *
+ * This helper is called at the end of commands
+ * creating sockets, so that the latter stay alive
+ * along with the process that created them.
+ *
+ * A signal is expected to be delivered in order to
+ * terminate the waiting processes
+ */
+static void ovpn_waitbg(void)
+{
+ daemon(1, 1);
+ pause();
+}
+
+static int ovpn_run_cmd(struct ovpn_ctx *ovpn)
+{
+ char peer_id[10], vpnip[INET6_ADDRSTRLEN], raddr[128], rport[10];
+ int n, ret;
+ FILE *fp;
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ ret = ovpn_new_iface(ovpn);
+ break;
+ case CMD_DEL_IFACE:
+ ret = ovpn_del_iface(ovpn);
+ break;
+ case CMD_LISTEN:
+ ret = ovpn_listen(ovpn, ovpn->sa_family);
+ if (ret < 0) {
+ fprintf(stderr, "cannot listen on TCP socket\n");
+ return ret;
+ }
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ int num_peers = 0;
+
+ while ((n = fscanf(fp, "%s %s\n", peer_id, vpnip)) == 2) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ if (num_peers == MAX_PEERS) {
+ fprintf(stderr, "max peers reached!\n");
+ return -E2BIG;
+ }
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.sa_family = ovpn->sa_family;
+
+ peer_ctx.socket = ovpn_accept(ovpn);
+ if (peer_ctx.socket < 0) {
+ fprintf(stderr, "cannot accept connection!\n");
+ return -1;
+ }
+
+ /* store peer sockets to test TCP I/O */
+ ovpn->cli_sockets[num_peers] = peer_ctx.socket;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, NULL,
+ NULL, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, true);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s\n",
+ peer_id, vpnip);
+ return ret;
+ }
+ num_peers++;
+ }
+
+ for (int i = 0; i < num_peers; i++) {
+ ret = ovpn_recv_tcp_data(ovpn->cli_sockets[i]);
+ if (ret < 0)
+ break;
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_CONNECT:
+ ret = ovpn_connect(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot connect TCP socket\n");
+ return ret;
+ }
+
+ ret = ovpn_new_peer(ovpn, true);
+ if (ret < 0) {
+ fprintf(stderr, "cannot add peer to VPN\n");
+ close(ovpn->socket);
+ return ret;
+ }
+
+ if (ovpn->cipher != OVPN_CIPHER_ALG_NONE) {
+ ret = ovpn_new_key(ovpn);
+ if (ret < 0) {
+ fprintf(stderr, "cannot set key\n");
+ return ret;
+ }
+ }
+
+ ret = ovpn_send_tcp_data(ovpn->socket);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ ret = ovpn_new_peer(ovpn, false);
+ ovpn_waitbg();
+ break;
+ case CMD_NEW_MULTI_PEER:
+ ret = ovpn_udp_socket(ovpn, AF_INET6);
+ if (ret < 0)
+ return ret;
+
+ fp = fopen(ovpn->peers_file, "r");
+ if (!fp) {
+ fprintf(stderr, "cannot open file: %s\n",
+ ovpn->peers_file);
+ return -1;
+ }
+
+ while ((n = fscanf(fp, "%s %s %s %s\n", peer_id, raddr, rport,
+ vpnip)) == 4) {
+ struct ovpn_ctx peer_ctx = { 0 };
+
+ peer_ctx.ifindex = ovpn->ifindex;
+ peer_ctx.socket = ovpn->socket;
+ peer_ctx.sa_family = AF_UNSPEC;
+
+ ret = ovpn_parse_new_peer(&peer_ctx, peer_id, raddr,
+ rport, vpnip);
+ if (ret < 0) {
+ fprintf(stderr, "error while parsing line\n");
+ return -1;
+ }
+
+ ret = ovpn_new_peer(&peer_ctx, false);
+ if (ret < 0) {
+ fprintf(stderr,
+ "cannot add peer to VPN: %s %s %s %s\n",
+ peer_id, raddr, rport, vpnip);
+ return ret;
+ }
+ }
+ ovpn_waitbg();
+ break;
+ case CMD_SET_PEER:
+ ret = ovpn_set_peer(ovpn);
+ break;
+ case CMD_DEL_PEER:
+ ret = ovpn_del_peer(ovpn);
+ break;
+ case CMD_GET_PEER:
+ if (ovpn->peer_id == PEER_ID_UNDEF)
+ fprintf(stderr, "List of peers connected to: %s\n",
+ ovpn->ifname);
+
+ ret = ovpn_get_peer(ovpn);
+ break;
+ case CMD_NEW_KEY:
+ ret = ovpn_new_key(ovpn);
+ break;
+ case CMD_DEL_KEY:
+ ret = ovpn_del_key(ovpn);
+ break;
+ case CMD_GET_KEY:
+ ret = ovpn_get_key(ovpn);
+ break;
+ case CMD_SWAP_KEYS:
+ ret = ovpn_swap_keys(ovpn);
+ break;
+ case CMD_LISTEN_MCAST:
+ ret = ovpn_listen_mcast();
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return ret;
+}
+
+static int ovpn_parse_cmd_args(struct ovpn_ctx *ovpn, int argc, char *argv[])
+{
+ int ret;
+
+ /* no args required for LISTEN_MCAST */
+ if (ovpn->cmd == CMD_LISTEN_MCAST)
+ return 0;
+
+ /* all commands need an ifname */
+ if (argc < 3)
+ return -EINVAL;
+
+ strscpy(ovpn->ifname, argv[2], IFNAMSIZ - 1);
+ ovpn->ifname[IFNAMSIZ - 1] = '\0';
+
+ /* all commands, except NEW_IFNAME, needs an ifindex */
+ if (ovpn->cmd != CMD_NEW_IFACE) {
+ ovpn->ifindex = if_nametoindex(ovpn->ifname);
+ if (!ovpn->ifindex) {
+ fprintf(stderr, "cannot find interface: %s\n",
+ strerror(errno));
+ return -1;
+ }
+ }
+
+ switch (ovpn->cmd) {
+ case CMD_NEW_IFACE:
+ if (argc < 4)
+ break;
+
+ if (!strcmp(argv[3], "P2P")) {
+ ovpn->mode = OVPN_MODE_P2P;
+ } else if (!strcmp(argv[3], "MP")) {
+ ovpn->mode = OVPN_MODE_MP;
+ } else {
+ fprintf(stderr, "Cannot parse iface mode: %s\n",
+ argv[3]);
+ return -1;
+ }
+ ovpn->mode_set = true;
+ break;
+ case CMD_DEL_IFACE:
+ break;
+ case CMD_LISTEN:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+
+ if (argc > 5 && !strcmp(argv[5], "ipv6"))
+ ovpn->sa_family = AF_INET6;
+ break;
+ case CMD_CONNECT:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->sa_family = AF_INET;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[4], argv[5],
+ NULL);
+ if (ret < 0) {
+ fprintf(stderr, "Cannot parse remote peer data\n");
+ return -1;
+ }
+
+ if (argc > 6) {
+ ovpn->key_slot = OVPN_KEY_SLOT_PRIMARY;
+ ovpn->key_id = 0;
+ ovpn->cipher = OVPN_CIPHER_ALG_AES_GCM;
+ ovpn->key_dir = KEY_DIR_OUT;
+
+ ret = ovpn_parse_key(argv[6], ovpn);
+ if (ret)
+ return -1;
+ }
+ break;
+ case CMD_NEW_PEER:
+ if (argc < 7)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ const char *vpnip = (argc > 7) ? argv[7] : NULL;
+
+ ret = ovpn_parse_new_peer(ovpn, argv[3], argv[5], argv[6],
+ vpnip);
+ if (ret < 0)
+ return -1;
+ break;
+ case CMD_NEW_MULTI_PEER:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->lport = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->lport > 65535) {
+ fprintf(stderr, "lport value out of range\n");
+ return -1;
+ }
+
+ ovpn->peers_file = argv[4];
+ break;
+ case CMD_SET_PEER:
+ if (argc < 6)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_interval = strtoul(argv[4], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+
+ ovpn->keepalive_timeout = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr,
+ "keepalive interval value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_DEL_PEER:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_GET_PEER:
+ ovpn->peer_id = PEER_ID_UNDEF;
+ if (argc > 3) {
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE || ovpn->peer_id > PEER_ID_UNDEF) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ }
+ break;
+ case CMD_NEW_KEY:
+ if (argc < 9)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return -1;
+
+ ovpn->key_id = strtoul(argv[5], NULL, 10);
+ if (errno == ERANGE || ovpn->key_id > 2) {
+ fprintf(stderr, "key ID out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_cipher(argv[6], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key_direction(argv[7], ovpn);
+ if (ret < 0)
+ return -1;
+
+ ret = ovpn_parse_key(argv[8], ovpn);
+ if (ret)
+ return -1;
+ break;
+ case CMD_DEL_KEY:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_GET_KEY:
+ if (argc < 5)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+
+ ret = ovpn_parse_key_slot(argv[4], ovpn);
+ if (ret)
+ return ret;
+ break;
+ case CMD_SWAP_KEYS:
+ if (argc < 4)
+ return -EINVAL;
+
+ ovpn->peer_id = strtoul(argv[3], NULL, 10);
+ if (errno == ERANGE) {
+ fprintf(stderr, "peer ID value out of range\n");
+ return -1;
+ }
+ break;
+ case CMD_LISTEN_MCAST:
+ break;
+ case CMD_INVALID:
+ break;
+ }
+
+ return 0;
+}
+
+int main(int argc, char *argv[])
+{
+ struct ovpn_ctx ovpn;
+ int ret;
+
+ if (argc < 2) {
+ usage(argv[0]);
+ return -1;
+ }
+
+ memset(&ovpn, 0, sizeof(ovpn));
+ ovpn.sa_family = AF_INET;
+ ovpn.cipher = OVPN_CIPHER_ALG_NONE;
+
+ ovpn.cmd = ovpn_parse_cmd(argv[1]);
+ if (ovpn.cmd == CMD_INVALID) {
+ fprintf(stderr, "Error: unknown command.\n\n");
+ usage(argv[0]);
+ return -1;
+ }
+
+ ret = ovpn_parse_cmd_args(&ovpn, argc, argv);
+ if (ret < 0) {
+ fprintf(stderr, "Error: invalid arguments.\n\n");
+ if (ret == -EINVAL)
+ usage(argv[0]);
+ return ret;
+ }
+
+ ret = ovpn_run_cmd(&ovpn);
+ if (ret)
+ fprintf(stderr, "Cannot execute command: %s (%d)\n",
+ strerror(-ret), ret);
+
+ return ret;
+}
diff --git a/tools/testing/selftests/net/ovpn/tcp_peers.txt b/tools/testing/selftests/net/ovpn/tcp_peers.txt
new file mode 100644
index 000000000000..d753eebe8716
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/tcp_peers.txt
@@ -0,0 +1,5 @@
+1 5.5.5.2
+2 5.5.5.3
+3 5.5.5.4
+4 5.5.5.5
+5 5.5.5.6
diff --git a/tools/testing/selftests/net/ovpn/test-chachapoly.sh b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
new file mode 100755
index 000000000000..32504079a2b8
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-chachapoly.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+ALG="chachapoly"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
new file mode 100755
index 000000000000..093d44772ffd
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test-close-socket.sh
diff --git a/tools/testing/selftests/net/ovpn/test-close-socket.sh b/tools/testing/selftests/net/ovpn/test-close-socket.sh
new file mode 100755
index 000000000000..5e48a8b67928
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-close-socket.sh
@@ -0,0 +1,45 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/test-float.sh b/tools/testing/selftests/net/ovpn/test-float.sh
new file mode 100755
index 000000000000..ba5d725e18b0
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-float.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+FLOAT="1"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test-tcp.sh b/tools/testing/selftests/net/ovpn/test-tcp.sh
new file mode 100755
index 000000000000..ba3f1f315a34
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test-tcp.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+PROTO="TCP"
+
+source test.sh
diff --git a/tools/testing/selftests/net/ovpn/test.sh b/tools/testing/selftests/net/ovpn/test.sh
new file mode 100755
index 000000000000..7b62897b0240
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/test.sh
@@ -0,0 +1,113 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+# Copyright (C) 2020-2025 OpenVPN, Inc.
+#
+# Author: Antonio Quartulli <antonio@openvpn.net>
+
+#set -x
+set -e
+
+source ./common.sh
+
+cleanup
+
+modprobe -q ovpn || true
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ create_ns ${p}
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ setup_ns ${p} 5.5.5.$((${p} + 1))/24
+done
+
+for p in $(seq 0 ${NUM_PEERS}); do
+ add_peer ${p}
+done
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 60 120
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 60 120
+done
+
+sleep 1
+
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ping -qfc 500 -w 3 5.5.5.$((${p} + 1))
+done
+
+if [ "$FLOAT" == "1" ]; then
+ # make clients float..
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip -n peer${p} addr del 10.10.${p}.2/24 dev veth${p}
+ ip -n peer${p} addr add 10.10.${p}.3/24 dev veth${p}
+ done
+ for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer${p} ping -qfc 500 -w 3 5.5.5.1
+ done
+fi
+
+ip netns exec peer0 iperf3 -1 -s &
+sleep 1
+ip netns exec peer1 iperf3 -Z -t 3 -c 5.5.5.1
+
+echo "Adding secondary key and then swap:"
+for p in $(seq 1 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} new_key tun0 ${p} 2 1 ${ALG} 0 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} new_key tun${p} ${p} 2 1 ${ALG} 1 data64.key
+ ip netns exec peer${p} ${OVPN_CLI} swap_keys tun${p} ${p}
+done
+
+sleep 1
+
+echo "Querying all peers:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0
+ip netns exec peer1 ${OVPN_CLI} get_peer tun1
+
+echo "Querying peer 1:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 1
+
+echo "Querying non-existent peer 10:"
+ip netns exec peer0 ${OVPN_CLI} get_peer tun0 10 || true
+
+echo "Deleting peer 1:"
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 1
+ip netns exec peer1 ${OVPN_CLI} del_peer tun1 1
+
+echo "Querying keys:"
+for p in $(seq 2 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} get_key tun${p} ${p} 2
+done
+
+echo "Deleting peer while sending traffic:"
+(ip netns exec peer2 ping -qf -w 4 5.5.5.1)&
+sleep 2
+ip netns exec peer0 ${OVPN_CLI} del_peer tun0 2
+# following command fails in TCP mode
+# (both ends get conn reset when one peer disconnects)
+ip netns exec peer2 ${OVPN_CLI} del_peer tun2 2 || true
+
+echo "Deleting keys:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 1
+ ip netns exec peer${p} ${OVPN_CLI} del_key tun${p} ${p} 2
+done
+
+echo "Setting timeout to 3s MP:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer0 ${OVPN_CLI} set_peer tun0 ${p} 3 3 || true
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 0 0
+done
+# wait for peers to timeout
+sleep 5
+
+echo "Setting timeout to 3s P2P:"
+for p in $(seq 3 ${NUM_PEERS}); do
+ ip netns exec peer${p} ${OVPN_CLI} set_peer tun${p} ${p} 3 3
+done
+sleep 5
+
+cleanup
+
+modprobe -r ovpn || true
diff --git a/tools/testing/selftests/net/ovpn/udp_peers.txt b/tools/testing/selftests/net/ovpn/udp_peers.txt
new file mode 100644
index 000000000000..32f14bd9347a
--- /dev/null
+++ b/tools/testing/selftests/net/ovpn/udp_peers.txt
@@ -0,0 +1,5 @@
+1 10.10.1.2 1 5.5.5.2
+2 10.10.2.2 1 5.5.5.3
+3 10.10.3.2 1 5.5.5.4
+4 10.10.4.2 1 5.5.5.5
+5 10.10.5.2 1 5.5.5.6
diff --git a/tools/testing/selftests/net/reuseport_addr_any.c b/tools/testing/selftests/net/reuseport_addr_any.c
index b8475cb29be7..1c43401a1c80 100644
--- a/tools/testing/selftests/net/reuseport_addr_any.c
+++ b/tools/testing/selftests/net/reuseport_addr_any.c
@@ -9,7 +9,6 @@
#include <arpa/inet.h>
#include <errno.h>
#include <error.h>
-#include <linux/dccp.h>
#include <linux/in.h>
#include <linux/unistd.h>
#include <stdbool.h>
@@ -21,10 +20,6 @@
#include <sys/socket.h>
#include <unistd.h>
-#ifndef SOL_DCCP
-#define SOL_DCCP 269
-#endif
-
static const char *IP4_ADDR = "127.0.0.1";
static const char *IP6_ADDR = "::1";
static const char *IP4_MAPPED6 = "::ffff:127.0.0.1";
@@ -86,15 +81,6 @@ static void build_rcv_fd(int family, int proto, int *rcv_fds, int count,
if (proto == SOCK_STREAM && listen(rcv_fds[i], 10))
error(1, errno, "tcp: failed to listen on receive port");
- else if (proto == SOCK_DCCP) {
- if (setsockopt(rcv_fds[i], SOL_DCCP,
- DCCP_SOCKOPT_SERVICE,
- &(int) {htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
- if (listen(rcv_fds[i], 10))
- error(1, errno, "dccp: failed to listen on receive port");
- }
}
}
@@ -148,11 +134,6 @@ static int connect_and_send(int family, int proto)
if (fd < 0)
error(1, errno, "failed to create send socket");
- if (proto == SOCK_DCCP &&
- setsockopt(fd, SOL_DCCP, DCCP_SOCKOPT_SERVICE,
- &(int){htonl(42)}, sizeof(int)))
- error(1, errno, "failed to setsockopt");
-
if (bind(fd, saddr, sz))
error(1, errno, "failed to bind send socket");
@@ -175,7 +156,7 @@ static int receive_once(int epfd, int proto)
if (i < 0)
error(1, errno, "epoll_wait failed");
- if (proto == SOCK_STREAM || proto == SOCK_DCCP) {
+ if (proto == SOCK_STREAM) {
fd = accept(ev.data.fd, NULL, NULL);
if (fd < 0)
error(1, errno, "failed to accept");
@@ -243,20 +224,6 @@ static void run_one_test(int fam_send, int fam_rcv, int proto,
static void test_proto(int proto, const char *proto_str)
{
- if (proto == SOCK_DCCP) {
- int test_fd;
-
- test_fd = socket(AF_INET, proto, 0);
- if (test_fd < 0) {
- if (errno == ESOCKTNOSUPPORT) {
- fprintf(stderr, "DCCP not supported: skipping DCCP tests\n");
- return;
- } else
- error(1, errno, "failed to create a DCCP socket");
- }
- close(test_fd);
- }
-
fprintf(stderr, "%s IPv4 ... ", proto_str);
run_one_test(AF_INET, AF_INET, proto, IP4_ADDR);
@@ -271,7 +238,6 @@ int main(void)
{
test_proto(SOCK_DGRAM, "UDP");
test_proto(SOCK_STREAM, "TCP");
- test_proto(SOCK_DCCP, "DCCP");
fprintf(stderr, "SUCCESS\n");
return 0;
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index 02b986c9c247..9067197c9055 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -51,7 +51,9 @@ ret=0
# All tests in this script. Can be overridden with -t option.
TESTS="
neigh_suppress_arp
+ neigh_suppress_uc_arp
neigh_suppress_ns
+ neigh_suppress_uc_ns
neigh_vlan_suppress_arp
neigh_vlan_suppress_ns
"
@@ -388,6 +390,52 @@ neigh_suppress_arp()
neigh_suppress_arp_common $vid $sip $tip
}
+neigh_suppress_uc_arp_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local tip=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast ARP, per-port ARP suppression - VLAN $vid"
+ echo "-----------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 neigh replace $tip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto arp flower arp_sip $tip arp_op reply action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto arp flower arp_tip $sip arp_op reply action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn eth0.$vid -c 1 -a own -b $tmac -t arp 'request sip=$sip, tip=$tip, tmac=$tmac' -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast ARP, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_arp()
+{
+ local vid=10
+ local sip=192.0.2.1
+ local tip=192.0.2.2
+
+ neigh_suppress_uc_arp_common $vid $sip $tip
+
+ vid=20
+ sip=192.0.2.17
+ tip=192.0.2.18
+ neigh_suppress_uc_arp_common $vid $sip $tip
+}
+
neigh_suppress_ns_common()
{
local vid=$1; shift
@@ -494,6 +542,78 @@ neigh_suppress_ns()
neigh_suppress_ns_common $vid $saddr $daddr $maddr
}
+icmpv6_header_get()
+{
+ local csum=$1; shift
+ local tip=$1; shift
+ local type
+ local p
+
+ # Type 135 (Neighbor Solicitation), hex format
+ type="87"
+ p=$(:
+ )"$type:"$( : ICMPv6.type
+ )"00:"$( : ICMPv6.code
+ )"$csum:"$( : ICMPv6.checksum
+ )"00:00:00:00:"$( : Reserved
+ )"$tip:"$( : Target Address
+ )
+ echo $p
+}
+
+neigh_suppress_uc_ns_common()
+{
+ local vid=$1; shift
+ local sip=$1; shift
+ local dip=$1; shift
+ local full_dip=$1; shift
+ local csum=$1; shift
+ local tmac
+
+ echo
+ echo "Unicast NS, per-port NS suppression - VLAN $vid"
+ echo "---------------------------------------------"
+
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ log_test $? 0 "\"neigh_suppress\" is on"
+
+ tmac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $tmac dev vx0 master static vlan $vid"
+ run_cmd "ip -n $sw1 -6 neigh replace $dip lladdr $tmac nud permanent dev br0.$vid"
+
+ run_cmd "tc -n $h1 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h1 filter replace dev eth0.$vid ingress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 src_ip $dip type 136 code 0 action pass"
+
+ run_cmd "tc -n $h2 qdisc replace dev eth0.$vid clsact"
+ run_cmd "tc -n $h2 filter replace dev eth0.$vid egress pref 1 handle 101 proto ipv6 flower ip_proto icmpv6 dst_ip $sip type 136 code 0 action pass"
+
+ run_cmd "ip netns exec $h1 mausezahn -6 eth0.$vid -c 1 -a own -b $tmac -A $sip -B $dip -t ip hop=255,next=58,payload=$(icmpv6_header_get $csum $full_dip) -q"
+ tc_check_packets $h1 "dev eth0.$vid ingress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h1 filter"
+ tc_check_packets $h2 "dev eth0.$vid egress" 101 1
+ log_test $? 0 "Unicast NS, suppression on, h2 filter"
+}
+
+neigh_suppress_uc_ns()
+{
+ local vid=10
+ local saddr=2001:db8:1::1
+ local daddr=2001:db8:1::2
+ local full_daddr=20:01:0d:b8:00:01:00:00:00:00:00:00:00:00:00:02
+ local csum="ef:79"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+
+ vid=20
+ saddr=2001:db8:2::1
+ daddr=2001:db8:2::2
+ full_daddr=20:01:0d:b8:00:02:00:00:00:00:00:00:00:00:00:02
+ csum="ef:76"
+
+ neigh_suppress_uc_ns_common $vid $saddr $daddr $full_daddr $csum
+}
+
neigh_vlan_suppress_arp()
{
local vid1=10
@@ -825,6 +945,11 @@ if [ ! -x "$(command -v jq)" ]; then
exit $ksft_skip
fi
+if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "SKIP: Could not run test without mausezahn tool"
+ exit $ksft_skip
+fi
+
bridge link help 2>&1 | grep -q "neigh_vlan_suppress"
if [ $? -ne 0 ]; then
echo "SKIP: iproute2 bridge too old, missing per-VLAN neighbor suppression support"
diff --git a/tools/testing/selftests/pcie_bwctrl/Makefile b/tools/testing/selftests/pcie_bwctrl/Makefile
index 48ec048f47af..277f92f9d753 100644
--- a/tools/testing/selftests/pcie_bwctrl/Makefile
+++ b/tools/testing/selftests/pcie_bwctrl/Makefile
@@ -1,2 +1,3 @@
-TEST_PROGS = set_pcie_cooling_state.sh set_pcie_speed.sh
+TEST_PROGS = set_pcie_cooling_state.sh
+TEST_FILES = set_pcie_speed.sh
include ../lib.mk
diff --git a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
index d4ea9cd845a3..0843f6d37e9c 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/infra/qdiscs.json
@@ -313,5 +313,230 @@
"$TC qdisc del dev $DUMMY handle 1: root",
"$IP addr del 10.10.10.10/24 dev $DUMMY || true"
]
+ },
+ {
+ "id": "a4c3",
+ "name": "Test HFSC with netem/blackhole - queue emptying during peek operation",
+ "category": [
+ "qdisc",
+ "hfsc",
+ "netem",
+ "blackhole"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root drr",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:2 drr",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 plug limit 1024",
+ "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 hfsc default 1",
+ "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 3:1 handle 4:0 netem delay 1ms",
+ "$TC qdisc add dev $DUMMY parent 4:1 handle 5:0 blackhole",
+ "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true",
+ "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit",
+ "$TC class del dev $DUMMY parent 3:0 classid 3:1",
+ "$TC class add dev $DUMMY parent 3:0 classid 3:1 hfsc rt m1 5Mbit d 10ms m2 10Mbit",
+ "ping -c 3 -W 0.01 -i 0.001 -s 1 10.10.10.10 -I $DUMMY > /dev/null 2>&1 || true"
+ ],
+ "cmdUnderTest": "$TC class change dev $DUMMY parent 3:0 classid 3:1 hfsc sc m1 5Mbit d 10ms m2 10Mbit",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -s qdisc show dev $DUMMY",
+ "matchPattern": "qdisc hfsc 3:.*parent 1:2.*default 1",
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "90ec",
+ "name": "Test DRR's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root drr",
+ "$TC class replace dev $DUMMY parent 1:0 classid 1:1 drr",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "drr",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "1f1f",
+ "name": "Test ETS's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "ets"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root ets bands 2",
+ "$TC class replace dev $DUMMY parent 1:0 classid 1:1 ets quantum 1500",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s class show dev $DUMMY",
+ "matchJSON": [
+ {
+ "class": "ets",
+ "handle": "1:1",
+ "stats": {
+ "bytes": 196,
+ "packets": 2
+ }
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "5e6d",
+ "name": "Test QFQ's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "qfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root qfq",
+ "$TC class replace dev $DUMMY parent 1:0 classid 1:1 qfq weight 100 maxpkt 1500",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "qfq",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "bf1d",
+ "name": "Test HFSC's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "hfsc"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root hfsc",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 hfsc ls m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 1:1 handle 2:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip dst 10.10.10.1/32 flowid 1:1",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:2 hfsc ls m2 10Mbit",
+ "$TC qdisc add dev $DUMMY parent 1:2 handle 3:0 netem duplicate 100%",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 2 u32 match ip dst 10.10.10.2/32 flowid 1:2",
+ "ping -c 1 10.10.10.1 -I$DUMMY > /dev/null || true",
+ "$TC filter del dev $DUMMY parent 1:0 protocol ip prio 1",
+ "$TC class del dev $DUMMY classid 1:1"
+ ],
+ "cmdUnderTest": "ping -c 1 10.10.10.2 -I$DUMMY > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "hfsc",
+ "handle": "1:",
+ "bytes": 392,
+ "packets": 4
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
+ },
+ {
+ "id": "7c3b",
+ "name": "Test nested DRR's enqueue reentrant behaviour with netem",
+ "category": [
+ "qdisc",
+ "drr"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ "$IP link set dev $DUMMY up || true",
+ "$IP addr add 10.10.10.10/24 dev $DUMMY || true",
+ "$TC qdisc add dev $DUMMY handle 1:0 root drr",
+ "$TC class add dev $DUMMY parent 1:0 classid 1:1 drr",
+ "$TC filter add dev $DUMMY parent 1:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 1:1",
+ "$TC qdisc add dev $DUMMY handle 2:0 parent 1:1 drr",
+ "$TC class add dev $DUMMY classid 2:1 parent 2:0 drr",
+ "$TC filter add dev $DUMMY parent 2:0 protocol ip prio 1 u32 match ip protocol 1 0xff flowid 2:1",
+ "$TC qdisc add dev $DUMMY parent 2:1 handle 3:0 netem duplicate 100%"
+ ],
+ "cmdUnderTest": "ping -c 1 -I $DUMMY 10.10.10.1 > /dev/null || true",
+ "expExitCode": "0",
+ "verifyCmd": "$TC -j -s qdisc ls dev $DUMMY handle 1:0",
+ "matchJSON": [
+ {
+ "kind": "drr",
+ "handle": "1:",
+ "bytes": 196,
+ "packets": 2
+ }
+ ],
+ "matchCount": "1",
+ "teardown": [
+ "$TC qdisc del dev $DUMMY handle 1:0 root",
+ "$IP addr del 10.10.10.10/24 dev $DUMMY || true"
+ ]
}
]
diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh
index cddff1772e10..589b18ed758a 100755
--- a/tools/testing/selftests/tc-testing/tdc.sh
+++ b/tools/testing/selftests/tc-testing/tdc.sh
@@ -31,6 +31,10 @@ try_modprobe act_skbedit
try_modprobe act_skbmod
try_modprobe act_tunnel_key
try_modprobe act_vlan
+try_modprobe act_ife
+try_modprobe act_meta_mark
+try_modprobe act_meta_skbtcindex
+try_modprobe act_meta_skbprio
try_modprobe cls_basic
try_modprobe cls_bpf
try_modprobe cls_cgroup
diff --git a/tools/testing/selftests/ublk/Makefile b/tools/testing/selftests/ublk/Makefile
index c7781efea0f3..ec4624a283bc 100644
--- a/tools/testing/selftests/ublk/Makefile
+++ b/tools/testing/selftests/ublk/Makefile
@@ -6,6 +6,9 @@ LDLIBS += -lpthread -lm -luring
TEST_PROGS := test_generic_01.sh
TEST_PROGS += test_generic_02.sh
TEST_PROGS += test_generic_03.sh
+TEST_PROGS += test_generic_04.sh
+TEST_PROGS += test_generic_05.sh
+TEST_PROGS += test_generic_06.sh
TEST_PROGS += test_null_01.sh
TEST_PROGS += test_null_02.sh
@@ -21,12 +24,16 @@ TEST_PROGS += test_stripe_04.sh
TEST_PROGS += test_stress_01.sh
TEST_PROGS += test_stress_02.sh
+TEST_PROGS += test_stress_03.sh
+TEST_PROGS += test_stress_04.sh
+TEST_PROGS += test_stress_05.sh
TEST_GEN_PROGS_EXTENDED = kublk
include ../lib.mk
-$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c
+$(TEST_GEN_PROGS_EXTENDED): kublk.c null.c file_backed.c common.c stripe.c \
+ fault_inject.c
check:
shellcheck -x -f gcc *.sh
diff --git a/tools/testing/selftests/ublk/fault_inject.c b/tools/testing/selftests/ublk/fault_inject.c
new file mode 100644
index 000000000000..94a8e729ba4c
--- /dev/null
+++ b/tools/testing/selftests/ublk/fault_inject.c
@@ -0,0 +1,98 @@
+// SPDX-License-Identifier: GPL-2.0
+
+/*
+ * Fault injection ublk target. Hack this up however you like for
+ * testing specific behaviors of ublk_drv. Currently is a null target
+ * with a configurable delay before completing each I/O. This delay can
+ * be used to test ublk_drv's handling of I/O outstanding to the ublk
+ * server when it dies.
+ */
+
+#include "kublk.h"
+
+static int ublk_fault_inject_tgt_init(const struct dev_ctx *ctx,
+ struct ublk_dev *dev)
+{
+ const struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
+ unsigned long dev_size = 250UL << 30;
+
+ dev->tgt.dev_size = dev_size;
+ dev->tgt.params = (struct ublk_params) {
+ .types = UBLK_PARAM_TYPE_BASIC,
+ .basic = {
+ .logical_bs_shift = 9,
+ .physical_bs_shift = 12,
+ .io_opt_shift = 12,
+ .io_min_shift = 9,
+ .max_sectors = info->max_io_buf_bytes >> 9,
+ .dev_sectors = dev_size >> 9,
+ },
+ };
+
+ dev->private_data = (void *)(unsigned long)(ctx->fault_inject.delay_us * 1000);
+ return 0;
+}
+
+static int ublk_fault_inject_queue_io(struct ublk_queue *q, int tag)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+ struct io_uring_sqe *sqe;
+ struct __kernel_timespec ts = {
+ .tv_nsec = (long long)q->dev->private_data,
+ };
+
+ ublk_queue_alloc_sqes(q, &sqe, 1);
+ io_uring_prep_timeout(sqe, &ts, 1, 0);
+ sqe->user_data = build_user_data(tag, ublksrv_get_op(iod), 0, 1);
+
+ ublk_queued_tgt_io(q, tag, 1);
+
+ return 0;
+}
+
+static void ublk_fault_inject_tgt_io_done(struct ublk_queue *q, int tag,
+ const struct io_uring_cqe *cqe)
+{
+ const struct ublksrv_io_desc *iod = ublk_get_iod(q, tag);
+
+ if (cqe->res != -ETIME)
+ ublk_err("%s: unexpected cqe res %d\n", __func__, cqe->res);
+
+ if (ublk_completed_tgt_io(q, tag))
+ ublk_complete_io(q, tag, iod->nr_sectors << 9);
+ else
+ ublk_err("%s: io not complete after 1 cqe\n", __func__);
+}
+
+static void ublk_fault_inject_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "delay_us", 1, NULL, 0 },
+ { 0, 0, 0, 0 }
+ };
+ int option_idx, opt;
+
+ ctx->fault_inject.delay_us = 0;
+ while ((opt = getopt_long(argc, argv, "",
+ longopts, &option_idx)) != -1) {
+ switch (opt) {
+ case 0:
+ if (!strcmp(longopts[option_idx].name, "delay_us"))
+ ctx->fault_inject.delay_us = strtoll(optarg, NULL, 10);
+ }
+ }
+}
+
+static void ublk_fault_inject_usage(const struct ublk_tgt_ops *ops)
+{
+ printf("\tfault_inject: [--delay_us us (default 0)]\n");
+}
+
+const struct ublk_tgt_ops fault_inject_tgt_ops = {
+ .name = "fault_inject",
+ .init_tgt = ublk_fault_inject_tgt_init,
+ .queue_io = ublk_fault_inject_queue_io,
+ .tgt_io_done = ublk_fault_inject_tgt_io_done,
+ .parse_cmd_line = ublk_fault_inject_cmd_line,
+ .usage = ublk_fault_inject_usage,
+};
diff --git a/tools/testing/selftests/ublk/kublk.c b/tools/testing/selftests/ublk/kublk.c
index 91c282bc7674..e57a1486bb48 100644
--- a/tools/testing/selftests/ublk/kublk.c
+++ b/tools/testing/selftests/ublk/kublk.c
@@ -5,22 +5,24 @@
#include "kublk.h"
+#define MAX_NR_TGT_ARG 64
+
unsigned int ublk_dbg_mask = UBLK_LOG;
static const struct ublk_tgt_ops *tgt_ops_list[] = {
&null_tgt_ops,
&loop_tgt_ops,
&stripe_tgt_ops,
+ &fault_inject_tgt_ops,
};
static const struct ublk_tgt_ops *ublk_find_tgt(const char *name)
{
- const struct ublk_tgt_ops *ops;
int i;
if (name == NULL)
return NULL;
- for (i = 0; sizeof(tgt_ops_list) / sizeof(ops); i++)
+ for (i = 0; i < ARRAY_SIZE(tgt_ops_list); i++)
if (strcmp(tgt_ops_list[i]->name, name) == 0)
return tgt_ops_list[i];
return NULL;
@@ -118,6 +120,27 @@ static int ublk_ctrl_start_dev(struct ublk_dev *dev,
return __ublk_ctrl_cmd(dev, &data);
}
+static int ublk_ctrl_start_user_recovery(struct ublk_dev *dev)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_START_USER_RECOVERY,
+ };
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
+static int ublk_ctrl_end_user_recovery(struct ublk_dev *dev, int daemon_pid)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_END_USER_RECOVERY,
+ .flags = CTRL_CMD_HAS_DATA,
+ };
+
+ dev->dev_info.ublksrv_pid = data.data[0] = daemon_pid;
+
+ return __ublk_ctrl_cmd(dev, &data);
+}
+
static int ublk_ctrl_add_dev(struct ublk_dev *dev)
{
struct ublk_ctrl_cmd_data data = {
@@ -207,10 +230,73 @@ static const char *ublk_dev_state_desc(struct ublk_dev *dev)
};
}
+static void ublk_print_cpu_set(const cpu_set_t *set, char *buf, unsigned len)
+{
+ unsigned done = 0;
+ int i;
+
+ for (i = 0; i < CPU_SETSIZE; i++) {
+ if (CPU_ISSET(i, set))
+ done += snprintf(&buf[done], len - done, "%d ", i);
+ }
+}
+
+static void ublk_adjust_affinity(cpu_set_t *set)
+{
+ int j, updated = 0;
+
+ /*
+ * Just keep the 1st CPU now.
+ *
+ * In future, auto affinity selection can be tried.
+ */
+ for (j = 0; j < CPU_SETSIZE; j++) {
+ if (CPU_ISSET(j, set)) {
+ if (!updated) {
+ updated = 1;
+ continue;
+ }
+ CPU_CLR(j, set);
+ }
+ }
+}
+
+/* Caller must free the allocated buffer */
+static int ublk_ctrl_get_affinity(struct ublk_dev *ctrl_dev, cpu_set_t **ptr_buf)
+{
+ struct ublk_ctrl_cmd_data data = {
+ .cmd_op = UBLK_U_CMD_GET_QUEUE_AFFINITY,
+ .flags = CTRL_CMD_HAS_DATA | CTRL_CMD_HAS_BUF,
+ };
+ cpu_set_t *buf;
+ int i, ret;
+
+ buf = malloc(sizeof(cpu_set_t) * ctrl_dev->dev_info.nr_hw_queues);
+ if (!buf)
+ return -ENOMEM;
+
+ for (i = 0; i < ctrl_dev->dev_info.nr_hw_queues; i++) {
+ data.data[0] = i;
+ data.len = sizeof(cpu_set_t);
+ data.addr = (__u64)&buf[i];
+
+ ret = __ublk_ctrl_cmd(ctrl_dev, &data);
+ if (ret < 0) {
+ free(buf);
+ return ret;
+ }
+ ublk_adjust_affinity(&buf[i]);
+ }
+
+ *ptr_buf = buf;
+ return 0;
+}
+
static void ublk_ctrl_dump(struct ublk_dev *dev)
{
struct ublksrv_ctrl_dev_info *info = &dev->dev_info;
struct ublk_params p;
+ cpu_set_t *affinity;
int ret;
ret = ublk_ctrl_get_params(dev, &p);
@@ -219,12 +305,31 @@ static void ublk_ctrl_dump(struct ublk_dev *dev)
return;
}
+ ret = ublk_ctrl_get_affinity(dev, &affinity);
+ if (ret < 0) {
+ ublk_err("failed to get affinity %m\n");
+ return;
+ }
+
ublk_log("dev id %d: nr_hw_queues %d queue_depth %d block size %d dev_capacity %lld\n",
info->dev_id, info->nr_hw_queues, info->queue_depth,
1 << p.basic.logical_bs_shift, p.basic.dev_sectors);
ublk_log("\tmax rq size %d daemon pid %d flags 0x%llx state %s\n",
info->max_io_buf_bytes, info->ublksrv_pid, info->flags,
ublk_dev_state_desc(dev));
+
+ if (affinity) {
+ char buf[512];
+ int i;
+
+ for (i = 0; i < info->nr_hw_queues; i++) {
+ ublk_print_cpu_set(&affinity[i], buf, sizeof(buf));
+ printf("\tqueue %u: tid %d affinity(%s)\n",
+ i, dev->q[i].tid, buf);
+ }
+ free(affinity);
+ }
+
fflush(stdout);
}
@@ -347,7 +452,9 @@ static int ublk_queue_init(struct ublk_queue *q)
}
ret = ublk_setup_ring(&q->ring, ring_depth, cq_depth,
- IORING_SETUP_COOP_TASKRUN);
+ IORING_SETUP_COOP_TASKRUN |
+ IORING_SETUP_SINGLE_ISSUER |
+ IORING_SETUP_DEFER_TASKRUN);
if (ret < 0) {
ublk_err("ublk dev %d queue %d setup io_uring failed %d\n",
q->dev->dev_info.dev_id, q->q_id, ret);
@@ -602,9 +709,24 @@ static int ublk_process_io(struct ublk_queue *q)
return reapped;
}
+static void ublk_queue_set_sched_affinity(const struct ublk_queue *q,
+ cpu_set_t *cpuset)
+{
+ if (sched_setaffinity(0, sizeof(*cpuset), cpuset) < 0)
+ ublk_err("ublk dev %u queue %u set affinity failed",
+ q->dev->dev_info.dev_id, q->q_id);
+}
+
+struct ublk_queue_info {
+ struct ublk_queue *q;
+ sem_t *queue_sem;
+ cpu_set_t *affinity;
+};
+
static void *ublk_io_handler_fn(void *data)
{
- struct ublk_queue *q = data;
+ struct ublk_queue_info *info = data;
+ struct ublk_queue *q = info->q;
int dev_id = q->dev->dev_info.dev_id;
int ret;
@@ -614,6 +736,10 @@ static void *ublk_io_handler_fn(void *data)
dev_id, q->q_id);
return NULL;
}
+ /* IO perf is sensitive with queue pthread affinity on NUMA machine*/
+ ublk_queue_set_sched_affinity(q, info->affinity);
+ sem_post(info->queue_sem);
+
ublk_dbg(UBLK_DBG_QUEUE, "tid %d: ublk dev %d queue %d started\n",
q->tid, dev_id, q->q_id);
@@ -639,7 +765,7 @@ static void ublk_set_parameters(struct ublk_dev *dev)
dev->dev_info.dev_id, ret);
}
-static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
+static int ublk_send_dev_event(const struct dev_ctx *ctx, struct ublk_dev *dev, int dev_id)
{
uint64_t id;
int evtfd = ctx->_evtfd;
@@ -652,36 +778,68 @@ static int ublk_send_dev_event(const struct dev_ctx *ctx, int dev_id)
else
id = ERROR_EVTFD_DEVID;
+ if (dev && ctx->shadow_dev)
+ memcpy(&ctx->shadow_dev->q, &dev->q, sizeof(dev->q));
+
if (write(evtfd, &id, sizeof(id)) != sizeof(id))
return -EINVAL;
+ close(evtfd);
+ shmdt(ctx->shadow_dev);
+
return 0;
}
static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
{
- int ret, i;
- void *thread_ret;
const struct ublksrv_ctrl_dev_info *dinfo = &dev->dev_info;
+ struct ublk_queue_info *qinfo;
+ cpu_set_t *affinity_buf;
+ void *thread_ret;
+ sem_t queue_sem;
+ int ret, i;
ublk_dbg(UBLK_DBG_DEV, "%s enter\n", __func__);
+ qinfo = (struct ublk_queue_info *)calloc(sizeof(struct ublk_queue_info),
+ dinfo->nr_hw_queues);
+ if (!qinfo)
+ return -ENOMEM;
+
+ sem_init(&queue_sem, 0, 0);
ret = ublk_dev_prep(ctx, dev);
if (ret)
return ret;
+ ret = ublk_ctrl_get_affinity(dev, &affinity_buf);
+ if (ret)
+ return ret;
+
for (i = 0; i < dinfo->nr_hw_queues; i++) {
dev->q[i].dev = dev;
dev->q[i].q_id = i;
+
+ qinfo[i].q = &dev->q[i];
+ qinfo[i].queue_sem = &queue_sem;
+ qinfo[i].affinity = &affinity_buf[i];
pthread_create(&dev->q[i].thread, NULL,
ublk_io_handler_fn,
- &dev->q[i]);
+ &qinfo[i]);
}
+ for (i = 0; i < dinfo->nr_hw_queues; i++)
+ sem_wait(&queue_sem);
+ free(qinfo);
+ free(affinity_buf);
+
/* everything is fine now, start us */
- ublk_set_parameters(dev);
- ret = ublk_ctrl_start_dev(dev, getpid());
+ if (ctx->recovery)
+ ret = ublk_ctrl_end_user_recovery(dev, getpid());
+ else {
+ ublk_set_parameters(dev);
+ ret = ublk_ctrl_start_dev(dev, getpid());
+ }
if (ret < 0) {
ublk_err("%s: ublk_ctrl_start_dev failed: %d\n", __func__, ret);
goto fail;
@@ -691,7 +849,7 @@ static int ublk_start_daemon(const struct dev_ctx *ctx, struct ublk_dev *dev)
if (ctx->fg)
ublk_ctrl_dump(dev);
else
- ublk_send_dev_event(ctx, dev->dev_info.dev_id);
+ ublk_send_dev_event(ctx, dev, dev->dev_info.dev_id);
/* wait until we are terminated */
for (i = 0; i < dinfo->nr_hw_queues; i++)
@@ -856,7 +1014,10 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
}
}
- ret = ublk_ctrl_add_dev(dev);
+ if (ctx->recovery)
+ ret = ublk_ctrl_start_user_recovery(dev);
+ else
+ ret = ublk_ctrl_add_dev(dev);
if (ret < 0) {
ublk_err("%s: can't add dev id %d, type %s ret %d\n",
__func__, dev_id, tgt_type, ret);
@@ -870,7 +1031,7 @@ static int __cmd_dev_add(const struct dev_ctx *ctx)
fail:
if (ret < 0)
- ublk_send_dev_event(ctx, -1);
+ ublk_send_dev_event(ctx, dev, -1);
ublk_ctrl_deinit(dev);
return ret;
}
@@ -884,30 +1045,58 @@ static int cmd_dev_add(struct dev_ctx *ctx)
if (ctx->fg)
goto run;
+ ctx->_shmid = shmget(IPC_PRIVATE, sizeof(struct ublk_dev), IPC_CREAT | 0666);
+ if (ctx->_shmid < 0) {
+ ublk_err("%s: failed to shmget %s\n", __func__, strerror(errno));
+ exit(-1);
+ }
+ ctx->shadow_dev = (struct ublk_dev *)shmat(ctx->_shmid, NULL, 0);
+ if (ctx->shadow_dev == (struct ublk_dev *)-1) {
+ ublk_err("%s: failed to shmat %s\n", __func__, strerror(errno));
+ exit(-1);
+ }
ctx->_evtfd = eventfd(0, 0);
if (ctx->_evtfd < 0) {
ublk_err("%s: failed to create eventfd %s\n", __func__, strerror(errno));
exit(-1);
}
- setsid();
res = fork();
if (res == 0) {
+ int res2;
+
+ setsid();
+ res2 = fork();
+ if (res2 == 0) {
+ /* prepare for detaching */
+ close(STDIN_FILENO);
+ close(STDOUT_FILENO);
+ close(STDERR_FILENO);
run:
- res = __cmd_dev_add(ctx);
- return res;
+ res = __cmd_dev_add(ctx);
+ return res;
+ } else {
+ /* detached from the foreground task */
+ exit(EXIT_SUCCESS);
+ }
} else if (res > 0) {
uint64_t id;
+ int exit_code = EXIT_FAILURE;
res = read(ctx->_evtfd, &id, sizeof(id));
close(ctx->_evtfd);
if (res == sizeof(id) && id != ERROR_EVTFD_DEVID) {
ctx->dev_id = id - 1;
- return __cmd_dev_list(ctx);
+ if (__cmd_dev_list(ctx) >= 0)
+ exit_code = EXIT_SUCCESS;
}
- exit(EXIT_FAILURE);
+ shmdt(ctx->shadow_dev);
+ shmctl(ctx->_shmid, IPC_RMID, NULL);
+ /* wait for child and detach from it */
+ wait(NULL);
+ exit(exit_code);
} else {
- return res;
+ exit(EXIT_FAILURE);
}
}
@@ -969,6 +1158,9 @@ static int __cmd_dev_list(struct dev_ctx *ctx)
ublk_err("%s: can't get dev info from %d: %d\n",
__func__, ctx->dev_id, ret);
} else {
+ if (ctx->shadow_dev)
+ memcpy(&dev->q, ctx->shadow_dev->q, sizeof(dev->q));
+
ublk_ctrl_dump(dev);
}
@@ -1039,14 +1231,47 @@ static int cmd_dev_get_features(void)
return ret;
}
+static void __cmd_create_help(char *exe, bool recovery)
+{
+ int i;
+
+ printf("%s %s -t [null|loop|stripe|fault_inject] [-q nr_queues] [-d depth] [-n dev_id]\n",
+ exe, recovery ? "recover" : "add");
+ printf("\t[--foreground] [--quiet] [-z] [--debug_mask mask] [-r 0|1 ] [-g 0|1]\n");
+ printf("\t[-e 0|1 ] [-i 0|1]\n");
+ printf("\t[target options] [backfile1] [backfile2] ...\n");
+ printf("\tdefault: nr_queues=2(max 32), depth=128(max 1024), dev_id=-1(auto allocation)\n");
+
+ for (i = 0; i < sizeof(tgt_ops_list) / sizeof(tgt_ops_list[0]); i++) {
+ const struct ublk_tgt_ops *ops = tgt_ops_list[i];
+
+ if (ops->usage)
+ ops->usage(ops);
+ }
+}
+
+static void cmd_add_help(char *exe)
+{
+ __cmd_create_help(exe, false);
+ printf("\n");
+}
+
+static void cmd_recover_help(char *exe)
+{
+ __cmd_create_help(exe, true);
+ printf("\tPlease provide exact command line for creating this device with real dev_id\n");
+ printf("\n");
+}
+
static int cmd_dev_help(char *exe)
{
- printf("%s add -t [null|loop] [-q nr_queues] [-d depth] [-n dev_id] [backfile1] [backfile2] ...\n", exe);
- printf("\t default: nr_queues=2(max 4), depth=128(max 128), dev_id=-1(auto allocation)\n");
+ cmd_add_help(exe);
+ cmd_recover_help(exe);
+
printf("%s del [-n dev_id] -a \n", exe);
- printf("\t -a delete all devices -n delete specified device\n");
+ printf("\t -a delete all devices -n delete specified device\n\n");
printf("%s list [-n dev_id] -a \n", exe);
- printf("\t -a list all devices, -n list specified device, default -a \n");
+ printf("\t -a list all devices, -n list specified device, default -a \n\n");
printf("%s features\n", exe);
return 0;
}
@@ -1063,9 +1288,13 @@ int main(int argc, char *argv[])
{ "quiet", 0, NULL, 0 },
{ "zero_copy", 0, NULL, 'z' },
{ "foreground", 0, NULL, 0 },
- { "chunk_size", 1, NULL, 0 },
+ { "recovery", 1, NULL, 'r' },
+ { "recovery_fail_io", 1, NULL, 'e'},
+ { "recovery_reissue", 1, NULL, 'i'},
+ { "get_data", 1, NULL, 'g'},
{ 0, 0, 0, 0 }
};
+ const struct ublk_tgt_ops *ops = NULL;
int option_idx, opt;
const char *cmd = argv[1];
struct dev_ctx ctx = {
@@ -1073,15 +1302,18 @@ int main(int argc, char *argv[])
.nr_hw_queues = 2,
.dev_id = -1,
.tgt_type = "unknown",
- .chunk_size = 65536, /* def chunk size is 64K */
};
int ret = -EINVAL, i;
+ int tgt_argc = 1;
+ char *tgt_argv[MAX_NR_TGT_ARG] = { NULL };
+ int value;
if (argc == 1)
return ret;
+ opterr = 0;
optind = 2;
- while ((opt = getopt_long(argc, argv, "t:n:d:q:az",
+ while ((opt = getopt_long(argc, argv, "t:n:d:q:r:e:i:az",
longopts, &option_idx)) != -1) {
switch (opt) {
case 'a':
@@ -1103,6 +1335,26 @@ int main(int argc, char *argv[])
case 'z':
ctx.flags |= UBLK_F_SUPPORT_ZERO_COPY | UBLK_F_USER_COPY;
break;
+ case 'r':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_USER_RECOVERY;
+ break;
+ case 'e':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_FAIL_IO;
+ break;
+ case 'i':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_USER_RECOVERY | UBLK_F_USER_RECOVERY_REISSUE;
+ break;
+ case 'g':
+ value = strtol(optarg, NULL, 10);
+ if (value)
+ ctx.flags |= UBLK_F_NEED_GET_DATA;
+ break;
case 0:
if (!strcmp(longopts[option_idx].name, "debug_mask"))
ublk_dbg_mask = strtol(optarg, NULL, 16);
@@ -1110,8 +1362,26 @@ int main(int argc, char *argv[])
ublk_dbg_mask = 0;
if (!strcmp(longopts[option_idx].name, "foreground"))
ctx.fg = 1;
- if (!strcmp(longopts[option_idx].name, "chunk_size"))
- ctx.chunk_size = strtol(optarg, NULL, 10);
+ break;
+ case '?':
+ /*
+ * target requires every option must have argument
+ */
+ if (argv[optind][0] == '-' || argv[optind - 1][0] != '-') {
+ fprintf(stderr, "every target option requires argument: %s %s\n",
+ argv[optind - 1], argv[optind]);
+ exit(EXIT_FAILURE);
+ }
+
+ if (tgt_argc < (MAX_NR_TGT_ARG - 1) / 2) {
+ tgt_argv[tgt_argc++] = argv[optind - 1];
+ tgt_argv[tgt_argc++] = argv[optind];
+ } else {
+ fprintf(stderr, "too many target options\n");
+ exit(EXIT_FAILURE);
+ }
+ optind += 1;
+ break;
}
}
@@ -1120,9 +1390,25 @@ int main(int argc, char *argv[])
ctx.files[ctx.nr_files++] = argv[i++];
}
+ ops = ublk_find_tgt(ctx.tgt_type);
+ if (ops && ops->parse_cmd_line) {
+ optind = 0;
+
+ tgt_argv[0] = ctx.tgt_type;
+ ops->parse_cmd_line(&ctx, tgt_argc, tgt_argv);
+ }
+
if (!strcmp(cmd, "add"))
ret = cmd_dev_add(&ctx);
- else if (!strcmp(cmd, "del"))
+ else if (!strcmp(cmd, "recover")) {
+ if (ctx.dev_id < 0) {
+ fprintf(stderr, "device id isn't provided for recovering\n");
+ ret = -EINVAL;
+ } else {
+ ctx.recovery = 1;
+ ret = cmd_dev_add(&ctx);
+ }
+ } else if (!strcmp(cmd, "del"))
ret = cmd_dev_del(&ctx);
else if (!strcmp(cmd, "list")) {
ctx.all = 1;
diff --git a/tools/testing/selftests/ublk/kublk.h b/tools/testing/selftests/ublk/kublk.h
index 760ff8ffb810..918db5cd633f 100644
--- a/tools/testing/selftests/ublk/kublk.h
+++ b/tools/testing/selftests/ublk/kublk.h
@@ -20,9 +20,15 @@
#include <sys/wait.h>
#include <sys/eventfd.h>
#include <sys/uio.h>
+#include <sys/ipc.h>
+#include <sys/shm.h>
+#include <linux/io_uring.h>
#include <liburing.h>
-#include <linux/ublk_cmd.h>
+#include <semaphore.h>
+
+/* allow ublk_dep.h to override ublk_cmd.h */
#include "ublk_dep.h"
+#include <linux/ublk_cmd.h>
#define __maybe_unused __attribute__((unused))
#define MAX_BACK_FILES 4
@@ -30,6 +36,8 @@
#define min(a, b) ((a) < (b) ? (a) : (b))
#endif
+#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0]))
+
/****************** part 1: libublk ********************/
#define CTRL_DEV "/dev/ublk-control"
@@ -42,8 +50,8 @@
#define UBLKSRV_IO_IDLE_SECS 20
#define UBLK_IO_MAX_BYTES (1 << 20)
-#define UBLK_MAX_QUEUES 4
-#define UBLK_QUEUE_DEPTH 128
+#define UBLK_MAX_QUEUES 32
+#define UBLK_QUEUE_DEPTH 1024
#define UBLK_DBG_DEV (1U << 0)
#define UBLK_DBG_QUEUE (1U << 1)
@@ -55,6 +63,16 @@
struct ublk_dev;
struct ublk_queue;
+struct stripe_ctx {
+ /* stripe */
+ unsigned int chunk_size;
+};
+
+struct fault_inject_ctx {
+ /* fault_inject */
+ unsigned long delay_us;
+};
+
struct dev_ctx {
char tgt_type[16];
unsigned long flags;
@@ -66,11 +84,18 @@ struct dev_ctx {
unsigned int logging:1;
unsigned int all:1;
unsigned int fg:1;
-
- /* stripe */
- unsigned int chunk_size;
+ unsigned int recovery:1;
int _evtfd;
+ int _shmid;
+
+ /* built from shmem, only for ublk_dump_dev() */
+ struct ublk_dev *shadow_dev;
+
+ union {
+ struct stripe_ctx stripe;
+ struct fault_inject_ctx fault_inject;
+ };
};
struct ublk_ctrl_cmd_data {
@@ -107,6 +132,14 @@ struct ublk_tgt_ops {
int (*queue_io)(struct ublk_queue *, int tag);
void (*tgt_io_done)(struct ublk_queue *,
int tag, const struct io_uring_cqe *);
+
+ /*
+ * Target specific command line handling
+ *
+ * each option requires argument for target command line
+ */
+ void (*parse_cmd_line)(struct dev_ctx *ctx, int argc, char *argv[]);
+ void (*usage)(const struct ublk_tgt_ops *ops);
};
struct ublk_tgt {
@@ -357,6 +390,7 @@ static inline int ublk_queue_use_zc(const struct ublk_queue *q)
extern const struct ublk_tgt_ops null_tgt_ops;
extern const struct ublk_tgt_ops loop_tgt_ops;
extern const struct ublk_tgt_ops stripe_tgt_ops;
+extern const struct ublk_tgt_ops fault_inject_tgt_ops;
void backing_file_tgt_deinit(struct ublk_dev *dev);
int backing_file_tgt_init(struct ublk_dev *dev);
diff --git a/tools/testing/selftests/ublk/stripe.c b/tools/testing/selftests/ublk/stripe.c
index 179731c3dd6f..5dbd6392d83d 100644
--- a/tools/testing/selftests/ublk/stripe.c
+++ b/tools/testing/selftests/ublk/stripe.c
@@ -281,7 +281,7 @@ static int ublk_stripe_tgt_init(const struct dev_ctx *ctx, struct ublk_dev *dev)
.max_sectors = dev->dev_info.max_io_buf_bytes >> 9,
},
};
- unsigned chunk_size = ctx->chunk_size;
+ unsigned chunk_size = ctx->stripe.chunk_size;
struct stripe_conf *conf;
unsigned chunk_shift;
loff_t bytes = 0;
@@ -344,10 +344,36 @@ static void ublk_stripe_tgt_deinit(struct ublk_dev *dev)
backing_file_tgt_deinit(dev);
}
+static void ublk_stripe_cmd_line(struct dev_ctx *ctx, int argc, char *argv[])
+{
+ static const struct option longopts[] = {
+ { "chunk_size", 1, NULL, 0 },
+ { 0, 0, 0, 0 }
+ };
+ int option_idx, opt;
+
+ ctx->stripe.chunk_size = 65536;
+ while ((opt = getopt_long(argc, argv, "",
+ longopts, &option_idx)) != -1) {
+ switch (opt) {
+ case 0:
+ if (!strcmp(longopts[option_idx].name, "chunk_size"))
+ ctx->stripe.chunk_size = strtol(optarg, NULL, 10);
+ }
+ }
+}
+
+static void ublk_stripe_usage(const struct ublk_tgt_ops *ops)
+{
+ printf("\tstripe: [--chunk_size chunk_size (default 65536)]\n");
+}
+
const struct ublk_tgt_ops stripe_tgt_ops = {
.name = "stripe",
.init_tgt = ublk_stripe_tgt_init,
.deinit_tgt = ublk_stripe_tgt_deinit,
.queue_io = ublk_stripe_queue_io,
.tgt_io_done = ublk_stripe_io_done,
+ .parse_cmd_line = ublk_stripe_cmd_line,
+ .usage = ublk_stripe_usage,
};
diff --git a/tools/testing/selftests/ublk/test_common.sh b/tools/testing/selftests/ublk/test_common.sh
index a88b35943227..a81210ca3e99 100755
--- a/tools/testing/selftests/ublk/test_common.sh
+++ b/tools/testing/selftests/ublk/test_common.sh
@@ -17,8 +17,8 @@ _get_disk_dev_t() {
local minor
dev=/dev/ublkb"${dev_id}"
- major=$(stat -c '%Hr' "$dev")
- minor=$(stat -c '%Lr' "$dev")
+ major="0x"$(stat -c '%t' "$dev")
+ minor="0x"$(stat -c '%T' "$dev")
echo $(( (major & 0xfff) << 20 | (minor & 0xfffff) ))
}
@@ -30,18 +30,26 @@ _run_fio_verify_io() {
}
_create_backfile() {
- local my_size=$1
- local my_file
+ local index=$1
+ local new_size=$2
+ local old_file
+ local new_file
- my_file=$(mktemp ublk_file_"${my_size}"_XXXXX)
- truncate -s "${my_size}" "${my_file}"
- echo "$my_file"
+ old_file="${UBLK_BACKFILES[$index]}"
+ [ -f "$old_file" ] && rm -f "$old_file"
+
+ new_file=$(mktemp ublk_file_"${new_size}"_XXXXX)
+ truncate -s "${new_size}" "${new_file}"
+ UBLK_BACKFILES["$index"]="$new_file"
}
-_remove_backfile() {
- local file=$1
+_remove_files() {
+ local file
- [ -f "$file" ] && rm -f "$file"
+ for file in "${UBLK_BACKFILES[@]}"; do
+ [ -f "$file" ] && rm -f "$file"
+ done
+ [ -f "$UBLK_TMP" ] && rm -f "$UBLK_TMP"
}
_create_tmp_dir() {
@@ -106,6 +114,7 @@ _prep_test() {
local type=$1
shift 1
modprobe ublk_drv > /dev/null 2>&1
+ UBLK_TMP=$(mktemp ublk_test_XXXXX)
[ "$UBLK_TEST_QUIET" -eq 0 ] && echo "ublk $type: $*"
}
@@ -129,7 +138,10 @@ _show_result()
echo "$1 : [FAIL]"
fi
fi
- [ "$2" -ne 0 ] && exit "$2"
+ if [ "$2" -ne 0 ]; then
+ _remove_files
+ exit "$2"
+ fi
return 0
}
@@ -138,16 +150,16 @@ _check_add_dev()
{
local tid=$1
local code=$2
- shift 2
+
if [ "${code}" -ne 0 ]; then
- _remove_test_files "$@"
_show_result "${tid}" "${code}"
fi
}
_cleanup_test() {
"${UBLK_PROG}" del -a
- rm -f "$UBLK_TMP"
+
+ _remove_files
}
_have_feature()
@@ -158,9 +170,11 @@ _have_feature()
return 1
}
-_add_ublk_dev() {
- local kublk_temp;
+_create_ublk_dev() {
local dev_id;
+ local cmd=$1
+
+ shift 1
if [ ! -c /dev/ublk-control ]; then
return ${UBLK_SKIP_CODE}
@@ -171,17 +185,34 @@ _add_ublk_dev() {
fi
fi
- kublk_temp=$(mktemp /tmp/kublk-XXXXXX)
- if ! "${UBLK_PROG}" add "$@" > "${kublk_temp}" 2>&1; then
+ if ! dev_id=$("${UBLK_PROG}" "$cmd" "$@" | grep "dev id" | awk -F '[ :]' '{print $3}'); then
echo "fail to add ublk dev $*"
- rm -f "${kublk_temp}"
return 255
fi
-
- dev_id=$(grep "dev id" "${kublk_temp}" | awk -F '[ :]' '{print $3}')
udevadm settle
- rm -f "${kublk_temp}"
- echo "${dev_id}"
+
+ if [[ "$dev_id" =~ ^[0-9]+$ ]]; then
+ echo "${dev_id}"
+ else
+ return 255
+ fi
+}
+
+_add_ublk_dev() {
+ _create_ublk_dev "add" "$@"
+}
+
+_recover_ublk_dev() {
+ local dev_id
+ local state
+
+ dev_id=$(_create_ublk_dev "recover" "$@")
+ for ((j=0;j<20;j++)); do
+ state=$(_get_ublk_dev_state "${dev_id}")
+ [ "$state" == "LIVE" ] && break
+ sleep 1
+ done
+ echo "$state"
}
# kill the ublk daemon and return ublk device state
@@ -220,7 +251,7 @@ __run_io_and_remove()
local kill_server=$3
fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
- --rw=readwrite --iodepth=64 --size="${size}" --numjobs=4 \
+ --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
--runtime=20 --time_based > /dev/null 2>&1 &
sleep 2
if [ "${kill_server}" = "yes" ]; then
@@ -238,15 +269,80 @@ __run_io_and_remove()
wait
}
+run_io_and_remove()
+{
+ local size=$1
+ local dev_id
+ shift 1
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev "$TID" $?
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
+ if ! __run_io_and_remove "$dev_id" "${size}" "no"; then
+ echo "/dev/ublkc$dev_id isn't removed"
+ exit 255
+ fi
+}
+
+run_io_and_kill_daemon()
+{
+ local size=$1
+ local dev_id
+ shift 1
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev "$TID" $?
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
+ if ! __run_io_and_remove "$dev_id" "${size}" "yes"; then
+ echo "/dev/ublkc$dev_id isn't removed res ${res}"
+ exit 255
+ fi
+}
+
+run_io_and_recover()
+{
+ local state
+ local dev_id
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev "$TID" $?
+
+ fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
+ --rw=readwrite --iodepth=256 --size="${size}" --numjobs=4 \
+ --runtime=20 --time_based > /dev/null 2>&1 &
+ sleep 4
+
+ state=$(__ublk_kill_daemon "${dev_id}" "QUIESCED")
+ if [ "$state" != "QUIESCED" ]; then
+ echo "device isn't quiesced($state) after killing daemon"
+ return 255
+ fi
+
+ state=$(_recover_ublk_dev -n "$dev_id" "$@")
+ if [ "$state" != "LIVE" ]; then
+ echo "faile to recover to LIVE($state)"
+ return 255
+ fi
+
+ if ! __remove_ublk_dev_return "${dev_id}"; then
+ echo "delete dev ${dev_id} failed"
+ return 255
+ fi
+ wait
+}
+
+
_ublk_test_top_dir()
{
cd "$(dirname "$0")" && pwd
}
-UBLK_TMP=$(mktemp ublk_test_XXXXX)
UBLK_PROG=$(_ublk_test_top_dir)/kublk
UBLK_TEST_QUIET=1
UBLK_TEST_SHOW_RESULT=1
+UBLK_BACKFILES=()
export UBLK_PROG
export UBLK_TEST_QUIET
export UBLK_TEST_SHOW_RESULT
diff --git a/tools/testing/selftests/ublk/test_generic_04.sh b/tools/testing/selftests/ublk/test_generic_04.sh
new file mode 100755
index 000000000000..8a3bc080c577
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_04.sh
@@ -0,0 +1,40 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_04"
+ERR_CODE=0
+
+ublk_run_recover_test()
+{
+ run_io_and_recover "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "recover" "basic recover function verification"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_recover_test -t null -q 2 -r 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_recover_test -t null -q 2 -r 1 -i 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "recover"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_05.sh b/tools/testing/selftests/ublk/test_generic_05.sh
new file mode 100755
index 000000000000..3bb00a347402
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_05.sh
@@ -0,0 +1,44 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_05"
+ERR_CODE=0
+
+ublk_run_recover_test()
+{
+ run_io_and_recover "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "recover" "basic recover function verification (zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_run_recover_test -t null -q 2 -r 1 -z &
+ublk_run_recover_test -t loop -q 2 -r 1 -z "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+ublk_run_recover_test -t null -q 2 -r 1 -z -i 1 &
+ublk_run_recover_test -t loop -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[0]}" &
+ublk_run_recover_test -t stripe -q 2 -r 1 -z -i 1 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "recover"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_generic_06.sh b/tools/testing/selftests/ublk/test_generic_06.sh
new file mode 100755
index 000000000000..b67230c42c84
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_generic_06.sh
@@ -0,0 +1,41 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+
+TID="generic_06"
+ERR_CODE=0
+
+_prep_test "fault_inject" "fast cleanup when all I/Os of one hctx are in server"
+
+# configure ublk server to sleep 2s before completing each I/O
+dev_id=$(_add_ublk_dev -t fault_inject -q 2 -d 1 --delay_us 2000000)
+_check_add_dev $TID $?
+
+STARTTIME=${SECONDS}
+
+dd if=/dev/urandom of=/dev/ublkb${dev_id} oflag=direct bs=4k count=1 status=none > /dev/null 2>&1 &
+dd_pid=$!
+
+__ublk_kill_daemon ${dev_id} "DEAD"
+
+wait $dd_pid
+dd_exitcode=$?
+
+ENDTIME=${SECONDS}
+ELAPSED=$(($ENDTIME - $STARTTIME))
+
+# assert that dd sees an error and exits quickly after ublk server is
+# killed. previously this relied on seeing an I/O timeout and so would
+# take ~30s
+if [ $dd_exitcode -eq 0 ]; then
+ echo "dd unexpectedly exited successfully!"
+ ERR_CODE=255
+fi
+if [ $ELAPSED -ge 5 ]; then
+ echo "dd took $ELAPSED seconds to exit (>= 5s tolerance)!"
+ ERR_CODE=255
+fi
+
+_cleanup_test "fault_inject"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_01.sh b/tools/testing/selftests/ublk/test_loop_01.sh
index 1ef8b6044777..833fa0dbc700 100755
--- a/tools/testing/selftests/ublk/test_loop_01.sh
+++ b/tools/testing/selftests/ublk/test_loop_01.sh
@@ -12,10 +12,10 @@ fi
_prep_test "loop" "write and verify test"
-backfile_0=$(_create_backfile 256M)
+_create_backfile 0 256M
-dev_id=$(_add_ublk_dev -t loop "$backfile_0")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@@ -23,6 +23,4 @@ ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_02.sh b/tools/testing/selftests/ublk/test_loop_02.sh
index 03863d825e07..874568b3646b 100755
--- a/tools/testing/selftests/ublk/test_loop_02.sh
+++ b/tools/testing/selftests/ublk/test_loop_02.sh
@@ -8,15 +8,13 @@ ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount"
-backfile_0=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t loop "$backfile_0")
-_check_add_dev $TID $? "$backfile_0"
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_03.sh b/tools/testing/selftests/ublk/test_loop_03.sh
index e9ca744de8b1..c30f797c6429 100755
--- a/tools/testing/selftests/ublk/test_loop_03.sh
+++ b/tools/testing/selftests/ublk/test_loop_03.sh
@@ -12,9 +12,9 @@ fi
_prep_test "loop" "write and verify over zero copy"
-backfile_0=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
-_check_add_dev $TID $? "$backfile_0"
+_create_backfile 0 256M
+dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@@ -22,6 +22,4 @@ ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_04.sh b/tools/testing/selftests/ublk/test_loop_04.sh
index 1435422c38ec..b01d75b3214d 100755
--- a/tools/testing/selftests/ublk/test_loop_04.sh
+++ b/tools/testing/selftests/ublk/test_loop_04.sh
@@ -8,15 +8,14 @@ ERR_CODE=0
_prep_test "loop" "mkfs & mount & umount with zero copy"
-backfile_0=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t loop -z "$backfile_0")
-_check_add_dev $TID $? "$backfile_0"
+_create_backfile 0 256M
+
+dev_id=$(_add_ublk_dev -t loop -z "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_loop_05.sh b/tools/testing/selftests/ublk/test_loop_05.sh
index 2e6e2e6978fc..de2141533074 100755
--- a/tools/testing/selftests/ublk/test_loop_05.sh
+++ b/tools/testing/selftests/ublk/test_loop_05.sh
@@ -12,10 +12,10 @@ fi
_prep_test "loop" "write and verify test"
-backfile_0=$(_create_backfile 256M)
+_create_backfile 0 256M
-dev_id=$(_add_ublk_dev -q 2 -t loop "$backfile_0")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -q 2 -t loop "${UBLK_BACKFILES[0]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=256M
@@ -23,6 +23,4 @@ ERR_CODE=$?
_cleanup_test "loop"
-_remove_backfile "$backfile_0"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_01.sh b/tools/testing/selftests/ublk/test_stress_01.sh
index a8be24532b24..7d3150f057d4 100755
--- a/tools/testing/selftests/ublk/test_stress_01.sh
+++ b/tools/testing/selftests/ublk/test_stress_01.sh
@@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_01"
ERR_CODE=0
-DEV_ID=-1
ublk_io_and_remove()
{
- local size=$1
- shift 1
- local backfile=""
- if echo "$@" | grep -q "loop"; then
- backfile=${*: -1}
- fi
- DEV_ID=$(_add_ublk_dev "$@")
- _check_add_dev $TID $? "${backfile}"
-
- [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
- if ! __run_io_and_remove "${DEV_ID}" "${size}" "no"; then
- echo "/dev/ublkc${DEV_ID} isn't removed"
- _remove_backfile "${backfile}"
- exit 255
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
fi
}
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
_prep_test "stress" "run IO and remove device"
-ublk_io_and_remove 8G -t null -q 4
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
-BACK_FILE=$(_create_backfile 256M)
-ublk_io_and_remove 256M -t loop -q 4 "${BACK_FILE}"
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+ublk_io_and_remove 8G -t null -q 4 &
+ublk_io_and_remove 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
-ublk_io_and_remove 256M -t loop -q 4 -z "${BACK_FILE}"
-ERR_CODE=$?
_cleanup_test "stress"
-_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_02.sh b/tools/testing/selftests/ublk/test_stress_02.sh
index 2159e4cc8140..1a9065125ae1 100755
--- a/tools/testing/selftests/ublk/test_stress_02.sh
+++ b/tools/testing/selftests/ublk/test_stress_02.sh
@@ -4,44 +4,31 @@
. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
TID="stress_02"
ERR_CODE=0
-DEV_ID=-1
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
ublk_io_and_kill_daemon()
{
- local size=$1
- shift 1
- local backfile=""
- if echo "$@" | grep -q "loop"; then
- backfile=${*: -1}
- fi
- DEV_ID=$(_add_ublk_dev "$@")
- _check_add_dev $TID $? "${backfile}"
-
- [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs kill ublk server(ublk add $*)"
- if ! __run_io_and_remove "${DEV_ID}" "${size}" "yes"; then
- echo "/dev/ublkc${DEV_ID} isn't removed res ${res}"
- _remove_backfile "${backfile}"
- exit 255
+ run_io_and_kill_daemon "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
fi
}
_prep_test "stress" "run IO and kill ublk server"
-ublk_io_and_kill_daemon 8G -t null -q 4
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
-BACK_FILE=$(_create_backfile 256M)
-ublk_io_and_kill_daemon 256M -t loop -q 4 "${BACK_FILE}"
-ERR_CODE=$?
-if [ ${ERR_CODE} -ne 0 ]; then
- _show_result $TID $ERR_CODE
-fi
+ublk_io_and_kill_daemon 8G -t null -q 4 &
+ublk_io_and_kill_daemon 256M -t loop -q 4 "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
-ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${BACK_FILE}"
-ERR_CODE=$?
_cleanup_test "stress"
-_remove_backfile "${BACK_FILE}"
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_03.sh b/tools/testing/selftests/ublk/test_stress_03.sh
new file mode 100755
index 000000000000..e0854f71d35b
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_03.sh
@@ -0,0 +1,38 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_03"
+ERR_CODE=0
+
+ublk_io_and_remove()
+{
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and remove device(zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_remove 8G -t null -q 4 -z &
+ublk_io_and_remove 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
+ublk_io_and_remove 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_04.sh b/tools/testing/selftests/ublk/test_stress_04.sh
new file mode 100755
index 000000000000..1798a98387e8
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_04.sh
@@ -0,0 +1,37 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_04"
+ERR_CODE=0
+
+ublk_io_and_kill_daemon()
+{
+ run_io_and_kill_daemon "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+if ! _have_program fio; then
+ exit "$UBLK_SKIP_CODE"
+fi
+if ! _have_feature "ZERO_COPY"; then
+ exit "$UBLK_SKIP_CODE"
+fi
+
+_prep_test "stress" "run IO and kill ublk server(zero copy)"
+
+_create_backfile 0 256M
+_create_backfile 1 128M
+_create_backfile 2 128M
+
+ublk_io_and_kill_daemon 8G -t null -q 4 -z &
+ublk_io_and_kill_daemon 256M -t loop -q 4 -z "${UBLK_BACKFILES[0]}" &
+ublk_io_and_kill_daemon 256M -t stripe -q 4 -z "${UBLK_BACKFILES[1]}" "${UBLK_BACKFILES[2]}" &
+wait
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stress_05.sh b/tools/testing/selftests/ublk/test_stress_05.sh
new file mode 100755
index 000000000000..a7071b10224d
--- /dev/null
+++ b/tools/testing/selftests/ublk/test_stress_05.sh
@@ -0,0 +1,64 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+. "$(cd "$(dirname "$0")" && pwd)"/test_common.sh
+TID="stress_05"
+ERR_CODE=0
+
+run_io_and_remove()
+{
+ local size=$1
+ local dev_id
+ local dev_pid
+ shift 1
+
+ dev_id=$(_add_ublk_dev "$@")
+ _check_add_dev $TID $?
+
+ [ "$UBLK_TEST_QUIET" -eq 0 ] && echo "run ublk IO vs. remove device(ublk add $*)"
+
+ fio --name=job1 --filename=/dev/ublkb"${dev_id}" --ioengine=libaio \
+ --rw=readwrite --iodepth=128 --size="${size}" --numjobs=4 \
+ --runtime=40 --time_based > /dev/null 2>&1 &
+ sleep 4
+
+ dev_pid=$(_get_ublk_daemon_pid "$dev_id")
+ kill -9 "$dev_pid"
+
+ if ! __remove_ublk_dev_return "${dev_id}"; then
+ echo "delete dev ${dev_id} failed"
+ return 255
+ fi
+}
+
+ublk_io_and_remove()
+{
+ run_io_and_remove "$@"
+ ERR_CODE=$?
+ if [ ${ERR_CODE} -ne 0 ]; then
+ echo "$TID failure: $*"
+ _show_result $TID $ERR_CODE
+ fi
+}
+
+_prep_test "stress" "run IO and remove device with recovery enabled"
+
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+for reissue in $(seq 0 1); do
+ ublk_io_and_remove 8G -t null -q 4 -g 1 -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -g 1 -r 1 -i "$reissue" "${UBLK_BACKFILES[0]}" &
+ wait
+done
+
+if _have_feature "ZERO_COPY"; then
+ for reissue in $(seq 0 1); do
+ ublk_io_and_remove 8G -t null -q 4 -g 1 -z -r 1 -i "$reissue" &
+ ublk_io_and_remove 256M -t loop -q 4 -g 1 -z -r 1 -i "$reissue" "${UBLK_BACKFILES[1]}" &
+ wait
+ done
+fi
+
+_cleanup_test "stress"
+_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_01.sh b/tools/testing/selftests/ublk/test_stripe_01.sh
index 7e387ef656ea..4e4f0fdf3c9b 100755
--- a/tools/testing/selftests/ublk/test_stripe_01.sh
+++ b/tools/testing/selftests/ublk/test_stripe_01.sh
@@ -12,19 +12,15 @@ fi
_prep_test "stripe" "write and verify test"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
+_create_backfile 0 256M
+_create_backfile 1 256M
-dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_02.sh b/tools/testing/selftests/ublk/test_stripe_02.sh
index e8a45fa82dde..5820ab2efba4 100755
--- a/tools/testing/selftests/ublk/test_stripe_02.sh
+++ b/tools/testing/selftests/ublk/test_stripe_02.sh
@@ -8,17 +8,14 @@ ERR_CODE=0
_prep_test "stripe" "mkfs & mount & umount"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t stripe "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "$backfile_0" "$backfile_1"
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_03.sh b/tools/testing/selftests/ublk/test_stripe_03.sh
index c1b34af36145..20b977e27814 100755
--- a/tools/testing/selftests/ublk/test_stripe_03.sh
+++ b/tools/testing/selftests/ublk/test_stripe_03.sh
@@ -12,19 +12,15 @@ fi
_prep_test "stripe" "write and verify test"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
+_create_backfile 0 256M
+_create_backfile 1 256M
-dev_id=$(_add_ublk_dev -q 2 -t stripe "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "${backfile_0}"
+dev_id=$(_add_ublk_dev -q 2 -t stripe "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
# run fio over the ublk disk
_run_fio_verify_io --filename=/dev/ublkb"${dev_id}" --size=512M
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE
diff --git a/tools/testing/selftests/ublk/test_stripe_04.sh b/tools/testing/selftests/ublk/test_stripe_04.sh
index 1f2b642381d1..1b51ed2f1d84 100755
--- a/tools/testing/selftests/ublk/test_stripe_04.sh
+++ b/tools/testing/selftests/ublk/test_stripe_04.sh
@@ -8,17 +8,14 @@ ERR_CODE=0
_prep_test "stripe" "mkfs & mount & umount on zero copy"
-backfile_0=$(_create_backfile 256M)
-backfile_1=$(_create_backfile 256M)
-dev_id=$(_add_ublk_dev -t stripe -z -q 2 "$backfile_0" "$backfile_1")
-_check_add_dev $TID $? "$backfile_0" "$backfile_1"
+_create_backfile 0 256M
+_create_backfile 1 256M
+
+dev_id=$(_add_ublk_dev -t stripe -z -q 2 "${UBLK_BACKFILES[0]}" "${UBLK_BACKFILES[1]}")
+_check_add_dev $TID $?
_mkfs_mount_test /dev/ublkb"${dev_id}"
ERR_CODE=$?
_cleanup_test "stripe"
-
-_remove_backfile "$backfile_0"
-_remove_backfile "$backfile_1"
-
_show_result $TID $ERR_CODE