summaryrefslogtreecommitdiff
path: root/tools
diff options
context:
space:
mode:
Diffstat (limited to 'tools')
-rw-r--r--tools/include/linux/objtool_types.h12
-rw-r--r--tools/include/uapi/asm-generic/socket.h2
-rw-r--r--tools/include/uapi/linux/if_link.h2
-rw-r--r--tools/net/ynl/Makefile29
-rw-r--r--tools/net/ynl/generated/.gitignore1
-rw-r--r--tools/net/ynl/generated/Makefile51
-rw-r--r--tools/net/ynl/lib/.gitignore1
-rw-r--r--tools/net/ynl/lib/Makefile1
-rw-r--r--tools/net/ynl/pyproject.toml24
-rw-r--r--tools/net/ynl/pyynl/.gitignore2
-rw-r--r--tools/net/ynl/pyynl/__init__.py0
-rwxr-xr-xtools/net/ynl/pyynl/cli.py (renamed from tools/net/ynl/cli.py)45
-rwxr-xr-xtools/net/ynl/pyynl/ethtool.py (renamed from tools/net/ynl/ethtool.py)7
-rw-r--r--tools/net/ynl/pyynl/lib/__init__.py (renamed from tools/net/ynl/lib/__init__.py)0
-rw-r--r--tools/net/ynl/pyynl/lib/nlspec.py (renamed from tools/net/ynl/lib/nlspec.py)5
-rw-r--r--tools/net/ynl/pyynl/lib/ynl.py (renamed from tools/net/ynl/lib/ynl.py)74
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_c.py (renamed from tools/net/ynl/ynl-gen-c.py)201
-rwxr-xr-xtools/net/ynl/pyynl/ynl_gen_rst.py (renamed from tools/net/ynl/ynl-gen-rst.py)0
-rwxr-xr-xtools/net/ynl/ynl-regen.sh2
-rw-r--r--tools/objtool/arch/loongarch/special.c3
-rw-r--r--tools/objtool/arch/powerpc/special.c3
-rw-r--r--tools/objtool/arch/x86/special.c4
-rw-r--r--tools/objtool/check.c426
-rw-r--r--tools/objtool/include/objtool/check.h5
-rw-r--r--tools/objtool/include/objtool/special.h3
-rw-r--r--tools/perf/Documentation/perf-arm-spe.txt26
-rw-r--r--tools/testing/selftests/arm64/abi/hwcap.c235
-rw-r--r--tools/testing/selftests/bpf/Makefile1
-rw-r--r--tools/testing/selftests/bpf/prog_tests/tc_netkit.c49
-rw-r--r--tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c87
-rw-r--r--tools/testing/selftests/bpf/progs/test_tc_link.c15
-rw-r--r--tools/testing/selftests/bpf/progs/test_xdp_meta.c4
-rwxr-xr-xtools/testing/selftests/bpf/test_xdp_meta.sh58
-rw-r--r--tools/testing/selftests/bpf/xdp_hw_metadata.c3
-rwxr-xr-xtools/testing/selftests/cgroup/test_cpuset_prs.sh33
-rw-r--r--tools/testing/selftests/coredump/Makefile7
-rw-r--r--tools/testing/selftests/coredump/README.rst50
-rwxr-xr-xtools/testing/selftests/coredump/stackdump14
-rw-r--r--tools/testing/selftests/coredump/stackdump_test.c151
-rw-r--r--tools/testing/selftests/drivers/net/Makefile3
-rw-r--r--tools/testing/selftests/drivers/net/bonding/Makefile2
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan.sh99
-rwxr-xr-xtools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh96
-rw-r--r--tools/testing/selftests/drivers/net/bonding/config1
-rwxr-xr-xtools/testing/selftests/drivers/net/hds.py120
-rw-r--r--tools/testing/selftests/drivers/net/hw/ncdevmem.c3
-rwxr-xr-xtools/testing/selftests/drivers/net/hw/pp_alloc_fail.py6
-rw-r--r--tools/testing/selftests/drivers/net/lib/py/env.py10
-rw-r--r--tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh225
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_lag.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh1
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_basic.sh218
-rwxr-xr-xtools/testing/selftests/drivers/net/netcons_overflow.sh67
-rwxr-xr-xtools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh9
-rwxr-xr-xtools/testing/selftests/drivers/net/stats.py94
-rw-r--r--tools/testing/selftests/exec/execveat.c75
-rw-r--r--tools/testing/selftests/filesystems/nsfs/.gitignore (renamed from tools/testing/selftests/nsfs/.gitignore)1
-rw-r--r--tools/testing/selftests/filesystems/nsfs/Makefile (renamed from tools/testing/selftests/nsfs/Makefile)4
-rw-r--r--tools/testing/selftests/filesystems/nsfs/config (renamed from tools/testing/selftests/nsfs/config)0
-rw-r--r--tools/testing/selftests/filesystems/nsfs/iterate_mntns.c149
-rw-r--r--tools/testing/selftests/filesystems/nsfs/owner.c (renamed from tools/testing/selftests/nsfs/owner.c)0
-rw-r--r--tools/testing/selftests/filesystems/nsfs/pidns.c (renamed from tools/testing/selftests/nsfs/pidns.c)0
-rw-r--r--tools/testing/selftests/filesystems/statmount/Makefile2
-rw-r--r--tools/testing/selftests/filesystems/statmount/listmount_test.c66
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc19
-rw-r--r--tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc4
-rw-r--r--tools/testing/selftests/kselftest/ktap_helpers.sh15
-rw-r--r--tools/testing/selftests/kvm/aarch64/set_id_regs.c1
-rw-r--r--tools/testing/selftests/kvm/s390x/ucontrol_test.c172
-rwxr-xr-xtools/testing/selftests/livepatch/test-callbacks.sh2
-rwxr-xr-xtools/testing/selftests/livepatch/test-sysfs.sh71
-rw-r--r--tools/testing/selftests/lsm/lsm_set_self_attr_test.c7
-rw-r--r--tools/testing/selftests/mm/cow.c8
-rw-r--r--tools/testing/selftests/net/Makefile2
-rw-r--r--tools/testing/selftests/net/busy_poller.c88
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c11
-rwxr-xr-xtools/testing/selftests/net/cmsg_so_priority.sh151
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh35
-rwxr-xr-xtools/testing/selftests/net/fdb_notify.sh6
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh31
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile1
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh25
-rw-r--r--tools/testing/selftests/net/forwarding/lib.sh11
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_bridge_lag.sh1
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_reserved.sh352
-rw-r--r--tools/testing/selftests/net/ipsec.c3
-rw-r--r--tools/testing/selftests/net/lib.sh68
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py5
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py6
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py4
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c43
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh13
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh9
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh21
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh17
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh21
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py19
-rwxr-xr-xtools/testing/selftests/net/packetdrill/ksft_runner.sh24
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt18
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt13
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt29
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt35
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt21
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt72
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt36
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt72
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt41
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt53
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt50
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt40
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt43
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt64
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt66
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt62
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt26
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt42
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt30
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt20
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt54
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt38
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt92
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt91
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt145
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt23
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt25
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt37
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt32
-rw-r--r--tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt24
-rw-r--r--tools/testing/selftests/net/tls.c478
-rwxr-xr-xtools/testing/selftests/net/udpgso_bench.sh3
-rwxr-xr-xtools/testing/selftests/net/vlan_bridge_binding.sh256
-rw-r--r--tools/testing/selftests/net/ynl.mk3
-rw-r--r--tools/testing/selftests/pid_namespace/.gitignore1
-rw-r--r--tools/testing/selftests/pid_namespace/Makefile2
-rw-r--r--tools/testing/selftests/pid_namespace/pid_max.c358
-rw-r--r--tools/testing/selftests/pidfd/.gitignore2
-rw-r--r--tools/testing/selftests/pidfd/Makefile3
-rw-r--r--tools/testing/selftests/pidfd/pidfd.h40
-rw-r--r--tools/testing/selftests/pidfd/pidfd_bind_mount.c188
-rw-r--r--tools/testing/selftests/pidfd/pidfd_file_handle_test.c503
-rw-r--r--tools/testing/selftests/pidfd/pidfd_setns_test.c47
-rw-r--r--tools/testing/selftests/pidfd/pidfd_wait.c47
-rw-r--r--tools/testing/selftests/powerpc/benchmarks/gettimeofday.c2
-rw-r--r--tools/testing/selftests/powerpc/include/pkeys.h8
-rw-r--r--tools/testing/selftests/powerpc/ptrace/core-pkey.c31
-rw-r--r--tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c26
-rw-r--r--tools/testing/selftests/powerpc/vphn/test-vphn.c2
-rwxr-xr-xtools/testing/selftests/rcutorture/bin/kvm-remote.sh25
-rw-r--r--tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot1
-rw-r--r--tools/testing/selftests/riscv/abi/pointer_masking.c28
-rw-r--r--tools/testing/selftests/riscv/vector/v_initval_nolibc.c4
-rw-r--r--tools/testing/selftests/riscv/vector/vstate_prctl.c2
-rwxr-xr-xtools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py21
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/filters/flow.json4
-rw-r--r--tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json20
-rw-r--r--tools/testing/shared/linux/maple_tree.h2
-rw-r--r--tools/testing/vma/linux/atomic.h2
-rw-r--r--tools/testing/vma/vma.c4
-rw-r--r--tools/testing/vma/vma_internal.h4
-rw-r--r--tools/testing/vsock/README15
-rw-r--r--tools/testing/vsock/util.c33
-rw-r--r--tools/testing/vsock/util.h2
-rw-r--r--tools/testing/vsock/vsock_test.c265
179 files changed, 7296 insertions, 1138 deletions
diff --git a/tools/include/linux/objtool_types.h b/tools/include/linux/objtool_types.h
index 453a4f4ef39d..df5d9fa84dba 100644
--- a/tools/include/linux/objtool_types.h
+++ b/tools/include/linux/objtool_types.h
@@ -54,4 +54,16 @@ struct unwind_hint {
#define UNWIND_HINT_TYPE_SAVE 6
#define UNWIND_HINT_TYPE_RESTORE 7
+/*
+ * Annotate types
+ */
+#define ANNOTYPE_NOENDBR 1
+#define ANNOTYPE_RETPOLINE_SAFE 2
+#define ANNOTYPE_INSTR_BEGIN 3
+#define ANNOTYPE_INSTR_END 4
+#define ANNOTYPE_UNRET_BEGIN 5
+#define ANNOTYPE_IGNORE_ALTS 6
+#define ANNOTYPE_INTRA_FUNCTION_CALL 7
+#define ANNOTYPE_REACHABLE 8
+
#endif /* _LINUX_OBJTOOL_TYPES_H */
diff --git a/tools/include/uapi/asm-generic/socket.h b/tools/include/uapi/asm-generic/socket.h
index 281df9139d2b..ffff554a5230 100644
--- a/tools/include/uapi/asm-generic/socket.h
+++ b/tools/include/uapi/asm-generic/socket.h
@@ -126,6 +126,8 @@
#define SCM_TS_OPT_ID 78
+#define SO_RCVPRIORITY 79
+
#if !defined(__KERNEL__)
#if __BITS_PER_LONG == 64 || (defined(__x86_64__) && defined(__ILP32__))
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index 8516c1ccd57a..7e46ca4cd31b 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -1315,6 +1315,8 @@ enum {
IFLA_NETKIT_MODE,
IFLA_NETKIT_SCRUB,
IFLA_NETKIT_PEER_SCRUB,
+ IFLA_NETKIT_HEADROOM,
+ IFLA_NETKIT_TAILROOM,
__IFLA_NETKIT_MAX,
};
#define IFLA_NETKIT_MAX (__IFLA_NETKIT_MAX - 1)
diff --git a/tools/net/ynl/Makefile b/tools/net/ynl/Makefile
index d1cdf2a8f826..211df5a93ad9 100644
--- a/tools/net/ynl/Makefile
+++ b/tools/net/ynl/Makefile
@@ -1,5 +1,17 @@
# SPDX-License-Identifier: GPL-2.0
+include ../../scripts/Makefile.arch
+
+INSTALL ?= install
+prefix ?= /usr
+ifeq ($(LP64), 1)
+ libdir_relative = lib64
+else
+ libdir_relative = lib
+endif
+libdir ?= $(prefix)/$(libdir_relative)
+includedir ?= $(prefix)/include
+
SUBDIRS = lib generated samples
all: $(SUBDIRS) libynl.a
@@ -21,5 +33,20 @@ clean distclean:
fi \
done
rm -f libynl.a
+ rm -rf pyynl/__pycache__
+ rm -rf pyynl/lib/__pycache__
+ rm -rf pyynl.egg-info
+ rm -rf build
+
+install: libynl.a lib/*.h
+ @echo -e "\tINSTALL libynl.a"
+ @$(INSTALL) -d $(DESTDIR)$(libdir)
+ @$(INSTALL) -m 0644 libynl.a $(DESTDIR)$(libdir)/libynl.a
+ @echo -e "\tINSTALL libynl headers"
+ @$(INSTALL) -d $(DESTDIR)$(includedir)/ynl
+ @$(INSTALL) -m 0644 lib/*.h $(DESTDIR)$(includedir)/ynl/
+ @echo -e "\tINSTALL pyynl"
+ @pip install --prefix=$(DESTDIR)$(prefix) .
+ @make -C generated install
-.PHONY: all clean distclean $(SUBDIRS)
+.PHONY: all clean distclean install $(SUBDIRS)
diff --git a/tools/net/ynl/generated/.gitignore b/tools/net/ynl/generated/.gitignore
index ade488626d26..859a6fb446e1 100644
--- a/tools/net/ynl/generated/.gitignore
+++ b/tools/net/ynl/generated/.gitignore
@@ -1,2 +1,3 @@
*-user.c
*-user.h
+*.rst
diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile
index 7db5240de58a..21f9e299dc75 100644
--- a/tools/net/ynl/generated/Makefile
+++ b/tools/net/ynl/generated/Makefile
@@ -7,32 +7,44 @@ ifeq ("$(DEBUG)","1")
CFLAGS += -g -fsanitize=address -fsanitize=leak -static-libasan
endif
+INSTALL ?= install
+prefix ?= /usr
+datarootdir ?= $(prefix)/share
+docdir ?= $(datarootdir)/doc
+includedir ?= $(prefix)/include
+
include ../Makefile.deps
YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \
--exclude-op stats-get
-TOOL:=../ynl-gen-c.py
+TOOL:=../pyynl/ynl_gen_c.py
+TOOL_RST:=../pyynl/ynl_gen_rst.py
+SPECS_DIR:=../../../../Documentation/netlink/specs
GENS_PATHS=$(shell grep -nrI --files-without-match \
'protocol: netlink' \
- ../../../../Documentation/netlink/specs/)
-GENS=$(patsubst ../../../../Documentation/netlink/specs/%.yaml,%,${GENS_PATHS})
+ $(SPECS_DIR))
+GENS=$(patsubst $(SPECS_DIR)/%.yaml,%,${GENS_PATHS})
SRCS=$(patsubst %,%-user.c,${GENS})
HDRS=$(patsubst %,%-user.h,${GENS})
OBJS=$(patsubst %,%-user.o,${GENS})
-all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI)
+SPECS_PATHS=$(wildcard $(SPECS_DIR)/*.yaml)
+SPECS=$(patsubst $(SPECS_DIR)/%.yaml,%,${SPECS_PATHS})
+RSTS=$(patsubst %,%.rst,${SPECS})
+
+all: protos.a $(HDRS) $(SRCS) $(KHDRS) $(KSRCS) $(UAPI) $(RSTS)
protos.a: $(OBJS)
@echo -e "\tAR $@"
@ar rcs $@ $(OBJS)
-%-user.h: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+%-user.h: $(SPECS_DIR)/%.yaml $(TOOL)
@echo -e "\tGEN $@"
@$(TOOL) --mode user --header --spec $< -o $@ $(YNL_GEN_ARG_$*)
-%-user.c: ../../../../Documentation/netlink/specs/%.yaml $(TOOL)
+%-user.c: $(SPECS_DIR)/%.yaml $(TOOL)
@echo -e "\tGEN $@"
@$(TOOL) --mode user --source --spec $< -o $@ $(YNL_GEN_ARG_$*)
@@ -40,14 +52,37 @@ protos.a: $(OBJS)
@echo -e "\tCC $@"
@$(COMPILE.c) $(CFLAGS_$*) -o $@ $<
+%.rst: $(SPECS_DIR)/%.yaml $(TOOL_RST)
+ @echo -e "\tGEN_RST $@"
+ @$(TOOL_RST) -o $@ -i $<
+
clean:
rm -f *.o
distclean: clean
- rm -f *.c *.h *.a
+ rm -f *.c *.h *.a *.rst
regen:
@../ynl-regen.sh
-.PHONY: all clean distclean regen
+install-headers: $(HDRS)
+ @echo -e "\tINSTALL generated headers"
+ @$(INSTALL) -d $(DESTDIR)$(includedir)/ynl
+ @$(INSTALL) -m 0644 *.h $(DESTDIR)$(includedir)/ynl/
+
+install-rsts: $(RSTS)
+ @echo -e "\tINSTALL generated docs"
+ @$(INSTALL) -d $(DESTDIR)$(docdir)/ynl
+ @$(INSTALL) -m 0644 $(RSTS) $(DESTDIR)$(docdir)/ynl/
+
+install-specs:
+ @echo -e "\tINSTALL specs"
+ @$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl
+ @$(INSTALL) -m 0644 ../../../../Documentation/netlink/*.yaml $(DESTDIR)$(datarootdir)/ynl/
+ @$(INSTALL) -d $(DESTDIR)$(datarootdir)/ynl/specs
+ @$(INSTALL) -m 0644 $(SPECS_DIR)/*.yaml $(DESTDIR)$(datarootdir)/ynl/specs/
+
+install: install-headers install-rsts install-specs
+
+.PHONY: all clean distclean regen install install-headers install-rsts install-specs
.DEFAULT_GOAL: all
diff --git a/tools/net/ynl/lib/.gitignore b/tools/net/ynl/lib/.gitignore
index 296c4035dbf2..a4383358ec72 100644
--- a/tools/net/ynl/lib/.gitignore
+++ b/tools/net/ynl/lib/.gitignore
@@ -1,2 +1 @@
-__pycache__/
*.d
diff --git a/tools/net/ynl/lib/Makefile b/tools/net/ynl/lib/Makefile
index 94c49cca3dca..4b2b98704ff9 100644
--- a/tools/net/ynl/lib/Makefile
+++ b/tools/net/ynl/lib/Makefile
@@ -19,7 +19,6 @@ ynl.a: $(OBJS)
clean:
rm -f *.o *.d *~
- rm -rf __pycache__
distclean: clean
rm -f *.a
diff --git a/tools/net/ynl/pyproject.toml b/tools/net/ynl/pyproject.toml
new file mode 100644
index 000000000000..a81d8779b0e0
--- /dev/null
+++ b/tools/net/ynl/pyproject.toml
@@ -0,0 +1,24 @@
+[build-system]
+requires = ["setuptools>=61.0"]
+build-backend = "setuptools.build_meta"
+
+[project]
+name = "pyynl"
+authors = [
+ {name = "Donald Hunter", email = "donald.hunter@gmail.com"},
+ {name = "Jakub Kicinski", email = "kuba@kernel.org"},
+]
+description = "yaml netlink (ynl)"
+version = "0.0.1"
+requires-python = ">=3.9"
+dependencies = [
+ "pyyaml==6.*",
+ "jsonschema==4.*"
+]
+
+[tool.setuptools.packages.find]
+include = ["pyynl", "pyynl.lib"]
+
+[project.scripts]
+ynl = "pyynl.cli:main"
+ynl-ethtool = "pyynl.ethtool:main"
diff --git a/tools/net/ynl/pyynl/.gitignore b/tools/net/ynl/pyynl/.gitignore
new file mode 100644
index 000000000000..b801cd2d016e
--- /dev/null
+++ b/tools/net/ynl/pyynl/.gitignore
@@ -0,0 +1,2 @@
+__pycache__/
+lib/__pycache__/
diff --git a/tools/net/ynl/pyynl/__init__.py b/tools/net/ynl/pyynl/__init__.py
new file mode 100644
index 000000000000..e69de29bb2d1
--- /dev/null
+++ b/tools/net/ynl/pyynl/__init__.py
diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/pyynl/cli.py
index 41d9fa5c818d..794e3c7dcc65 100755
--- a/tools/net/ynl/cli.py
+++ b/tools/net/ynl/pyynl/cli.py
@@ -3,6 +3,7 @@
import argparse
import json
+import os
import pathlib
import pprint
import sys
@@ -10,6 +11,24 @@ import sys
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import YnlFamily, Netlink, NlError
+sys_schema_dir='/usr/share/ynl'
+relative_schema_dir='../../../../Documentation/netlink'
+
+def schema_dir():
+ script_dir = os.path.dirname(os.path.abspath(__file__))
+ schema_dir = os.path.abspath(f"{script_dir}/{relative_schema_dir}")
+ if not os.path.isdir(schema_dir):
+ schema_dir = sys_schema_dir
+ if not os.path.isdir(schema_dir):
+ raise Exception(f"Schema directory {schema_dir} does not exist")
+ return schema_dir
+
+def spec_dir():
+ spec_dir = schema_dir() + '/specs'
+ if not os.path.isdir(spec_dir):
+ raise Exception(f"Spec directory {spec_dir} does not exist")
+ return spec_dir
+
class YnlEncoder(json.JSONEncoder):
def default(self, obj):
@@ -32,7 +51,14 @@ def main():
parser = argparse.ArgumentParser(description=description,
epilog=epilog)
- parser.add_argument('--spec', dest='spec', type=str, required=True)
+ spec_group = parser.add_mutually_exclusive_group(required=True)
+ spec_group.add_argument('--family', dest='family', type=str,
+ help='name of the netlink FAMILY')
+ spec_group.add_argument('--list-families', action='store_true',
+ help='list all netlink families supported by YNL (has spec)')
+ spec_group.add_argument('--spec', dest='spec', type=str,
+ help='choose the family by SPEC file path')
+
parser.add_argument('--schema', dest='schema', type=str)
parser.add_argument('--no-schema', action='store_true')
parser.add_argument('--json', dest='json_text', type=str)
@@ -70,6 +96,12 @@ def main():
else:
pprint.PrettyPrinter().pprint(msg)
+ if args.list_families:
+ for filename in sorted(os.listdir(spec_dir())):
+ if filename.endswith('.yaml'):
+ print(filename.removesuffix('.yaml'))
+ return
+
if args.no_schema:
args.schema = ''
@@ -77,7 +109,16 @@ def main():
if args.json_text:
attrs = json.loads(args.json_text)
- ynl = YnlFamily(args.spec, args.schema, args.process_unknown,
+ if args.family:
+ spec = f"{spec_dir()}/{args.family}.yaml"
+ if args.schema is None and spec.startswith(sys_schema_dir):
+ args.schema = '' # disable schema validation when installed
+ else:
+ spec = args.spec
+ if not os.path.isfile(spec):
+ raise Exception(f"Spec file {spec} does not exist")
+
+ ynl = YnlFamily(spec, args.schema, args.process_unknown,
recv_size=args.dbg_small_recv)
if args.dbg_small_recv:
ynl.set_recv_dbg(True)
diff --git a/tools/net/ynl/ethtool.py b/tools/net/ynl/pyynl/ethtool.py
index ebb0a11f67bf..af7fddd7b085 100755
--- a/tools/net/ynl/ethtool.py
+++ b/tools/net/ynl/pyynl/ethtool.py
@@ -11,6 +11,7 @@ import os
sys.path.append(pathlib.Path(__file__).resolve().parent.as_posix())
from lib import YnlFamily
+from cli import schema_dir, spec_dir
def args_to_req(ynl, op_name, args, req):
"""
@@ -156,10 +157,8 @@ def main():
args = parser.parse_args()
script_abs_dir = os.path.dirname(os.path.abspath(sys.argv[0]))
- spec = os.path.join(script_abs_dir,
- '../../../Documentation/netlink/specs/ethtool.yaml')
- schema = os.path.join(script_abs_dir,
- '../../../Documentation/netlink/genetlink-legacy.yaml')
+ spec = os.path.join(spec_dir(), 'ethtool.yaml')
+ schema = os.path.join(schema_dir(), 'genetlink-legacy.yaml')
ynl = YnlFamily(spec, schema)
diff --git a/tools/net/ynl/lib/__init__.py b/tools/net/ynl/pyynl/lib/__init__.py
index 9137b83e580a..9137b83e580a 100644
--- a/tools/net/ynl/lib/__init__.py
+++ b/tools/net/ynl/pyynl/lib/__init__.py
diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/pyynl/lib/nlspec.py
index a745739655ad..314ec8007496 100644
--- a/tools/net/ynl/lib/nlspec.py
+++ b/tools/net/ynl/pyynl/lib/nlspec.py
@@ -219,7 +219,10 @@ class SpecAttrSet(SpecElement):
else:
real_set = family.attr_sets[self.subset_of]
for elem in self.yaml['attributes']:
- attr = real_set[elem['name']]
+ real_attr = real_set[elem['name']]
+ combined_elem = real_attr.yaml | elem
+ attr = self.new_attr(combined_elem, real_attr.value)
+
self.attrs[attr.name] = attr
self.attrs_by_val[attr.value] = attr
diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/pyynl/lib/ynl.py
index eea29359a899..08f8bf89cfc2 100644
--- a/tools/net/ynl/lib/ynl.py
+++ b/tools/net/ynl/pyynl/lib/ynl.py
@@ -733,41 +733,45 @@ class YnlFamily(SpecFamily):
self._rsp_add(rsp, attr_name, None, self._decode_unknown(attr))
continue
- if attr_spec["type"] == 'nest':
- subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs)
- decoded = subdict
- elif attr_spec["type"] == 'string':
- decoded = attr.as_strz()
- elif attr_spec["type"] == 'binary':
- decoded = self._decode_binary(attr, attr_spec)
- elif attr_spec["type"] == 'flag':
- decoded = True
- elif attr_spec.is_auto_scalar:
- decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order)
- elif attr_spec["type"] in NlAttr.type_formats:
- decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
- if 'enum' in attr_spec:
- decoded = self._decode_enum(decoded, attr_spec)
- elif attr_spec.display_hint:
- decoded = self._formatted_string(decoded, attr_spec.display_hint)
- elif attr_spec["type"] == 'indexed-array':
- decoded = self._decode_array_attr(attr, attr_spec)
- elif attr_spec["type"] == 'bitfield32':
- value, selector = struct.unpack("II", attr.raw)
- if 'enum' in attr_spec:
- value = self._decode_enum(value, attr_spec)
- selector = self._decode_enum(selector, attr_spec)
- decoded = {"value": value, "selector": selector}
- elif attr_spec["type"] == 'sub-message':
- decoded = self._decode_sub_msg(attr, attr_spec, search_attrs)
- elif attr_spec["type"] == 'nest-type-value':
- decoded = self._decode_nest_type_value(attr, attr_spec)
- else:
- if not self.process_unknown:
- raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
- decoded = self._decode_unknown(attr)
-
- self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded)
+ try:
+ if attr_spec["type"] == 'nest':
+ subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs)
+ decoded = subdict
+ elif attr_spec["type"] == 'string':
+ decoded = attr.as_strz()
+ elif attr_spec["type"] == 'binary':
+ decoded = self._decode_binary(attr, attr_spec)
+ elif attr_spec["type"] == 'flag':
+ decoded = True
+ elif attr_spec.is_auto_scalar:
+ decoded = attr.as_auto_scalar(attr_spec['type'], attr_spec.byte_order)
+ elif attr_spec["type"] in NlAttr.type_formats:
+ decoded = attr.as_scalar(attr_spec['type'], attr_spec.byte_order)
+ if 'enum' in attr_spec:
+ decoded = self._decode_enum(decoded, attr_spec)
+ elif attr_spec.display_hint:
+ decoded = self._formatted_string(decoded, attr_spec.display_hint)
+ elif attr_spec["type"] == 'indexed-array':
+ decoded = self._decode_array_attr(attr, attr_spec)
+ elif attr_spec["type"] == 'bitfield32':
+ value, selector = struct.unpack("II", attr.raw)
+ if 'enum' in attr_spec:
+ value = self._decode_enum(value, attr_spec)
+ selector = self._decode_enum(selector, attr_spec)
+ decoded = {"value": value, "selector": selector}
+ elif attr_spec["type"] == 'sub-message':
+ decoded = self._decode_sub_msg(attr, attr_spec, search_attrs)
+ elif attr_spec["type"] == 'nest-type-value':
+ decoded = self._decode_nest_type_value(attr, attr_spec)
+ else:
+ if not self.process_unknown:
+ raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}')
+ decoded = self._decode_unknown(attr)
+
+ self._rsp_add(rsp, attr_spec["name"], attr_spec.is_multi, decoded)
+ except:
+ print(f"Error decoding '{attr_spec.name}' from '{space}'")
+ raise
return rsp
diff --git a/tools/net/ynl/ynl-gen-c.py b/tools/net/ynl/pyynl/ynl_gen_c.py
index d8201c4b1520..c2eabc90dce8 100755
--- a/tools/net/ynl/ynl-gen-c.py
+++ b/tools/net/ynl/pyynl/ynl_gen_c.py
@@ -79,6 +79,20 @@ class Type(SpecAttr):
self.enum_name = None
delattr(self, "enum_name")
+ def _get_real_attr(self):
+ # if the attr is for a subset return the "real" attr (just one down, does not recurse)
+ return self.family.attr_sets[self.attr_set.subset_of][self.name]
+
+ def set_request(self):
+ self.request = True
+ if self.attr_set.subset_of:
+ self._get_real_attr().set_request()
+
+ def set_reply(self):
+ self.reply = True
+ if self.attr_set.subset_of:
+ self._get_real_attr().set_reply()
+
def get_limit(self, limit, default=None):
value = self.checks.get(limit, default)
if value is None:
@@ -106,6 +120,10 @@ class Type(SpecAttr):
enum_name = f"{self.attr_set.name_prefix}{self.name}"
self.enum_name = c_upper(enum_name)
+ if self.attr_set.subset_of:
+ if self.checks != self._get_real_attr().checks:
+ raise Exception("Overriding checks not supported by codegen, yet")
+
def is_multi_val(self):
return None
@@ -801,6 +819,8 @@ class EnumSet(SpecEnumSet):
self.user_type = 'int'
self.value_pfx = yaml.get('name-prefix', f"{family.ident_name}-{yaml['name']}-")
+ self.header = yaml.get('header', None)
+ self.enum_cnt_name = yaml.get('enum-cnt-name', None)
super().__init__(family, yaml)
@@ -1117,17 +1137,17 @@ class Family(SpecFamily):
for _, struct in self.pure_nested_structs.items():
if struct.request:
for _, arg in struct.member_list():
- arg.request = True
+ arg.set_request()
if struct.reply:
for _, arg in struct.member_list():
- arg.reply = True
+ arg.set_reply()
for root_set, rs_members in self.root_sets.items():
for attr, spec in self.attr_sets[root_set].items():
if attr in rs_members['request']:
- spec.request = True
+ spec.set_request()
if attr in rs_members['reply']:
- spec.reply = True
+ spec.set_reply()
def _load_global_policy(self):
global_set = set()
@@ -1763,7 +1783,14 @@ def parse_rsp_nested(ri, struct):
f'{struct.ptr_name}dst = yarg->data;']
init_lines = []
- _multi_parse(ri, struct, init_lines, local_vars)
+ if struct.member_list():
+ _multi_parse(ri, struct, init_lines, local_vars)
+ else:
+ # Empty nest
+ ri.cw.block_start()
+ ri.cw.p('return 0;')
+ ri.cw.block_end()
+ ri.cw.nl()
def parse_rsp_msg(ri, deref=False):
@@ -2384,6 +2411,17 @@ def print_kernel_family_struct_src(family, cw):
if not kernel_can_gen_family_struct(family):
return
+ if 'sock-priv' in family.kernel_family:
+ # Generate "trampolines" to make CFI happy
+ cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_init",
+ [f"{family.c_name}_nl_sock_priv_init(priv);"],
+ ["void *priv"])
+ cw.nl()
+ cw.write_func("static void", f"__{family.c_name}_nl_sock_priv_destroy",
+ [f"{family.c_name}_nl_sock_priv_destroy(priv);"],
+ ["void *priv"])
+ cw.nl()
+
cw.block_start(f"struct genl_family {family.ident_name}_nl_family __ro_after_init =")
cw.p('.name\t\t= ' + family.fam_key + ',')
cw.p('.version\t= ' + family.ver_key + ',')
@@ -2401,9 +2439,8 @@ def print_kernel_family_struct_src(family, cw):
cw.p(f'.n_mcgrps\t= ARRAY_SIZE({family.c_name}_nl_mcgrps),')
if 'sock-priv' in family.kernel_family:
cw.p(f'.sock_priv_size\t= sizeof({family.kernel_family["sock-priv"]}),')
- # Force cast here, actual helpers take pointer to the real type.
- cw.p(f'.sock_priv_init\t= (void *){family.c_name}_nl_sock_priv_init,')
- cw.p(f'.sock_priv_destroy = (void *){family.c_name}_nl_sock_priv_destroy,')
+ cw.p(f'.sock_priv_init\t= __{family.c_name}_nl_sock_priv_init,')
+ cw.p(f'.sock_priv_destroy = __{family.c_name}_nl_sock_priv_destroy,')
cw.block_end(';')
@@ -2417,6 +2454,87 @@ def uapi_enum_start(family, cw, obj, ckey='', enum_name='enum-name'):
cw.block_start(line=start_line)
+def render_uapi_unified(family, cw, max_by_define, separate_ntf):
+ max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX"))
+ cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX"))
+ max_value = f"({cnt_name} - 1)"
+
+ uapi_enum_start(family, cw, family['operations'], 'enum-name')
+ val = 0
+ for op in family.msgs.values():
+ if separate_ntf and ('notify' in op or 'event' in op):
+ continue
+
+ suffix = ','
+ if op.value != val:
+ suffix = f" = {op.value},"
+ val = op.value
+ cw.p(op.enum_name + suffix)
+ val += 1
+ cw.nl()
+ cw.p(cnt_name + ('' if max_by_define else ','))
+ if not max_by_define:
+ cw.p(f"{max_name} = {max_value}")
+ cw.block_end(line=';')
+ if max_by_define:
+ cw.p(f"#define {max_name} {max_value}")
+ cw.nl()
+
+
+def render_uapi_directional(family, cw, max_by_define):
+ max_name = f"{family.op_prefix}USER_MAX"
+ cnt_name = f"__{family.op_prefix}USER_CNT"
+ max_value = f"({cnt_name} - 1)"
+
+ cw.block_start(line='enum')
+ cw.p(c_upper(f'{family.name}_MSG_USER_NONE = 0,'))
+ val = 0
+ for op in family.msgs.values():
+ if 'do' in op and 'event' not in op:
+ suffix = ','
+ if op.value and op.value != val:
+ suffix = f" = {op.value},"
+ val = op.value
+ cw.p(op.enum_name + suffix)
+ val += 1
+ cw.nl()
+ cw.p(cnt_name + ('' if max_by_define else ','))
+ if not max_by_define:
+ cw.p(f"{max_name} = {max_value}")
+ cw.block_end(line=';')
+ if max_by_define:
+ cw.p(f"#define {max_name} {max_value}")
+ cw.nl()
+
+ max_name = f"{family.op_prefix}KERNEL_MAX"
+ cnt_name = f"__{family.op_prefix}KERNEL_CNT"
+ max_value = f"({cnt_name} - 1)"
+
+ cw.block_start(line='enum')
+ cw.p(c_upper(f'{family.name}_MSG_KERNEL_NONE = 0,'))
+ val = 0
+ for op in family.msgs.values():
+ if ('do' in op and 'reply' in op['do']) or 'notify' in op or 'event' in op:
+ enum_name = op.enum_name
+ if 'event' not in op and 'notify' not in op:
+ enum_name = f'{enum_name}_REPLY'
+
+ suffix = ','
+ if op.value and op.value != val:
+ suffix = f" = {op.value},"
+ val = op.value
+ cw.p(enum_name + suffix)
+ val += 1
+ cw.nl()
+ cw.p(cnt_name + ('' if max_by_define else ','))
+ if not max_by_define:
+ cw.p(f"{max_name} = {max_value}")
+ cw.block_end(line=';')
+ if max_by_define:
+ cw.p(f"#define {max_name} {max_value}")
+ cw.nl()
+
+
def render_uapi(family, cw):
hdr_prot = f"_UAPI_LINUX_{c_upper(family.uapi_header_name)}_H"
hdr_prot = hdr_prot.replace('/', '_')
@@ -2440,6 +2558,9 @@ def render_uapi(family, cw):
if const['type'] == 'enum' or const['type'] == 'flags':
enum = family.consts[const['name']]
+ if enum.header:
+ continue
+
if enum.has_doc():
if enum.has_entry_doc():
cw.p('/**')
@@ -2472,9 +2593,12 @@ def render_uapi(family, cw):
max_val = f' = {enum.get_mask()},'
cw.p(max_name + max_val)
else:
+ cnt_name = enum.enum_cnt_name
max_name = c_upper(name_pfx + 'max')
- cw.p('__' + max_name + ',')
- cw.p(max_name + ' = (__' + max_name + ' - 1)')
+ if not cnt_name:
+ cnt_name = '__' + name_pfx + 'max'
+ cw.p(c_upper(cnt_name) + ',')
+ cw.p(max_name + ' = (' + c_upper(cnt_name) + ' - 1)')
cw.block_end(line=';')
cw.nl()
elif const['type'] == 'const':
@@ -2503,7 +2627,8 @@ def render_uapi(family, cw):
val = attr.value
val += 1
cw.p(attr.enum_name + suffix)
- cw.nl()
+ if attr_set.items():
+ cw.nl()
cw.p(attr_set.cnt_name + ('' if max_by_define else ','))
if not max_by_define:
cw.p(f"{attr_set.max_name} = {max_value}")
@@ -2515,30 +2640,12 @@ def render_uapi(family, cw):
# Commands
separate_ntf = 'async-prefix' in family['operations']
- max_name = c_upper(family.get('cmd-max-name', f"{family.op_prefix}MAX"))
- cnt_name = c_upper(family.get('cmd-cnt-name', f"__{family.op_prefix}MAX"))
- max_value = f"({cnt_name} - 1)"
-
- uapi_enum_start(family, cw, family['operations'], 'enum-name')
- val = 0
- for op in family.msgs.values():
- if separate_ntf and ('notify' in op or 'event' in op):
- continue
-
- suffix = ','
- if op.value != val:
- suffix = f" = {op.value},"
- val = op.value
- cw.p(op.enum_name + suffix)
- val += 1
- cw.nl()
- cw.p(cnt_name + ('' if max_by_define else ','))
- if not max_by_define:
- cw.p(f"{max_name} = {max_value}")
- cw.block_end(line=';')
- if max_by_define:
- cw.p(f"#define {max_name} {max_value}")
- cw.nl()
+ if family.msg_id_model == 'unified':
+ render_uapi_unified(family, cw, max_by_define, separate_ntf)
+ elif family.msg_id_model == 'directional':
+ render_uapi_directional(family, cw, max_by_define)
+ else:
+ raise Exception(f'Unsupported message enum-model {family.msg_id_model}')
if separate_ntf:
uapi_enum_start(family, cw, family['operations'], enum_name='async-enum')
@@ -2635,7 +2742,8 @@ def find_kernel_root(full_path):
def main():
parser = argparse.ArgumentParser(description='Netlink simple parsing generator')
- parser.add_argument('--mode', dest='mode', type=str, required=True)
+ parser.add_argument('--mode', dest='mode', type=str, required=True,
+ choices=('user', 'kernel', 'uapi'))
parser.add_argument('--spec', dest='spec', type=str, required=True)
parser.add_argument('--header', dest='header', action='store_true', default=None)
parser.add_argument('--source', dest='header', action='store_false')
@@ -2662,13 +2770,6 @@ def main():
os.sys.exit(1)
return
- supported_models = ['unified']
- if args.mode in ['user', 'kernel']:
- supported_models += ['directional']
- if parsed.msg_id_model not in supported_models:
- print(f'Message enum-model {parsed.msg_id_model} not supported for {args.mode} generation')
- os.sys.exit(1)
-
cw = CodeWriter(BaseNlLib(), args.out_file, overwrite=(not args.cmp_out))
_, spec_kernel = find_kernel_root(args.spec)
@@ -2696,7 +2797,10 @@ def main():
cw.p('#define ' + hdr_prot)
cw.nl()
- hdr_file=os.path.basename(args.out_file[:-2]) + ".h"
+ if args.out_file:
+ hdr_file = os.path.basename(args.out_file[:-2]) + ".h"
+ else:
+ hdr_file = "generated_header_file.h"
if args.mode == 'kernel':
cw.p('#include <net/netlink.h>')
@@ -2718,12 +2822,17 @@ def main():
else:
cw.p(f'#include "{hdr_file}"')
cw.p('#include "ynl.h"')
- headers = [parsed.uapi_header]
+ headers = []
for definition in parsed['definitions']:
if 'header' in definition:
headers.append(definition['header'])
+ if args.mode == 'user':
+ headers.append(parsed.uapi_header)
+ seen_header = []
for one in headers:
- cw.p(f"#include <{one}>")
+ if one not in seen_header:
+ cw.p(f"#include <{one}>")
+ seen_header.append(one)
cw.nl()
if args.mode == "user":
diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/pyynl/ynl_gen_rst.py
index 6c56d0d726b4..6c56d0d726b4 100755
--- a/tools/net/ynl/ynl-gen-rst.py
+++ b/tools/net/ynl/pyynl/ynl_gen_rst.py
diff --git a/tools/net/ynl/ynl-regen.sh b/tools/net/ynl/ynl-regen.sh
index a37304dcc88e..81b4ecd89100 100755
--- a/tools/net/ynl/ynl-regen.sh
+++ b/tools/net/ynl/ynl-regen.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
-TOOL=$(dirname $(realpath $0))/ynl-gen-c.py
+TOOL=$(dirname $(realpath $0))/pyynl/ynl_gen_c.py
force=
search=
diff --git a/tools/objtool/arch/loongarch/special.c b/tools/objtool/arch/loongarch/special.c
index 9bba1e9318e0..87230ed570fd 100644
--- a/tools/objtool/arch/loongarch/special.c
+++ b/tools/objtool/arch/loongarch/special.c
@@ -9,7 +9,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
}
struct reloc *arch_find_switch_table(struct objtool_file *file,
- struct instruction *insn)
+ struct instruction *insn,
+ unsigned long *table_size)
{
return NULL;
}
diff --git a/tools/objtool/arch/powerpc/special.c b/tools/objtool/arch/powerpc/special.c
index d33868147196..51610689abf7 100644
--- a/tools/objtool/arch/powerpc/special.c
+++ b/tools/objtool/arch/powerpc/special.c
@@ -13,7 +13,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
}
struct reloc *arch_find_switch_table(struct objtool_file *file,
- struct instruction *insn)
+ struct instruction *insn,
+ unsigned long *table_size)
{
exit(-1);
}
diff --git a/tools/objtool/arch/x86/special.c b/tools/objtool/arch/x86/special.c
index 4ea0f9815fda..9c1c9df09aaa 100644
--- a/tools/objtool/arch/x86/special.c
+++ b/tools/objtool/arch/x86/special.c
@@ -109,7 +109,8 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
* NOTE: MITIGATION_RETPOLINE made it harder still to decode dynamic jumps.
*/
struct reloc *arch_find_switch_table(struct objtool_file *file,
- struct instruction *insn)
+ struct instruction *insn,
+ unsigned long *table_size)
{
struct reloc *text_reloc, *rodata_reloc;
struct section *table_sec;
@@ -158,5 +159,6 @@ struct reloc *arch_find_switch_table(struct objtool_file *file,
if (reloc_type(text_reloc) == R_X86_64_PC32)
file->ignore_unreachables = true;
+ *table_size = 0;
return rodata_reloc;
}
diff --git a/tools/objtool/check.c b/tools/objtool/check.c
index 76060da755b5..753dbc4f8198 100644
--- a/tools/objtool/check.c
+++ b/tools/objtool/check.c
@@ -150,6 +150,15 @@ static inline struct reloc *insn_jump_table(struct instruction *insn)
return NULL;
}
+static inline unsigned long insn_jump_table_size(struct instruction *insn)
+{
+ if (insn->type == INSN_JUMP_DYNAMIC ||
+ insn->type == INSN_CALL_DYNAMIC)
+ return insn->_jump_table_size;
+
+ return 0;
+}
+
static bool is_jump_table_jump(struct instruction *insn)
{
struct alt_group *alt_group = insn->alt_group;
@@ -614,108 +623,6 @@ static int init_pv_ops(struct objtool_file *file)
return 0;
}
-static struct instruction *find_last_insn(struct objtool_file *file,
- struct section *sec)
-{
- struct instruction *insn = NULL;
- unsigned int offset;
- unsigned int end = (sec->sh.sh_size > 10) ? sec->sh.sh_size - 10 : 0;
-
- for (offset = sec->sh.sh_size - 1; offset >= end && !insn; offset--)
- insn = find_insn(file, sec, offset);
-
- return insn;
-}
-
-/*
- * Mark "ud2" instructions and manually annotated dead ends.
- */
-static int add_dead_ends(struct objtool_file *file)
-{
- struct section *rsec;
- struct reloc *reloc;
- struct instruction *insn;
- uint64_t offset;
-
- /*
- * Check for manually annotated dead ends.
- */
- rsec = find_section_by_name(file->elf, ".rela.discard.unreachable");
- if (!rsec)
- goto reachable;
-
- for_each_reloc(rsec, reloc) {
- if (reloc->sym->type == STT_SECTION) {
- offset = reloc_addend(reloc);
- } else if (reloc->sym->local_label) {
- offset = reloc->sym->offset;
- } else {
- WARN("unexpected relocation symbol type in %s", rsec->name);
- return -1;
- }
-
- insn = find_insn(file, reloc->sym->sec, offset);
- if (insn)
- insn = prev_insn_same_sec(file, insn);
- else if (offset == reloc->sym->sec->sh.sh_size) {
- insn = find_last_insn(file, reloc->sym->sec);
- if (!insn) {
- WARN("can't find unreachable insn at %s+0x%" PRIx64,
- reloc->sym->sec->name, offset);
- return -1;
- }
- } else {
- WARN("can't find unreachable insn at %s+0x%" PRIx64,
- reloc->sym->sec->name, offset);
- return -1;
- }
-
- insn->dead_end = true;
- }
-
-reachable:
- /*
- * These manually annotated reachable checks are needed for GCC 4.4,
- * where the Linux unreachable() macro isn't supported. In that case
- * GCC doesn't know the "ud2" is fatal, so it generates code as if it's
- * not a dead end.
- */
- rsec = find_section_by_name(file->elf, ".rela.discard.reachable");
- if (!rsec)
- return 0;
-
- for_each_reloc(rsec, reloc) {
- if (reloc->sym->type == STT_SECTION) {
- offset = reloc_addend(reloc);
- } else if (reloc->sym->local_label) {
- offset = reloc->sym->offset;
- } else {
- WARN("unexpected relocation symbol type in %s", rsec->name);
- return -1;
- }
-
- insn = find_insn(file, reloc->sym->sec, offset);
- if (insn)
- insn = prev_insn_same_sec(file, insn);
- else if (offset == reloc->sym->sec->sh.sh_size) {
- insn = find_last_insn(file, reloc->sym->sec);
- if (!insn) {
- WARN("can't find reachable insn at %s+0x%" PRIx64,
- reloc->sym->sec->name, offset);
- return -1;
- }
- } else {
- WARN("can't find reachable insn at %s+0x%" PRIx64,
- reloc->sym->sec->name, offset);
- return -1;
- }
-
- insn->dead_end = false;
- }
-
- return 0;
-}
-
static int create_static_call_sections(struct objtool_file *file)
{
struct static_call_site *site;
@@ -1310,40 +1217,6 @@ static void add_uaccess_safe(struct objtool_file *file)
}
/*
- * FIXME: For now, just ignore any alternatives which add retpolines. This is
- * a temporary hack, as it doesn't allow ORC to unwind from inside a retpoline.
- * But it at least allows objtool to understand the control flow *around* the
- * retpoline.
- */
-static int add_ignore_alternatives(struct objtool_file *file)
-{
- struct section *rsec;
- struct reloc *reloc;
- struct instruction *insn;
-
- rsec = find_section_by_name(file->elf, ".rela.discard.ignore_alts");
- if (!rsec)
- return 0;
-
- for_each_reloc(rsec, reloc) {
- if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", rsec->name);
- return -1;
- }
-
- insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
- if (!insn) {
- WARN("bad .discard.ignore_alts entry");
- return -1;
- }
-
- insn->ignore_alts = true;
- }
-
- return 0;
-}
-
-/*
* Symbols that replace INSN_CALL_DYNAMIC, every (tail) call to such a symbol
* will be added to the .retpoline_sites section.
*/
@@ -2073,6 +1946,7 @@ out:
static int add_jump_table(struct objtool_file *file, struct instruction *insn,
struct reloc *next_table)
{
+ unsigned long table_size = insn_jump_table_size(insn);
struct symbol *pfunc = insn_func(insn)->pfunc;
struct reloc *table = insn_jump_table(insn);
struct instruction *dest_insn;
@@ -2087,6 +1961,8 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
for_each_reloc_from(table->sec, reloc) {
/* Check for the end of the table: */
+ if (table_size && reloc_offset(reloc) - reloc_offset(table) >= table_size)
+ break;
if (reloc != table && reloc == next_table)
break;
@@ -2131,12 +2007,12 @@ static int add_jump_table(struct objtool_file *file, struct instruction *insn,
* find_jump_table() - Given a dynamic jump, find the switch jump table
* associated with it.
*/
-static struct reloc *find_jump_table(struct objtool_file *file,
- struct symbol *func,
- struct instruction *insn)
+static void find_jump_table(struct objtool_file *file, struct symbol *func,
+ struct instruction *insn)
{
struct reloc *table_reloc;
struct instruction *dest_insn, *orig_insn = insn;
+ unsigned long table_size;
/*
* Backward search using the @first_jump_src links, these help avoid
@@ -2157,17 +2033,17 @@ static struct reloc *find_jump_table(struct objtool_file *file,
insn->jump_dest->offset > orig_insn->offset))
break;
- table_reloc = arch_find_switch_table(file, insn);
+ table_reloc = arch_find_switch_table(file, insn, &table_size);
if (!table_reloc)
continue;
dest_insn = find_insn(file, table_reloc->sym->sec, reloc_addend(table_reloc));
if (!dest_insn || !insn_func(dest_insn) || insn_func(dest_insn)->pfunc != func)
continue;
- return table_reloc;
+ orig_insn->_jump_table = table_reloc;
+ orig_insn->_jump_table_size = table_size;
+ break;
}
-
- return NULL;
}
/*
@@ -2178,7 +2054,6 @@ static void mark_func_jump_tables(struct objtool_file *file,
struct symbol *func)
{
struct instruction *insn, *last = NULL;
- struct reloc *reloc;
func_for_each_insn(file, func, insn) {
if (!last)
@@ -2201,9 +2076,7 @@ static void mark_func_jump_tables(struct objtool_file *file,
if (insn->type != INSN_JUMP_DYNAMIC)
continue;
- reloc = find_jump_table(file, func, insn);
- if (reloc)
- insn->_jump_table = reloc;
+ find_jump_table(file, func, insn);
}
}
@@ -2373,185 +2246,147 @@ static int read_unwind_hints(struct objtool_file *file)
return 0;
}
-static int read_noendbr_hints(struct objtool_file *file)
+static int read_annotate(struct objtool_file *file,
+ int (*func)(struct objtool_file *file, int type, struct instruction *insn))
{
+ struct section *sec;
struct instruction *insn;
- struct section *rsec;
struct reloc *reloc;
+ uint64_t offset;
+ int type, ret;
- rsec = find_section_by_name(file->elf, ".rela.discard.noendbr");
- if (!rsec)
+ sec = find_section_by_name(file->elf, ".discard.annotate_insn");
+ if (!sec)
return 0;
- for_each_reloc(rsec, reloc) {
- insn = find_insn(file, reloc->sym->sec,
- reloc->sym->offset + reloc_addend(reloc));
- if (!insn) {
- WARN("bad .discard.noendbr entry");
- return -1;
- }
+ if (!sec->rsec)
+ return 0;
- insn->noendbr = 1;
+ if (sec->sh.sh_entsize != 8) {
+ static bool warned = false;
+ if (!warned) {
+ WARN("%s: dodgy linker, sh_entsize != 8", sec->name);
+ warned = true;
+ }
+ sec->sh.sh_entsize = 8;
}
- return 0;
-}
-
-static int read_retpoline_hints(struct objtool_file *file)
-{
- struct section *rsec;
- struct instruction *insn;
- struct reloc *reloc;
-
- rsec = find_section_by_name(file->elf, ".rela.discard.retpoline_safe");
- if (!rsec)
- return 0;
+ for_each_reloc(sec->rsec, reloc) {
+ type = *(u32 *)(sec->data->d_buf + (reloc_idx(reloc) * sec->sh.sh_entsize) + 4);
- for_each_reloc(rsec, reloc) {
- if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", rsec->name);
- return -1;
- }
+ offset = reloc->sym->offset + reloc_addend(reloc);
+ insn = find_insn(file, reloc->sym->sec, offset);
- insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
if (!insn) {
- WARN("bad .discard.retpoline_safe entry");
+ WARN("bad .discard.annotate_insn entry: %d of type %d", reloc_idx(reloc), type);
return -1;
}
- if (insn->type != INSN_JUMP_DYNAMIC &&
- insn->type != INSN_CALL_DYNAMIC &&
- insn->type != INSN_RETURN &&
- insn->type != INSN_NOP) {
- WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop");
- return -1;
- }
-
- insn->retpoline_safe = true;
+ ret = func(file, type, insn);
+ if (ret < 0)
+ return ret;
}
return 0;
}
-static int read_instr_hints(struct objtool_file *file)
+static int __annotate_early(struct objtool_file *file, int type, struct instruction *insn)
{
- struct section *rsec;
- struct instruction *insn;
- struct reloc *reloc;
+ switch (type) {
+ case ANNOTYPE_IGNORE_ALTS:
+ insn->ignore_alts = true;
+ break;
- rsec = find_section_by_name(file->elf, ".rela.discard.instr_end");
- if (!rsec)
- return 0;
+ /*
+ * Must be before read_unwind_hints() since that needs insn->noendbr.
+ */
+ case ANNOTYPE_NOENDBR:
+ insn->noendbr = 1;
+ break;
- for_each_reloc(rsec, reloc) {
- if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", rsec->name);
- return -1;
- }
+ default:
+ break;
+ }
- insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
- if (!insn) {
- WARN("bad .discard.instr_end entry");
- return -1;
- }
+ return 0;
+}
- insn->instr--;
- }
+static int __annotate_ifc(struct objtool_file *file, int type, struct instruction *insn)
+{
+ unsigned long dest_off;
- rsec = find_section_by_name(file->elf, ".rela.discard.instr_begin");
- if (!rsec)
+ if (type != ANNOTYPE_INTRA_FUNCTION_CALL)
return 0;
- for_each_reloc(rsec, reloc) {
- if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", rsec->name);
- return -1;
- }
+ if (insn->type != INSN_CALL) {
+ WARN_INSN(insn, "intra_function_call not a direct call");
+ return -1;
+ }
- insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
- if (!insn) {
- WARN("bad .discard.instr_begin entry");
- return -1;
- }
+ /*
+ * Treat intra-function CALLs as JMPs, but with a stack_op.
+ * See add_call_destinations(), which strips stack_ops from
+ * normal CALLs.
+ */
+ insn->type = INSN_JUMP_UNCONDITIONAL;
- insn->instr++;
+ dest_off = arch_jump_destination(insn);
+ insn->jump_dest = find_insn(file, insn->sec, dest_off);
+ if (!insn->jump_dest) {
+ WARN_INSN(insn, "can't find call dest at %s+0x%lx",
+ insn->sec->name, dest_off);
+ return -1;
}
return 0;
}
-static int read_validate_unret_hints(struct objtool_file *file)
+static int __annotate_late(struct objtool_file *file, int type, struct instruction *insn)
{
- struct section *rsec;
- struct instruction *insn;
- struct reloc *reloc;
-
- rsec = find_section_by_name(file->elf, ".rela.discard.validate_unret");
- if (!rsec)
- return 0;
-
- for_each_reloc(rsec, reloc) {
- if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s", rsec->name);
- return -1;
- }
+ switch (type) {
+ case ANNOTYPE_NOENDBR:
+ /* early */
+ break;
- insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
- if (!insn) {
- WARN("bad .discard.instr_end entry");
+ case ANNOTYPE_RETPOLINE_SAFE:
+ if (insn->type != INSN_JUMP_DYNAMIC &&
+ insn->type != INSN_CALL_DYNAMIC &&
+ insn->type != INSN_RETURN &&
+ insn->type != INSN_NOP) {
+ WARN_INSN(insn, "retpoline_safe hint not an indirect jump/call/ret/nop");
return -1;
}
- insn->unret = 1;
- }
-
- return 0;
-}
-
-static int read_intra_function_calls(struct objtool_file *file)
-{
- struct instruction *insn;
- struct section *rsec;
- struct reloc *reloc;
+ insn->retpoline_safe = true;
+ break;
- rsec = find_section_by_name(file->elf, ".rela.discard.intra_function_calls");
- if (!rsec)
- return 0;
+ case ANNOTYPE_INSTR_BEGIN:
+ insn->instr++;
+ break;
- for_each_reloc(rsec, reloc) {
- unsigned long dest_off;
+ case ANNOTYPE_INSTR_END:
+ insn->instr--;
+ break;
- if (reloc->sym->type != STT_SECTION) {
- WARN("unexpected relocation symbol type in %s",
- rsec->name);
- return -1;
- }
+ case ANNOTYPE_UNRET_BEGIN:
+ insn->unret = 1;
+ break;
- insn = find_insn(file, reloc->sym->sec, reloc_addend(reloc));
- if (!insn) {
- WARN("bad .discard.intra_function_call entry");
- return -1;
- }
+ case ANNOTYPE_IGNORE_ALTS:
+ /* early */
+ break;
- if (insn->type != INSN_CALL) {
- WARN_INSN(insn, "intra_function_call not a direct call");
- return -1;
- }
+ case ANNOTYPE_INTRA_FUNCTION_CALL:
+ /* ifc */
+ break;
- /*
- * Treat intra-function CALLs as JMPs, but with a stack_op.
- * See add_call_destinations(), which strips stack_ops from
- * normal CALLs.
- */
- insn->type = INSN_JUMP_UNCONDITIONAL;
+ case ANNOTYPE_REACHABLE:
+ insn->dead_end = false;
+ break;
- dest_off = arch_jump_destination(insn);
- insn->jump_dest = find_insn(file, insn->sec, dest_off);
- if (!insn->jump_dest) {
- WARN_INSN(insn, "can't find call dest at %s+0x%lx",
- insn->sec->name, dest_off);
- return -1;
- }
+ default:
+ WARN_INSN(insn, "Unknown annotation type: %d", type);
+ break;
}
return 0;
@@ -2666,14 +2501,7 @@ static int decode_sections(struct objtool_file *file)
add_ignores(file);
add_uaccess_safe(file);
- ret = add_ignore_alternatives(file);
- if (ret)
- return ret;
-
- /*
- * Must be before read_unwind_hints() since that needs insn->noendbr.
- */
- ret = read_noendbr_hints(file);
+ ret = read_annotate(file, __annotate_early);
if (ret)
return ret;
@@ -2695,7 +2523,7 @@ static int decode_sections(struct objtool_file *file)
* Must be before add_call_destination(); it changes INSN_CALL to
* INSN_JUMP.
*/
- ret = read_intra_function_calls(file);
+ ret = read_annotate(file, __annotate_ifc);
if (ret)
return ret;
@@ -2703,14 +2531,6 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
- /*
- * Must be after add_call_destinations() such that it can override
- * dead_end_function() marks.
- */
- ret = add_dead_ends(file);
- if (ret)
- return ret;
-
ret = add_jump_table_alts(file);
if (ret)
return ret;
@@ -2719,15 +2539,11 @@ static int decode_sections(struct objtool_file *file)
if (ret)
return ret;
- ret = read_retpoline_hints(file);
- if (ret)
- return ret;
-
- ret = read_instr_hints(file);
- if (ret)
- return ret;
-
- ret = read_validate_unret_hints(file);
+ /*
+ * Must be after add_call_destinations() such that it can override
+ * dead_end_function() marks.
+ */
+ ret = read_annotate(file, __annotate_late);
if (ret)
return ret;
diff --git a/tools/objtool/include/objtool/check.h b/tools/objtool/include/objtool/check.h
index daa46f1f0965..e1cd13cd28a3 100644
--- a/tools/objtool/include/objtool/check.h
+++ b/tools/objtool/include/objtool/check.h
@@ -71,7 +71,10 @@ struct instruction {
struct instruction *first_jump_src;
union {
struct symbol *_call_dest;
- struct reloc *_jump_table;
+ struct {
+ struct reloc *_jump_table;
+ unsigned long _jump_table_size;
+ };
};
struct alternative *alts;
struct symbol *sym;
diff --git a/tools/objtool/include/objtool/special.h b/tools/objtool/include/objtool/special.h
index 86d4af9c5aa9..e7ee7ffccefd 100644
--- a/tools/objtool/include/objtool/special.h
+++ b/tools/objtool/include/objtool/special.h
@@ -38,5 +38,6 @@ bool arch_support_alt_relocation(struct special_alt *special_alt,
struct instruction *insn,
struct reloc *reloc);
struct reloc *arch_find_switch_table(struct objtool_file *file,
- struct instruction *insn);
+ struct instruction *insn,
+ unsigned long *table_size);
#endif /* _SPECIAL_H */
diff --git a/tools/perf/Documentation/perf-arm-spe.txt b/tools/perf/Documentation/perf-arm-spe.txt
index de2b0b479249..37afade4f1b2 100644
--- a/tools/perf/Documentation/perf-arm-spe.txt
+++ b/tools/perf/Documentation/perf-arm-spe.txt
@@ -150,6 +150,7 @@ arm_spe/load_filter=1,min_latency=10/'
pct_enable=1 - collect physical timestamp instead of virtual timestamp (PMSCR.PCT) - requires privilege
store_filter=1 - collect stores only (PMSFCR.ST)
ts_enable=1 - enable timestamping with value of generic timer (PMSCR.TS)
+ discard=1 - enable SPE PMU events but don't collect sample data - see 'Discard mode' (PMBLIMITR.FM = DISCARD)
+++*+++ Latency is the total latency from the point at which sampling started on that instruction, rather
than only the execution latency.
@@ -220,6 +221,31 @@ Common errors
Increase sampling interval (see above)
+PMU events
+~~~~~~~~~~
+
+SPE has events that can be counted on core PMUs. These are prefixed with
+SAMPLE_, for example SAMPLE_POP, SAMPLE_FEED, SAMPLE_COLLISION and
+SAMPLE_FEED_BR.
+
+These events will only count when an SPE event is running on the same core that
+the PMU event is opened on, otherwise they read as 0. There are various ways to
+ensure that the PMU event and SPE event are scheduled together depending on the
+way the event is opened. For example opening both events as per-process events
+on the same process, although it's not guaranteed that the PMU event is enabled
+first when context switching. For that reason it may be better to open the PMU
+event as a systemwide event and then open SPE on the process of interest.
+
+Discard mode
+~~~~~~~~~~~~
+
+SPE related (SAMPLE_* etc) core PMU events can be used without the overhead of
+collecting sample data if discard mode is supported (optional from Armv8.6).
+First run a system wide SPE session (or on the core of interest) using options
+to minimize output. Then run perf stat:
+
+ perf record -e arm_spe/discard/ -a -N -B --no-bpf-event -o - > /dev/null &
+ perf stat -e SAMPLE_FEED_LD
SEE ALSO
--------
diff --git a/tools/testing/selftests/arm64/abi/hwcap.c b/tools/testing/selftests/arm64/abi/hwcap.c
index 0029ed9c5c9a..35f521e5f41c 100644
--- a/tools/testing/selftests/arm64/abi/hwcap.c
+++ b/tools/testing/selftests/arm64/abi/hwcap.c
@@ -46,6 +46,12 @@ static void atomics_sigill(void)
asm volatile(".inst 0xb82003ff" : : : );
}
+static void cmpbr_sigill(void)
+{
+ /* Not implemented, too complicated and unreliable anyway */
+}
+
+
static void crc32_sigill(void)
{
/* CRC32W W0, W0, W1 */
@@ -82,6 +88,18 @@ static void f8fma_sigill(void)
asm volatile(".inst 0xec0fc00");
}
+static void f8mm4_sigill(void)
+{
+ /* FMMLA V0.4SH, V0.16B, V0.16B */
+ asm volatile(".inst 0x6e00ec00");
+}
+
+static void f8mm8_sigill(void)
+{
+ /* FMMLA V0.4S, V0.16B, V0.16B */
+ asm volatile(".inst 0x6e80ec00");
+}
+
static void faminmax_sigill(void)
{
/* FAMIN V0.4H, V0.4H, V0.4H */
@@ -98,6 +116,12 @@ static void fpmr_sigill(void)
asm volatile("mrs x0, S3_3_C4_C4_2" : : : "x0");
}
+static void fprcvt_sigill(void)
+{
+ /* FCVTAS S0, H0 */
+ asm volatile(".inst 0x1efa0000");
+}
+
static void gcs_sigill(void)
{
unsigned long *gcspr;
@@ -226,6 +250,42 @@ static void sme2p1_sigill(void)
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
}
+static void sme2p2_sigill(void)
+{
+ /* SMSTART SM */
+ asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+ /* UXTB Z0.D, P0/Z, Z0.D */
+ asm volatile(".inst 0x4c1a000" : : : );
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme_aes_sigill(void)
+{
+ /* SMSTART SM */
+ asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+ /* AESD z0.b, z0.b, z0.b */
+ asm volatile(".inst 0x4522e400" : : : "z0");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void sme_sbitperm_sigill(void)
+{
+ /* SMSTART SM */
+ asm volatile("msr S0_3_C4_C3_3, xzr" : : : );
+
+ /* BDEP Z0.B, Z0.B, Z0.B */
+ asm volatile(".inst 0x4500b400" : : : "z0");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
static void smei16i32_sigill(void)
{
/* SMSTART */
@@ -339,8 +399,44 @@ static void smesf8fma_sigill(void)
/* SMSTART */
asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
- /* FMLALB V0.8H, V0.16B, V0.16B */
- asm volatile(".inst 0xec0fc00");
+ /* FMLALB Z0.8H, Z0.B, Z0.B */
+ asm volatile(".inst 0x64205000");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesfexpa_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* FEXPA Z0.D, Z0.D */
+ asm volatile(".inst 0x04e0b800");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smesmop4_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* SMOP4A ZA0.S, Z0.B, { Z0.B - Z1.B } */
+ asm volatile(".inst 0x80108000");
+
+ /* SMSTOP */
+ asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
+}
+
+static void smestmop_sigill(void)
+{
+ /* SMSTART */
+ asm volatile("msr S0_3_C4_C7_3, xzr" : : : );
+
+ /* STMOPA ZA0.S, { Z0.H - Z1.H }, Z0.H, Z20[0] */
+ asm volatile(".inst 0x80408008");
/* SMSTOP */
asm volatile("msr S0_3_C4_C6_3, xzr" : : : );
@@ -364,18 +460,42 @@ static void sve2p1_sigill(void)
asm volatile(".inst 0x65000000" : : : "z0");
}
+static void sve2p2_sigill(void)
+{
+ /* NOT Z0.D, P0/Z, Z0.D */
+ asm volatile(".inst 0x4cea000" : : : "z0");
+}
+
static void sveaes_sigill(void)
{
/* AESD z0.b, z0.b, z0.b */
asm volatile(".inst 0x4522e400" : : : "z0");
}
+static void sveaes2_sigill(void)
+{
+ /* AESD {Z0.B - Z1.B }, { Z0.B - Z1.B }, Z0.Q */
+ asm volatile(".inst 0x4522ec00" : : : "z0");
+}
+
static void sveb16b16_sigill(void)
{
/* BFADD Z0.H, Z0.H, Z0.H */
asm volatile(".inst 0x65000000" : : : );
}
+static void svebfscale_sigill(void)
+{
+ /* BFSCALE Z0.H, P0/M, Z0.H, Z0.H */
+ asm volatile(".inst 0x65098000" : : : "z0");
+}
+
+static void svef16mm_sigill(void)
+{
+ /* FMMLA Z0.S, Z0.H, Z0.H */
+ asm volatile(".inst 0x6420e400");
+}
+
static void svepmull_sigill(void)
{
/* PMULLB Z0.Q, Z0.D, Z0.D */
@@ -394,6 +514,12 @@ static void svesha3_sigill(void)
asm volatile(".inst 0x4203800" : : : "z0");
}
+static void sveeltperm_sigill(void)
+{
+ /* COMPACT Z0.B, P0, Z0.B */
+ asm volatile(".inst 0x5218000" : : : "x0");
+}
+
static void svesm4_sigill(void)
{
/* SM4E Z0.S, Z0.S, Z0.S */
@@ -470,6 +596,13 @@ static const struct hwcap_data {
.sigill_fn = aes_sigill,
},
{
+ .name = "CMPBR",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_CMPBR,
+ .cpuinfo = "cmpbr",
+ .sigill_fn = cmpbr_sigill,
+ },
+ {
.name = "CRC32",
.at_hwcap = AT_HWCAP,
.hwcap_bit = HWCAP_CRC32,
@@ -524,6 +657,20 @@ static const struct hwcap_data {
.sigill_fn = f8fma_sigill,
},
{
+ .name = "F8MM8",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_F8MM8,
+ .cpuinfo = "f8mm8",
+ .sigill_fn = f8mm8_sigill,
+ },
+ {
+ .name = "F8MM4",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_F8MM4,
+ .cpuinfo = "f8mm4",
+ .sigill_fn = f8mm4_sigill,
+ },
+ {
.name = "FAMINMAX",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_FAMINMAX,
@@ -546,6 +693,13 @@ static const struct hwcap_data {
.sigill_reliable = true,
},
{
+ .name = "FPRCVT",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_FPRCVT,
+ .cpuinfo = "fprcvt",
+ .sigill_fn = fprcvt_sigill,
+ },
+ {
.name = "GCS",
.at_hwcap = AT_HWCAP,
.hwcap_bit = HWCAP_GCS,
@@ -692,6 +846,20 @@ static const struct hwcap_data {
.sigill_fn = sme2p1_sigill,
},
{
+ .name = "SME 2.2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME2P2,
+ .cpuinfo = "sme2p2",
+ .sigill_fn = sme2p2_sigill,
+ },
+ {
+ .name = "SME AES",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_AES,
+ .cpuinfo = "smeaes",
+ .sigill_fn = sme_aes_sigill,
+ },
+ {
.name = "SME I16I32",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SME_I16I32,
@@ -741,6 +909,13 @@ static const struct hwcap_data {
.sigill_fn = smelutv2_sigill,
},
{
+ .name = "SME SBITPERM",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_SBITPERM,
+ .cpuinfo = "smesbitperm",
+ .sigill_fn = sme_sbitperm_sigill,
+ },
+ {
.name = "SME SF8FMA",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SME_SF8FMA,
@@ -762,6 +937,27 @@ static const struct hwcap_data {
.sigill_fn = smesf8dp4_sigill,
},
{
+ .name = "SME SFEXPA",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_SFEXPA,
+ .cpuinfo = "smesfexpa",
+ .sigill_fn = smesfexpa_sigill,
+ },
+ {
+ .name = "SME SMOP4",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_SMOP4,
+ .cpuinfo = "smesmop4",
+ .sigill_fn = smesmop4_sigill,
+ },
+ {
+ .name = "SME STMOP",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SME_STMOP,
+ .cpuinfo = "smestmop",
+ .sigill_fn = smestmop_sigill,
+ },
+ {
.name = "SVE",
.at_hwcap = AT_HWCAP,
.hwcap_bit = HWCAP_SVE,
@@ -784,6 +980,13 @@ static const struct hwcap_data {
.sigill_fn = sve2p1_sigill,
},
{
+ .name = "SVE 2.2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE2P2,
+ .cpuinfo = "sve2p2",
+ .sigill_fn = sve2p2_sigill,
+ },
+ {
.name = "SVE AES",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SVEAES,
@@ -791,6 +994,34 @@ static const struct hwcap_data {
.sigill_fn = sveaes_sigill,
},
{
+ .name = "SVE AES2",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE_AES2,
+ .cpuinfo = "sveaes2",
+ .sigill_fn = sveaes2_sigill,
+ },
+ {
+ .name = "SVE BFSCALE",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE_BFSCALE,
+ .cpuinfo = "svebfscale",
+ .sigill_fn = svebfscale_sigill,
+ },
+ {
+ .name = "SVE ELTPERM",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE_ELTPERM,
+ .cpuinfo = "sveeltperm",
+ .sigill_fn = sveeltperm_sigill,
+ },
+ {
+ .name = "SVE F16MM",
+ .at_hwcap = AT_HWCAP,
+ .hwcap_bit = HWCAP_SVE_F16MM,
+ .cpuinfo = "svef16mm",
+ .sigill_fn = svef16mm_sigill,
+ },
+ {
.name = "SVE2 B16B16",
.at_hwcap = AT_HWCAP2,
.hwcap_bit = HWCAP2_SVE_B16B16,
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 7eeb3cbe18c7..0a016cd71cba 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -129,7 +129,6 @@ TEST_FILES = xsk_prereqs.sh $(wildcard progs/btf_dump_test_case_*.c)
TEST_PROGS := test_kmod.sh \
test_xdp_redirect.sh \
test_xdp_redirect_multi.sh \
- test_xdp_meta.sh \
test_tunnel.sh \
test_lwt_seg6local.sh \
test_lirc_mode2.sh \
diff --git a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
index 151a4210028f..2461d183dee5 100644
--- a/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
+++ b/tools/testing/selftests/bpf/prog_tests/tc_netkit.c
@@ -14,10 +14,16 @@
#include "netlink_helpers.h"
#include "tc_helpers.h"
+#define NETKIT_HEADROOM 32
+#define NETKIT_TAILROOM 8
+
#define MARK 42
#define PRIO 0xeb9f
#define ICMP_ECHO 8
+#define FLAG_ADJUST_ROOM (1 << 0)
+#define FLAG_SAME_NETNS (1 << 1)
+
struct icmphdr {
__u8 type;
__u8 code;
@@ -35,7 +41,7 @@ struct iplink_req {
};
static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
- bool same_netns, int scrub, int peer_scrub)
+ int scrub, int peer_scrub, __u32 flags)
{
struct rtnl_handle rth = { .fd = -1 };
struct iplink_req req = {};
@@ -63,6 +69,10 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
addattr32(&req.n, sizeof(req), IFLA_NETKIT_SCRUB, scrub);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_PEER_SCRUB, peer_scrub);
addattr32(&req.n, sizeof(req), IFLA_NETKIT_MODE, mode);
+ if (flags & FLAG_ADJUST_ROOM) {
+ addattr16(&req.n, sizeof(req), IFLA_NETKIT_HEADROOM, NETKIT_HEADROOM);
+ addattr16(&req.n, sizeof(req), IFLA_NETKIT_TAILROOM, NETKIT_TAILROOM);
+ }
addattr_nest_end(&req.n, data);
addattr_nest_end(&req.n, linkinfo);
@@ -87,7 +97,7 @@ static int create_netkit(int mode, int policy, int peer_policy, int *ifindex,
" addr ee:ff:bb:cc:aa:dd"),
"set hwaddress");
}
- if (same_netns) {
+ if (flags & FLAG_SAME_NETNS) {
ASSERT_OK(system("ip link set dev " netkit_peer " up"),
"up peer");
ASSERT_OK(system("ip addr add dev " netkit_peer " 10.0.0.2/24"),
@@ -184,8 +194,8 @@ void serial_test_tc_netkit_basic(void)
int err, ifindex;
err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -299,8 +309,8 @@ static void serial_test_tc_netkit_multi_links_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -428,8 +438,8 @@ static void serial_test_tc_netkit_multi_opts_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -543,8 +553,8 @@ void serial_test_tc_netkit_device(void)
int err, ifindex, ifindex2;
err = create_netkit(NETKIT_L3, NETKIT_PASS, NETKIT_PASS,
- &ifindex, true, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
if (err)
return;
@@ -655,8 +665,8 @@ static void serial_test_tc_netkit_neigh_links_target(int mode, int target)
int err, ifindex;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, 0);
if (err)
return;
@@ -733,8 +743,8 @@ static void serial_test_tc_netkit_pkt_type_mode(int mode)
struct bpf_link *link;
err = create_netkit(mode, NETKIT_PASS, NETKIT_PASS,
- &ifindex, true, NETKIT_SCRUB_DEFAULT,
- NETKIT_SCRUB_DEFAULT);
+ &ifindex, NETKIT_SCRUB_DEFAULT,
+ NETKIT_SCRUB_DEFAULT, FLAG_SAME_NETNS);
if (err)
return;
@@ -799,7 +809,7 @@ void serial_test_tc_netkit_pkt_type(void)
serial_test_tc_netkit_pkt_type_mode(NETKIT_L3);
}
-static void serial_test_tc_netkit_scrub_type(int scrub)
+static void serial_test_tc_netkit_scrub_type(int scrub, bool room)
{
LIBBPF_OPTS(bpf_netkit_opts, optl);
struct test_tc_link *skel;
@@ -807,7 +817,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub)
int err, ifindex;
err = create_netkit(NETKIT_L2, NETKIT_PASS, NETKIT_PASS,
- &ifindex, false, scrub, scrub);
+ &ifindex, scrub, scrub,
+ room ? FLAG_ADJUST_ROOM : 0);
if (err)
return;
@@ -842,6 +853,8 @@ static void serial_test_tc_netkit_scrub_type(int scrub)
ASSERT_EQ(skel->bss->seen_tc8, true, "seen_tc8");
ASSERT_EQ(skel->bss->mark, scrub == NETKIT_SCRUB_NONE ? MARK : 0, "mark");
ASSERT_EQ(skel->bss->prio, scrub == NETKIT_SCRUB_NONE ? PRIO : 0, "prio");
+ ASSERT_EQ(skel->bss->headroom, room ? NETKIT_HEADROOM : 0, "headroom");
+ ASSERT_EQ(skel->bss->tailroom, room ? NETKIT_TAILROOM : 0, "tailroom");
cleanup:
test_tc_link__destroy(skel);
@@ -852,6 +865,6 @@ cleanup:
void serial_test_tc_netkit_scrub(void)
{
- serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT);
- serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE);
+ serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_DEFAULT, false);
+ serial_test_tc_netkit_scrub_type(NETKIT_SCRUB_NONE, true);
}
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
index e6a783c7f5db..937da9b7532a 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c
@@ -2,6 +2,14 @@
#include <test_progs.h>
#include <network_helpers.h>
#include "test_xdp_context_test_run.skel.h"
+#include "test_xdp_meta.skel.h"
+
+#define TX_ADDR "10.0.0.1"
+#define RX_ADDR "10.0.0.2"
+#define RX_NAME "veth0"
+#define TX_NAME "veth1"
+#define TX_NETNS "xdp_context_tx"
+#define RX_NETNS "xdp_context_rx"
void test_xdp_context_error(int prog_fd, struct bpf_test_run_opts opts,
__u32 data_meta, __u32 data, __u32 data_end,
@@ -103,3 +111,82 @@ void test_xdp_context_test_run(void)
test_xdp_context_test_run__destroy(skel);
}
+
+void test_xdp_context_functional(void)
+{
+ LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS);
+ LIBBPF_OPTS(bpf_tc_opts, tc_opts, .handle = 1, .priority = 1);
+ struct netns_obj *rx_ns = NULL, *tx_ns = NULL;
+ struct bpf_program *tc_prog, *xdp_prog;
+ struct test_xdp_meta *skel = NULL;
+ struct nstoken *nstoken = NULL;
+ int rx_ifindex;
+ int ret;
+
+ tx_ns = netns_new(TX_NETNS, false);
+ if (!ASSERT_OK_PTR(tx_ns, "create tx_ns"))
+ return;
+
+ rx_ns = netns_new(RX_NETNS, false);
+ if (!ASSERT_OK_PTR(rx_ns, "create rx_ns"))
+ goto close;
+
+ SYS(close, "ip link add " RX_NAME " netns " RX_NETNS
+ " type veth peer name " TX_NAME " netns " TX_NETNS);
+
+ nstoken = open_netns(RX_NETNS);
+ if (!ASSERT_OK_PTR(nstoken, "setns rx_ns"))
+ goto close;
+
+ SYS(close, "ip addr add " RX_ADDR "/24 dev " RX_NAME);
+ SYS(close, "ip link set dev " RX_NAME " up");
+
+ skel = test_xdp_meta__open_and_load();
+ if (!ASSERT_OK_PTR(skel, "open and load skeleton"))
+ goto close;
+
+ rx_ifindex = if_nametoindex(RX_NAME);
+ if (!ASSERT_GE(rx_ifindex, 0, "if_nametoindex rx"))
+ goto close;
+
+ tc_hook.ifindex = rx_ifindex;
+ ret = bpf_tc_hook_create(&tc_hook);
+ if (!ASSERT_OK(ret, "bpf_tc_hook_create"))
+ goto close;
+
+ tc_prog = bpf_object__find_program_by_name(skel->obj, "ing_cls");
+ if (!ASSERT_OK_PTR(tc_prog, "open ing_cls prog"))
+ goto close;
+
+ tc_opts.prog_fd = bpf_program__fd(tc_prog);
+ ret = bpf_tc_attach(&tc_hook, &tc_opts);
+ if (!ASSERT_OK(ret, "bpf_tc_attach"))
+ goto close;
+
+ xdp_prog = bpf_object__find_program_by_name(skel->obj, "ing_xdp");
+ if (!ASSERT_OK_PTR(xdp_prog, "open ing_xdp prog"))
+ goto close;
+
+ ret = bpf_xdp_attach(rx_ifindex,
+ bpf_program__fd(xdp_prog),
+ 0, NULL);
+ if (!ASSERT_GE(ret, 0, "bpf_xdp_attach"))
+ goto close;
+
+ close_netns(nstoken);
+
+ nstoken = open_netns(TX_NETNS);
+ if (!ASSERT_OK_PTR(nstoken, "setns tx_ns"))
+ goto close;
+
+ SYS(close, "ip addr add " TX_ADDR "/24 dev " TX_NAME);
+ SYS(close, "ip link set dev " TX_NAME " up");
+ ASSERT_OK(SYS_NOFAIL("ping -c 1 " RX_ADDR), "ping");
+
+close:
+ close_netns(nstoken);
+ test_xdp_meta__destroy(skel);
+ netns_free(rx_ns);
+ netns_free(tx_ns);
+}
+
diff --git a/tools/testing/selftests/bpf/progs/test_tc_link.c b/tools/testing/selftests/bpf/progs/test_tc_link.c
index 10d825928499..630f12e51b07 100644
--- a/tools/testing/selftests/bpf/progs/test_tc_link.c
+++ b/tools/testing/selftests/bpf/progs/test_tc_link.c
@@ -8,6 +8,7 @@
#include <linux/if_packet.h>
#include <bpf/bpf_endian.h>
#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_core_read.h>
char LICENSE[] SEC("license") = "GPL";
@@ -27,6 +28,7 @@ bool seen_host;
bool seen_mcast;
int mark, prio;
+unsigned short headroom, tailroom;
SEC("tc/ingress")
int tc1(struct __sk_buff *skb)
@@ -104,11 +106,24 @@ out:
return TCX_PASS;
}
+struct sk_buff {
+ struct net_device *dev;
+};
+
+struct net_device {
+ unsigned short needed_headroom;
+ unsigned short needed_tailroom;
+};
+
SEC("tc/egress")
int tc8(struct __sk_buff *skb)
{
+ struct net_device *dev = BPF_CORE_READ((struct sk_buff *)skb, dev);
+
seen_tc8 = true;
mark = skb->mark;
prio = skb->priority;
+ headroom = BPF_CORE_READ(dev, needed_headroom);
+ tailroom = BPF_CORE_READ(dev, needed_tailroom);
return TCX_PASS;
}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_meta.c b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
index a7c4a7d49fe6..fe2d71ae0e71 100644
--- a/tools/testing/selftests/bpf/progs/test_xdp_meta.c
+++ b/tools/testing/selftests/bpf/progs/test_xdp_meta.c
@@ -8,7 +8,7 @@
#define round_up(x, y) ((((x) - 1) | __round_mask(x, y)) + 1)
#define ctx_ptr(ctx, mem) (void *)(unsigned long)ctx->mem
-SEC("t")
+SEC("tc")
int ing_cls(struct __sk_buff *ctx)
{
__u8 *data, *data_meta, *data_end;
@@ -28,7 +28,7 @@ int ing_cls(struct __sk_buff *ctx)
return diff ? TC_ACT_SHOT : TC_ACT_OK;
}
-SEC("x")
+SEC("xdp")
int ing_xdp(struct xdp_md *ctx)
{
__u8 *data, *data_meta, *data_end;
diff --git a/tools/testing/selftests/bpf/test_xdp_meta.sh b/tools/testing/selftests/bpf/test_xdp_meta.sh
deleted file mode 100755
index 2740322c1878..000000000000
--- a/tools/testing/selftests/bpf/test_xdp_meta.sh
+++ /dev/null
@@ -1,58 +0,0 @@
-#!/bin/sh
-
-BPF_FILE="test_xdp_meta.bpf.o"
-# Kselftest framework requirement - SKIP code is 4.
-readonly KSFT_SKIP=4
-readonly NS1="ns1-$(mktemp -u XXXXXX)"
-readonly NS2="ns2-$(mktemp -u XXXXXX)"
-
-cleanup()
-{
- if [ "$?" = "0" ]; then
- echo "selftests: test_xdp_meta [PASS]";
- else
- echo "selftests: test_xdp_meta [FAILED]";
- fi
-
- set +e
- ip link del veth1 2> /dev/null
- ip netns del ${NS1} 2> /dev/null
- ip netns del ${NS2} 2> /dev/null
-}
-
-ip link set dev lo xdp off 2>/dev/null > /dev/null
-if [ $? -ne 0 ];then
- echo "selftests: [SKIP] Could not run test without the ip xdp support"
- exit $KSFT_SKIP
-fi
-set -e
-
-ip netns add ${NS1}
-ip netns add ${NS2}
-
-trap cleanup 0 2 3 6 9
-
-ip link add veth1 type veth peer name veth2
-
-ip link set veth1 netns ${NS1}
-ip link set veth2 netns ${NS2}
-
-ip netns exec ${NS1} ip addr add 10.1.1.11/24 dev veth1
-ip netns exec ${NS2} ip addr add 10.1.1.22/24 dev veth2
-
-ip netns exec ${NS1} tc qdisc add dev veth1 clsact
-ip netns exec ${NS2} tc qdisc add dev veth2 clsact
-
-ip netns exec ${NS1} tc filter add dev veth1 ingress bpf da obj ${BPF_FILE} sec t
-ip netns exec ${NS2} tc filter add dev veth2 ingress bpf da obj ${BPF_FILE} sec t
-
-ip netns exec ${NS1} ip link set dev veth1 xdp obj ${BPF_FILE} sec x
-ip netns exec ${NS2} ip link set dev veth2 xdp obj ${BPF_FILE} sec x
-
-ip netns exec ${NS1} ip link set dev veth1 up
-ip netns exec ${NS2} ip link set dev veth2 up
-
-ip netns exec ${NS1} ping -c 1 10.1.1.22
-ip netns exec ${NS2} ping -c 1 10.1.1.11
-
-exit 0
diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c
index 6f9956eed797..e38675d9b118 100644
--- a/tools/testing/selftests/bpf/xdp_hw_metadata.c
+++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c
@@ -79,7 +79,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id)
.fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS,
.comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS,
.frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE,
- .flags = XSK_UMEM__DEFAULT_FLAGS,
+ .flags = XDP_UMEM_TX_METADATA_LEN,
.tx_metadata_len = sizeof(struct xsk_tx_metadata),
};
__u32 idx = 0;
@@ -551,6 +551,7 @@ static void hwtstamp_enable(const char *ifname)
{
struct hwtstamp_config cfg = {
.rx_filter = HWTSTAMP_FILTER_ALL,
+ .tx_type = HWTSTAMP_TX_ON,
};
hwtstamp_ioctl(SIOCGHWTSTAMP, ifname, &saved_hwtstamp_cfg);
diff --git a/tools/testing/selftests/cgroup/test_cpuset_prs.sh b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
index 03c1bdaed2c3..400a696a0d21 100755
--- a/tools/testing/selftests/cgroup/test_cpuset_prs.sh
+++ b/tools/testing/selftests/cgroup/test_cpuset_prs.sh
@@ -86,15 +86,15 @@ echo "" > test/cpuset.cpus
#
# If isolated CPUs have been reserved at boot time (as shown in
-# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-7
+# cpuset.cpus.isolated), these isolated CPUs should be outside of CPUs 0-8
# that will be used by this script for testing purpose. If not, some of
-# the tests may fail incorrectly. These isolated CPUs will also be removed
-# before being compared with the expected results.
+# the tests may fail incorrectly. These pre-isolated CPUs should stay in
+# an isolated state throughout the testing process for now.
#
BOOT_ISOLCPUS=$(cat $CGROUP2/cpuset.cpus.isolated)
if [[ -n "$BOOT_ISOLCPUS" ]]
then
- [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 7 ]] &&
+ [[ $(echo $BOOT_ISOLCPUS | sed -e "s/[,-].*//") -le 8 ]] &&
skip_test "Pre-isolated CPUs ($BOOT_ISOLCPUS) overlap CPUs to be tested"
echo "Pre-isolated CPUs: $BOOT_ISOLCPUS"
fi
@@ -684,14 +684,18 @@ check_isolcpus()
fi
#
+ # Appending pre-isolated CPUs
+ # Even though CPU #8 isn't used for testing, it can't be pre-isolated
+ # to make appending those CPUs easier.
+ #
+ [[ -n "$BOOT_ISOLCPUS" ]] && {
+ EXPECT_VAL=${EXPECT_VAL:+${EXPECT_VAL},}${BOOT_ISOLCPUS}
+ EXPECT_VAL2=${EXPECT_VAL2:+${EXPECT_VAL2},}${BOOT_ISOLCPUS}
+ }
+
+ #
# Check cpuset.cpus.isolated cpumask
#
- if [[ -z "$BOOT_ISOLCPUS" ]]
- then
- ISOLCPUS=$(cat $ISCPUS)
- else
- ISOLCPUS=$(cat $ISCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
- fi
[[ "$EXPECT_VAL2" != "$ISOLCPUS" ]] && {
# Take a 50ms pause and try again
pause 0.05
@@ -731,8 +735,6 @@ check_isolcpus()
fi
done
[[ "$ISOLCPUS" = *- ]] && ISOLCPUS=${ISOLCPUS}$LASTISOLCPU
- [[ -n "BOOT_ISOLCPUS" ]] &&
- ISOLCPUS=$(echo $ISOLCPUS | sed -e "s/,*$BOOT_ISOLCPUS//")
[[ "$EXPECT_VAL" = "$ISOLCPUS" ]]
}
@@ -836,8 +838,11 @@ run_state_test()
# if available
[[ -n "$ICPUS" ]] && {
check_isolcpus $ICPUS
- [[ $? -ne 0 ]] && test_fail $I "isolated CPU" \
- "Expect $ICPUS, get $ISOLCPUS instead"
+ [[ $? -ne 0 ]] && {
+ [[ -n "$BOOT_ISOLCPUS" ]] && ICPUS=${ICPUS},${BOOT_ISOLCPUS}
+ test_fail $I "isolated CPU" \
+ "Expect $ICPUS, get $ISOLCPUS instead"
+ }
}
reset_cgroup_states
#
diff --git a/tools/testing/selftests/coredump/Makefile b/tools/testing/selftests/coredump/Makefile
new file mode 100644
index 000000000000..ed210037b29d
--- /dev/null
+++ b/tools/testing/selftests/coredump/Makefile
@@ -0,0 +1,7 @@
+# SPDX-License-Identifier: GPL-2.0-only
+CFLAGS = $(KHDR_INCLUDES)
+
+TEST_GEN_PROGS := stackdump_test
+TEST_FILES := stackdump
+
+include ../lib.mk
diff --git a/tools/testing/selftests/coredump/README.rst b/tools/testing/selftests/coredump/README.rst
new file mode 100644
index 000000000000..164a7aa181c8
--- /dev/null
+++ b/tools/testing/selftests/coredump/README.rst
@@ -0,0 +1,50 @@
+coredump selftest
+=================
+
+Background context
+------------------
+
+`coredump` is a feature which dumps a process's memory space when the process terminates
+unexpectedly (e.g. due to segmentation fault), which can be useful for debugging. By default,
+`coredump` dumps the memory to the file named `core`, but this behavior can be changed by writing a
+different file name to `/proc/sys/kernel/core_pattern`. Furthermore, `coredump` can be piped to a
+user-space program by writing the pipe symbol (`|`) followed by the command to be executed to
+`/proc/sys/kernel/core_pattern`. For the full description, see `man 5 core`.
+
+The piped user program may be interested in reading the stack pointers of the crashed process. The
+crashed process's stack pointers can be read from `procfs`: it is the `kstkesp` field in
+`/proc/$PID/stat`. See `man 5 proc` for all the details.
+
+The problem
+-----------
+While a thread is active, the stack pointer is unsafe to read and therefore the `kstkesp` field
+reads zero. But when the thread is dead (e.g. during a coredump), this field should have valid
+value.
+
+However, this was broken in the past and `kstkesp` was zero even during coredump:
+
+* commit 0a1eb2d474ed ("fs/proc: Stop reporting eip and esp in /proc/PID/stat") changed kstkesp to
+ always be zero
+
+* commit fd7d56270b52 ("fs/proc: Report eip/esp in /prod/PID/stat for coredumping") fixed it for the
+ coredumping thread. However, other threads in a coredumping process still had the problem.
+
+* commit cb8f381f1613 ("fs/proc/array.c: allow reporting eip/esp for all coredumping threads") fixed
+ for all threads in a coredumping process.
+
+* commit 92307383082d ("coredump: Don't perform any cleanups before dumping core") broke it again
+ for the other threads in a coredumping process.
+
+The problem has been fixed now, but considering the history, it may appear again in the future.
+
+The goal of this test
+---------------------
+This test detects problem with reading `kstkesp` during coredump by doing the following:
+
+#. Tell the kernel to execute the "stackdump" script when a coredump happens. This script
+ reads the stack pointers of all threads of crashed processes.
+
+#. Spawn a child process who creates some threads and then crashes.
+
+#. Read the output from the "stackdump" script, and make sure all stack pointer values are
+ non-zero.
diff --git a/tools/testing/selftests/coredump/stackdump b/tools/testing/selftests/coredump/stackdump
new file mode 100755
index 000000000000..96714ce42d12
--- /dev/null
+++ b/tools/testing/selftests/coredump/stackdump
@@ -0,0 +1,14 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+CRASH_PROGRAM_ID=$1
+STACKDUMP_FILE=$2
+
+TMP=$(mktemp)
+
+for t in /proc/$CRASH_PROGRAM_ID/task/*; do
+ tid=$(basename $t)
+ cat /proc/$tid/stat | awk '{print $29}' >> $TMP
+done
+
+mv $TMP $STACKDUMP_FILE
diff --git a/tools/testing/selftests/coredump/stackdump_test.c b/tools/testing/selftests/coredump/stackdump_test.c
new file mode 100644
index 000000000000..137b2364a082
--- /dev/null
+++ b/tools/testing/selftests/coredump/stackdump_test.c
@@ -0,0 +1,151 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <fcntl.h>
+#include <libgen.h>
+#include <linux/limits.h>
+#include <pthread.h>
+#include <string.h>
+#include <sys/resource.h>
+#include <unistd.h>
+
+#include "../kselftest_harness.h"
+
+#define STACKDUMP_FILE "stack_values"
+#define STACKDUMP_SCRIPT "stackdump"
+#define NUM_THREAD_SPAWN 128
+
+static void *do_nothing(void *)
+{
+ while (1)
+ pause();
+}
+
+static void crashing_child(void)
+{
+ pthread_t thread;
+ int i;
+
+ for (i = 0; i < NUM_THREAD_SPAWN; ++i)
+ pthread_create(&thread, NULL, do_nothing, NULL);
+
+ /* crash on purpose */
+ i = *(int *)NULL;
+}
+
+FIXTURE(coredump)
+{
+ char original_core_pattern[256];
+};
+
+FIXTURE_SETUP(coredump)
+{
+ char buf[PATH_MAX];
+ FILE *file;
+ char *dir;
+ int ret;
+
+ file = fopen("/proc/sys/kernel/core_pattern", "r");
+ ASSERT_NE(NULL, file);
+
+ ret = fread(self->original_core_pattern, 1, sizeof(self->original_core_pattern), file);
+ ASSERT_TRUE(ret || feof(file));
+ ASSERT_LT(ret, sizeof(self->original_core_pattern));
+
+ self->original_core_pattern[ret] = '\0';
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(coredump)
+{
+ const char *reason;
+ FILE *file;
+ int ret;
+
+ unlink(STACKDUMP_FILE);
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ if (!file) {
+ reason = "Unable to open core_pattern";
+ goto fail;
+ }
+
+ ret = fprintf(file, "%s", self->original_core_pattern);
+ if (ret < 0) {
+ reason = "Unable to write to core_pattern";
+ goto fail;
+ }
+
+ ret = fclose(file);
+ if (ret) {
+ reason = "Unable to close core_pattern";
+ goto fail;
+ }
+
+ return;
+fail:
+ /* This should never happen */
+ fprintf(stderr, "Failed to cleanup stackdump test: %s\n", reason);
+}
+
+TEST_F(coredump, stackdump)
+{
+ struct sigaction action = {};
+ unsigned long long stack;
+ char *test_dir, *line;
+ size_t line_length;
+ char buf[PATH_MAX];
+ int ret, i;
+ FILE *file;
+ pid_t pid;
+
+ /*
+ * Step 1: Setup core_pattern so that the stackdump script is executed when the child
+ * process crashes
+ */
+ ret = readlink("/proc/self/exe", buf, sizeof(buf));
+ ASSERT_NE(-1, ret);
+ ASSERT_LT(ret, sizeof(buf));
+ buf[ret] = '\0';
+
+ test_dir = dirname(buf);
+
+ file = fopen("/proc/sys/kernel/core_pattern", "w");
+ ASSERT_NE(NULL, file);
+
+ ret = fprintf(file, "|%1$s/%2$s %%P %1$s/%3$s", test_dir, STACKDUMP_SCRIPT, STACKDUMP_FILE);
+ ASSERT_LT(0, ret);
+
+ ret = fclose(file);
+ ASSERT_EQ(0, ret);
+
+ /* Step 2: Create a process who spawns some threads then crashes */
+ pid = fork();
+ ASSERT_TRUE(pid >= 0);
+ if (pid == 0)
+ crashing_child();
+
+ /*
+ * Step 3: Wait for the stackdump script to write the stack pointers to the stackdump file
+ */
+ for (i = 0; i < 10; ++i) {
+ file = fopen(STACKDUMP_FILE, "r");
+ if (file)
+ break;
+ sleep(1);
+ }
+ ASSERT_NE(file, NULL);
+
+ /* Step 4: Make sure all stack pointer values are non-zero */
+ for (i = 0; -1 != getline(&line, &line_length, file); ++i) {
+ stack = strtoull(line, NULL, 10);
+ ASSERT_NE(stack, 0);
+ }
+
+ ASSERT_EQ(i, 1 + NUM_THREAD_SPAWN);
+
+ fclose(file);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/drivers/net/Makefile b/tools/testing/selftests/drivers/net/Makefile
index 0fec8f9801ad..137470bdee0c 100644
--- a/tools/testing/selftests/drivers/net/Makefile
+++ b/tools/testing/selftests/drivers/net/Makefile
@@ -1,15 +1,18 @@
# SPDX-License-Identifier: GPL-2.0
TEST_INCLUDES := $(wildcard lib/py/*.py) \
+ $(wildcard lib/sh/*.sh) \
../../net/net_helper.sh \
../../net/lib.sh \
TEST_PROGS := \
netcons_basic.sh \
+ netcons_overflow.sh \
ping.py \
queues.py \
stats.py \
shaper.py \
+ hds.py \
# end of TEST_PROGS
include ../../lib.mk
diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile
index 03a089165d3f..2b10854e4b1e 100644
--- a/tools/testing/selftests/drivers/net/bonding/Makefile
+++ b/tools/testing/selftests/drivers/net/bonding/Makefile
@@ -10,7 +10,7 @@ TEST_PROGS := \
mode-2-recovery-updelay.sh \
bond_options.sh \
bond-eth-type-change.sh \
- bond_macvlan.sh
+ bond_macvlan_ipvlan.sh
TEST_FILES := \
lag_lib.sh \
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
deleted file mode 100755
index b609fb6231f4..000000000000
--- a/tools/testing/selftests/drivers/net/bonding/bond_macvlan.sh
+++ /dev/null
@@ -1,99 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-#
-# Test macvlan over balance-alb
-
-lib_dir=$(dirname "$0")
-source ${lib_dir}/bond_topo_2d1c.sh
-
-m1_ns="m1-$(mktemp -u XXXXXX)"
-m2_ns="m1-$(mktemp -u XXXXXX)"
-m1_ip4="192.0.2.11"
-m1_ip6="2001:db8::11"
-m2_ip4="192.0.2.12"
-m2_ip6="2001:db8::12"
-
-cleanup()
-{
- ip -n ${m1_ns} link del macv0
- ip netns del ${m1_ns}
- ip -n ${m2_ns} link del macv0
- ip netns del ${m2_ns}
-
- client_destroy
- server_destroy
- gateway_destroy
-}
-
-check_connection()
-{
- local ns=${1}
- local target=${2}
- local message=${3:-"macvlan_over_bond"}
- RET=0
-
-
- ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
- check_err $? "ping failed"
- log_test "$mode: $message"
-}
-
-macvlan_over_bond()
-{
- local param="$1"
- RET=0
-
- # setup new bond mode
- bond_reset "${param}"
-
- ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
- ip -n ${s_ns} link set macv0 netns ${m1_ns}
- ip -n ${m1_ns} link set dev macv0 up
- ip -n ${m1_ns} addr add ${m1_ip4}/24 dev macv0
- ip -n ${m1_ns} addr add ${m1_ip6}/24 dev macv0
-
- ip -n ${s_ns} link add link bond0 name macv0 type macvlan mode bridge
- ip -n ${s_ns} link set macv0 netns ${m2_ns}
- ip -n ${m2_ns} link set dev macv0 up
- ip -n ${m2_ns} addr add ${m2_ip4}/24 dev macv0
- ip -n ${m2_ns} addr add ${m2_ip6}/24 dev macv0
-
- sleep 2
-
- check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
- check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
- check_connection "${c_ns}" "${m1_ip4}" "IPv4: client->macvlan_1"
- check_connection "${c_ns}" "${m1_ip6}" "IPv6: client->macvlan_1"
- check_connection "${c_ns}" "${m2_ip4}" "IPv4: client->macvlan_2"
- check_connection "${c_ns}" "${m2_ip6}" "IPv6: client->macvlan_2"
- check_connection "${m1_ns}" "${m2_ip4}" "IPv4: macvlan_1->macvlan_2"
- check_connection "${m1_ns}" "${m2_ip6}" "IPv6: macvlan_1->macvlan_2"
-
-
- sleep 5
-
- check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
- check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
- check_connection "${m1_ns}" "${c_ip4}" "IPv4: macvlan_1->client"
- check_connection "${m1_ns}" "${c_ip6}" "IPv6: macvlan_1->client"
- check_connection "${m2_ns}" "${c_ip4}" "IPv4: macvlan_2->client"
- check_connection "${m2_ns}" "${c_ip6}" "IPv6: macvlan_2->client"
- check_connection "${m2_ns}" "${m1_ip4}" "IPv4: macvlan_2->macvlan_2"
- check_connection "${m2_ns}" "${m1_ip6}" "IPv6: macvlan_2->macvlan_2"
-
- ip -n ${c_ns} neigh flush dev eth0
-}
-
-trap cleanup EXIT
-
-setup_prepare
-ip netns add ${m1_ns}
-ip netns add ${m2_ns}
-
-modes="active-backup balance-tlb balance-alb"
-
-for mode in $modes; do
- macvlan_over_bond "mode $mode"
-done
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
new file mode 100755
index 000000000000..c4711272fe45
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/bonding/bond_macvlan_ipvlan.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test macvlan/ipvlan over bond
+
+lib_dir=$(dirname "$0")
+source ${lib_dir}/bond_topo_2d1c.sh
+
+xvlan1_ns="xvlan1-$(mktemp -u XXXXXX)"
+xvlan2_ns="xvlan2-$(mktemp -u XXXXXX)"
+xvlan1_ip4="192.0.2.11"
+xvlan1_ip6="2001:db8::11"
+xvlan2_ip4="192.0.2.12"
+xvlan2_ip6="2001:db8::12"
+
+cleanup()
+{
+ client_destroy
+ server_destroy
+ gateway_destroy
+
+ ip netns del ${xvlan1_ns}
+ ip netns del ${xvlan2_ns}
+}
+
+check_connection()
+{
+ local ns=${1}
+ local target=${2}
+ local message=${3}
+ RET=0
+
+ ip netns exec ${ns} ping ${target} -c 4 -i 0.1 &>/dev/null
+ check_err $? "ping failed"
+ log_test "${bond_mode}/${xvlan_type}_${xvlan_mode}: ${message}"
+}
+
+xvlan_over_bond()
+{
+ local param="$1"
+ local xvlan_type="$2"
+ local xvlan_mode="$3"
+ RET=0
+
+ # setup new bond mode
+ bond_reset "${param}"
+
+ ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+ ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan1_ns}
+ ip -n ${xvlan1_ns} link set dev ${xvlan_type}0 up
+ ip -n ${xvlan1_ns} addr add ${xvlan1_ip4}/24 dev ${xvlan_type}0
+ ip -n ${xvlan1_ns} addr add ${xvlan1_ip6}/24 dev ${xvlan_type}0
+
+ ip -n ${s_ns} link add link bond0 name ${xvlan_type}0 type ${xvlan_type} mode ${xvlan_mode}
+ ip -n ${s_ns} link set ${xvlan_type}0 netns ${xvlan2_ns}
+ ip -n ${xvlan2_ns} link set dev ${xvlan_type}0 up
+ ip -n ${xvlan2_ns} addr add ${xvlan2_ip4}/24 dev ${xvlan_type}0
+ ip -n ${xvlan2_ns} addr add ${xvlan2_ip6}/24 dev ${xvlan_type}0
+
+ sleep 2
+
+ check_connection "${c_ns}" "${s_ip4}" "IPv4: client->server"
+ check_connection "${c_ns}" "${s_ip6}" "IPv6: client->server"
+ check_connection "${c_ns}" "${xvlan1_ip4}" "IPv4: client->${xvlan_type}_1"
+ check_connection "${c_ns}" "${xvlan1_ip6}" "IPv6: client->${xvlan_type}_1"
+ check_connection "${c_ns}" "${xvlan2_ip4}" "IPv4: client->${xvlan_type}_2"
+ check_connection "${c_ns}" "${xvlan2_ip6}" "IPv6: client->${xvlan_type}_2"
+ check_connection "${xvlan1_ns}" "${xvlan2_ip4}" "IPv4: ${xvlan_type}_1->${xvlan_type}_2"
+ check_connection "${xvlan1_ns}" "${xvlan2_ip6}" "IPv6: ${xvlan_type}_1->${xvlan_type}_2"
+
+ check_connection "${s_ns}" "${c_ip4}" "IPv4: server->client"
+ check_connection "${s_ns}" "${c_ip6}" "IPv6: server->client"
+ check_connection "${xvlan1_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_1->client"
+ check_connection "${xvlan1_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_1->client"
+ check_connection "${xvlan2_ns}" "${c_ip4}" "IPv4: ${xvlan_type}_2->client"
+ check_connection "${xvlan2_ns}" "${c_ip6}" "IPv6: ${xvlan_type}_2->client"
+ check_connection "${xvlan2_ns}" "${xvlan1_ip4}" "IPv4: ${xvlan_type}_2->${xvlan_type}_1"
+ check_connection "${xvlan2_ns}" "${xvlan1_ip6}" "IPv6: ${xvlan_type}_2->${xvlan_type}_1"
+
+ ip -n ${c_ns} neigh flush dev eth0
+}
+
+trap cleanup EXIT
+
+setup_prepare
+ip netns add ${xvlan1_ns}
+ip netns add ${xvlan2_ns}
+
+bond_modes="active-backup balance-tlb balance-alb"
+
+for bond_mode in ${bond_modes}; do
+ xvlan_over_bond "mode ${bond_mode}" macvlan bridge
+ xvlan_over_bond "mode ${bond_mode}" ipvlan l2
+done
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/bonding/config b/tools/testing/selftests/drivers/net/bonding/config
index 899d7fb6ea8e..dad4e5fda4db 100644
--- a/tools/testing/selftests/drivers/net/bonding/config
+++ b/tools/testing/selftests/drivers/net/bonding/config
@@ -3,6 +3,7 @@ CONFIG_BRIDGE=y
CONFIG_DUMMY=y
CONFIG_IPV6=y
CONFIG_MACVLAN=y
+CONFIG_IPVLAN=y
CONFIG_NET_ACT_GACT=y
CONFIG_NET_CLS_FLOWER=y
CONFIG_NET_SCH_INGRESS=y
diff --git a/tools/testing/selftests/drivers/net/hds.py b/tools/testing/selftests/drivers/net/hds.py
new file mode 100755
index 000000000000..394971b25c0b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/hds.py
@@ -0,0 +1,120 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import errno
+from lib.py import ksft_run, ksft_exit, ksft_eq, ksft_raises, KsftSkipEx
+from lib.py import EthtoolFamily, NlError
+from lib.py import NetDrvEnv
+
+def get_hds(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'tcp-data-split' not in rings:
+ raise KsftSkipEx('tcp-data-split not supported by device')
+
+def get_hds_thresh(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+
+def set_hds_enable(cfg, netnl) -> None:
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'enabled'})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("disabling of HDS not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'tcp-data-split' not in rings:
+ raise KsftSkipEx('tcp-data-split not supported by device')
+
+ ksft_eq('enabled', rings['tcp-data-split'])
+
+def set_hds_disable(cfg, netnl) -> None:
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'tcp-data-split': 'disabled'})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("disabling of HDS not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'tcp-data-split' not in rings:
+ raise KsftSkipEx('tcp-data-split not supported by device')
+
+ ksft_eq('disabled', rings['tcp-data-split'])
+
+def set_hds_thresh_zero(cfg, netnl) -> None:
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': 0})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+
+ ksft_eq(0, rings['hds-thresh'])
+
+def set_hds_thresh_max(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ try:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': rings['hds-thresh-max']})
+ except NlError as e:
+ if e.error == errno.EINVAL:
+ raise KsftSkipEx("hds-thresh-set not supported by the device")
+ elif e.error == errno.EOPNOTSUPP:
+ raise KsftSkipEx("ring-set not supported by the device")
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ ksft_eq(rings['hds-thresh'], rings['hds-thresh-max'])
+
+def set_hds_thresh_gt(cfg, netnl) -> None:
+ try:
+ rings = netnl.rings_get({'header': {'dev-index': cfg.ifindex}})
+ except NlError as e:
+ raise KsftSkipEx('ring-get not supported by device')
+ if 'hds-thresh' not in rings:
+ raise KsftSkipEx('hds-thresh not supported by device')
+ if 'hds-thresh-max' not in rings:
+ raise KsftSkipEx('hds-thresh-max not defined by device')
+ hds_gt = rings['hds-thresh-max'] + 1
+ with ksft_raises(NlError) as e:
+ netnl.rings_set({'header': {'dev-index': cfg.ifindex}, 'hds-thresh': hds_gt})
+ ksft_eq(e.exception.nl_msg.error, -errno.EINVAL)
+
+def main() -> None:
+ with NetDrvEnv(__file__, queue_count=3) as cfg:
+ ksft_run([get_hds,
+ get_hds_thresh,
+ set_hds_disable,
+ set_hds_enable,
+ set_hds_thresh_zero,
+ set_hds_thresh_max,
+ set_hds_thresh_gt],
+ args=(cfg, EthtoolFamily()))
+ ksft_exit()
+
+if __name__ == "__main__":
+ main()
diff --git a/tools/testing/selftests/drivers/net/hw/ncdevmem.c b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
index 8e502a1f8f9b..19a6969643f4 100644
--- a/tools/testing/selftests/drivers/net/hw/ncdevmem.c
+++ b/tools/testing/selftests/drivers/net/hw/ncdevmem.c
@@ -619,9 +619,6 @@ int do_server(struct memory_buffer *mem)
fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
page_aligned_frags, non_page_aligned_frags);
- fprintf(stderr, "page_aligned_frags=%lu, non_page_aligned_frags=%lu\n",
- page_aligned_frags, non_page_aligned_frags);
-
cleanup:
free(tmp_mem);
diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
index 05b6fbb3fcdd..ad192fef3117 100755
--- a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
+++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py
@@ -21,9 +21,9 @@ def _enable_pp_allocation_fail():
if not os.path.exists("/sys/kernel/debug/fail_function"):
raise KsftSkipEx("Kernel built without function error injection (or DebugFS)")
- if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"):
+ if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
- fp.write("page_pool_alloc_pages\n")
+ fp.write("page_pool_alloc_netmems\n")
_write_fail_config({
"verbose": 0,
@@ -37,7 +37,7 @@ def _disable_pp_allocation_fail():
if not os.path.exists("/sys/kernel/debug/fail_function"):
return
- if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"):
+ if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_netmems"):
with open("/sys/kernel/debug/fail_function/inject", "w") as fp:
fp.write("\n")
diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py
index 1ea9bb695e94..987e452d3a45 100644
--- a/tools/testing/selftests/drivers/net/lib/py/env.py
+++ b/tools/testing/selftests/drivers/net/lib/py/env.py
@@ -5,7 +5,7 @@ import time
from pathlib import Path
from lib.py import KsftSkipEx, KsftXfailEx
from lib.py import ksft_setup
-from lib.py import cmd, ethtool, ip
+from lib.py import cmd, ethtool, ip, CmdExitFailure
from lib.py import NetNS, NetdevSimDev
from .remote import Remote
@@ -48,6 +48,7 @@ class NetDrvEnv:
else:
self._ns = NetdevSimDev(**kwargs)
self.dev = self._ns.nsims[0].dev
+ self.ifname = self.dev['ifname']
self.ifindex = self.dev['ifindex']
def __enter__(self):
@@ -234,7 +235,12 @@ class NetDrvEpEnv:
Good drivers will tell us via ethtool what their sync period is.
"""
if self._stats_settle_time is None:
- data = ethtool("-c " + self.ifname, json=True)[0]
+ data = {}
+ try:
+ data = ethtool("-c " + self.ifname, json=True)[0]
+ except CmdExitFailure as e:
+ if "Operation not supported" not in e.cmd.stderr:
+ raise
self._stats_settle_time = 0.025 + \
data.get('stats-block-usecs', 0) / 1000 / 1000
diff --git a/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
new file mode 100644
index 000000000000..3acaba41ac7b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/lib/sh/lib_netcons.sh
@@ -0,0 +1,225 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This file contains functions and helpers to support the netconsole
+# selftests
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+LIBDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+SRCIF="" # to be populated later
+SRCIP=192.0.2.1
+DSTIF="" # to be populated later
+DSTIP=192.0.2.2
+
+PORT="6666"
+MSG="netconsole selftest"
+USERDATA_KEY="key"
+USERDATA_VALUE="value"
+TARGET=$(mktemp -u netcons_XXXXX)
+DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
+NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
+NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
+# NAMESPACE will be populated by setup_ns with a random value
+NAMESPACE=""
+
+# IDs for netdevsim
+NSIM_DEV_1_ID=$((256 + RANDOM % 256))
+NSIM_DEV_2_ID=$((512 + RANDOM % 256))
+NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
+
+# Used to create and delete namespaces
+source "${LIBDIR}"/../../../../net/lib.sh
+source "${LIBDIR}"/../../../../net/net_helper.sh
+
+# Create netdevsim interfaces
+create_ifaces() {
+
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
+ udevadm settle 2> /dev/null || true
+
+ local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
+ local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
+
+ # These are global variables
+ SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
+ -path "$NSIM1"/net -exec basename {} \;)
+ DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
+ -path "$NSIM2"/net -exec basename {} \;)
+}
+
+link_ifaces() {
+ local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
+ local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
+ local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
+
+ exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
+ exec {INITNS_FD}</proc/self/ns/net
+
+ # Bind the dst interface to namespace
+ ip link set "${DSTIF}" netns "${NAMESPACE}"
+
+ # Linking one device to the other one (on the other namespace}
+ if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK
+ then
+ echo "linking netdevsim1 with netdevsim2 should succeed"
+ cleanup
+ exit "${ksft_skip}"
+ fi
+}
+
+function configure_ip() {
+ # Configure the IPs for both interfaces
+ ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
+ ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
+
+ ip addr add "${SRCIP}"/24 dev "${SRCIF}"
+ ip link set "${SRCIF}" up
+}
+
+function set_network() {
+ # setup_ns function is coming from lib.sh
+ setup_ns NAMESPACE
+
+ # Create both interfaces, and assign the destination to a different
+ # namespace
+ create_ifaces
+
+ # Link both interfaces back to back
+ link_ifaces
+
+ configure_ip
+}
+
+function create_dynamic_target() {
+ DSTMAC=$(ip netns exec "${NAMESPACE}" \
+ ip link show "${DSTIF}" | awk '/ether/ {print $2}')
+
+ # Create a dynamic target
+ mkdir "${NETCONS_PATH}"
+
+ echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
+ echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
+ echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
+ echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
+
+ echo 1 > "${NETCONS_PATH}"/enabled
+}
+
+function cleanup() {
+ local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
+
+ # delete netconsole dynamic reconfiguration
+ echo 0 > "${NETCONS_PATH}"/enabled
+ # Remove all the keys that got created during the selftest
+ find "${NETCONS_PATH}/userdata/" -mindepth 1 -type d -delete
+ # Remove the configfs entry
+ rmdir "${NETCONS_PATH}"
+
+ # Delete netdevsim devices
+ echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
+ echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
+
+ # this is coming from lib.sh
+ cleanup_all_ns
+
+ # Restoring printk configurations
+ echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
+}
+
+function set_user_data() {
+ if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
+ then
+ echo "Userdata path not available in ${NETCONS_PATH}/userdata"
+ exit "${ksft_skip}"
+ fi
+
+ KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}"
+ mkdir -p "${KEY_PATH}"
+ VALUE_PATH="${KEY_PATH}""/value"
+ echo "${USERDATA_VALUE}" > "${VALUE_PATH}"
+}
+
+function listen_port_and_save_to() {
+ local OUTPUT=${1}
+ # Just wait for 2 seconds
+ timeout 2 ip netns exec "${NAMESPACE}" \
+ socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
+}
+
+function validate_result() {
+ local TMPFILENAME="$1"
+
+ # TMPFILENAME will contain something like:
+ # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
+ # key=value
+
+ # Check if the file exists
+ if [ ! -f "$TMPFILENAME" ]; then
+ echo "FAIL: File was not generated." >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${MSG}" "${TMPFILENAME}"; then
+ echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
+ echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
+ cat "${TMPFILENAME}" >&2
+ exit "${ksft_fail}"
+ fi
+
+ # Delete the file once it is validated, otherwise keep it
+ # for debugging purposes
+ rm "${TMPFILENAME}"
+ exit "${ksft_pass}"
+}
+
+function check_for_dependencies() {
+ if [ "$(id -u)" -ne 0 ]; then
+ echo "This test must be run as root" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which socat > /dev/null ; then
+ echo "SKIP: socat(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which ip > /dev/null ; then
+ echo "SKIP: ip(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ! which udevadm > /dev/null ; then
+ echo "SKIP: udevadm(1) is not available" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
+ echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if [ ! -d "${NETCONS_CONFIGFS}" ]; then
+ echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip link show "${DSTIF}" 2> /dev/null; then
+ echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
+ exit "${ksft_skip}"
+ fi
+
+ if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
+ echo "SKIP: IPs already in use. Skipping it" >&2
+ exit "${ksft_skip}"
+ fi
+}
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
index b79542a4dcc7..4a11bf1d514a 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_bridge.sh
@@ -12,6 +12,7 @@ ALL_TESTS="
bridge_rif_remaster_port
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=2
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
index e28f978104f3..b8bbe94f4736 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag.sh
@@ -10,6 +10,7 @@ ALL_TESTS="
lag_rif_nomaster_addr
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=2
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
diff --git a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
index 6318cfa6434c..d1a9d379eaf3 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/rif_lag_vlan.sh
@@ -10,6 +10,7 @@ ALL_TESTS="
lag_rif_nomaster_addr
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=2
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
diff --git a/tools/testing/selftests/drivers/net/netcons_basic.sh b/tools/testing/selftests/drivers/net/netcons_basic.sh
index b175f4d966e5..fe765da498e8 100755
--- a/tools/testing/selftests/drivers/net/netcons_basic.sh
+++ b/tools/testing/selftests/drivers/net/netcons_basic.sh
@@ -18,224 +18,8 @@ set -euo pipefail
SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
-# Simple script to test dynamic targets in netconsole
-SRCIF="" # to be populated later
-SRCIP=192.0.2.1
-DSTIF="" # to be populated later
-DSTIP=192.0.2.2
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
-PORT="6666"
-MSG="netconsole selftest"
-USERDATA_KEY="key"
-USERDATA_VALUE="value"
-TARGET=$(mktemp -u netcons_XXXXX)
-DEFAULT_PRINTK_VALUES=$(cat /proc/sys/kernel/printk)
-NETCONS_CONFIGFS="/sys/kernel/config/netconsole"
-NETCONS_PATH="${NETCONS_CONFIGFS}"/"${TARGET}"
-KEY_PATH="${NETCONS_PATH}/userdata/${USERDATA_KEY}"
-# NAMESPACE will be populated by setup_ns with a random value
-NAMESPACE=""
-
-# IDs for netdevsim
-NSIM_DEV_1_ID=$((256 + RANDOM % 256))
-NSIM_DEV_2_ID=$((512 + RANDOM % 256))
-NSIM_DEV_SYS_NEW="/sys/bus/netdevsim/new_device"
-
-# Used to create and delete namespaces
-source "${SCRIPTDIR}"/../../net/lib.sh
-source "${SCRIPTDIR}"/../../net/net_helper.sh
-
-# Create netdevsim interfaces
-create_ifaces() {
-
- echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_NEW"
- echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_NEW"
- udevadm settle 2> /dev/null || true
-
- local NSIM1=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_1_ID"
- local NSIM2=/sys/bus/netdevsim/devices/netdevsim"$NSIM_DEV_2_ID"
-
- # These are global variables
- SRCIF=$(find "$NSIM1"/net -maxdepth 1 -type d ! \
- -path "$NSIM1"/net -exec basename {} \;)
- DSTIF=$(find "$NSIM2"/net -maxdepth 1 -type d ! \
- -path "$NSIM2"/net -exec basename {} \;)
-}
-
-link_ifaces() {
- local NSIM_DEV_SYS_LINK="/sys/bus/netdevsim/link_device"
- local SRCIF_IFIDX=$(cat /sys/class/net/"$SRCIF"/ifindex)
- local DSTIF_IFIDX=$(cat /sys/class/net/"$DSTIF"/ifindex)
-
- exec {NAMESPACE_FD}</var/run/netns/"${NAMESPACE}"
- exec {INITNS_FD}</proc/self/ns/net
-
- # Bind the dst interface to namespace
- ip link set "${DSTIF}" netns "${NAMESPACE}"
-
- # Linking one device to the other one (on the other namespace}
- if ! echo "${INITNS_FD}:$SRCIF_IFIDX $NAMESPACE_FD:$DSTIF_IFIDX" > $NSIM_DEV_SYS_LINK
- then
- echo "linking netdevsim1 with netdevsim2 should succeed"
- cleanup
- exit "${ksft_skip}"
- fi
-}
-
-function configure_ip() {
- # Configure the IPs for both interfaces
- ip netns exec "${NAMESPACE}" ip addr add "${DSTIP}"/24 dev "${DSTIF}"
- ip netns exec "${NAMESPACE}" ip link set "${DSTIF}" up
-
- ip addr add "${SRCIP}"/24 dev "${SRCIF}"
- ip link set "${SRCIF}" up
-}
-
-function set_network() {
- # setup_ns function is coming from lib.sh
- setup_ns NAMESPACE
-
- # Create both interfaces, and assign the destination to a different
- # namespace
- create_ifaces
-
- # Link both interfaces back to back
- link_ifaces
-
- configure_ip
-}
-
-function create_dynamic_target() {
- DSTMAC=$(ip netns exec "${NAMESPACE}" \
- ip link show "${DSTIF}" | awk '/ether/ {print $2}')
-
- # Create a dynamic target
- mkdir "${NETCONS_PATH}"
-
- echo "${DSTIP}" > "${NETCONS_PATH}"/remote_ip
- echo "${SRCIP}" > "${NETCONS_PATH}"/local_ip
- echo "${DSTMAC}" > "${NETCONS_PATH}"/remote_mac
- echo "${SRCIF}" > "${NETCONS_PATH}"/dev_name
-
- echo 1 > "${NETCONS_PATH}"/enabled
-}
-
-function cleanup() {
- local NSIM_DEV_SYS_DEL="/sys/bus/netdevsim/del_device"
-
- # delete netconsole dynamic reconfiguration
- echo 0 > "${NETCONS_PATH}"/enabled
- # Remove key
- rmdir "${KEY_PATH}"
- # Remove the configfs entry
- rmdir "${NETCONS_PATH}"
-
- # Delete netdevsim devices
- echo "$NSIM_DEV_2_ID" > "$NSIM_DEV_SYS_DEL"
- echo "$NSIM_DEV_1_ID" > "$NSIM_DEV_SYS_DEL"
-
- # this is coming from lib.sh
- cleanup_all_ns
-
- # Restoring printk configurations
- echo "${DEFAULT_PRINTK_VALUES}" > /proc/sys/kernel/printk
-}
-
-function set_user_data() {
- if [[ ! -d "${NETCONS_PATH}""/userdata" ]]
- then
- echo "Userdata path not available in ${NETCONS_PATH}/userdata"
- exit "${ksft_skip}"
- fi
-
- mkdir -p "${KEY_PATH}"
- VALUE_PATH="${KEY_PATH}""/value"
- echo "${USERDATA_VALUE}" > "${VALUE_PATH}"
-}
-
-function listen_port_and_save_to() {
- local OUTPUT=${1}
- # Just wait for 2 seconds
- timeout 2 ip netns exec "${NAMESPACE}" \
- socat UDP-LISTEN:"${PORT}",fork "${OUTPUT}"
-}
-
-function validate_result() {
- local TMPFILENAME="$1"
-
- # TMPFILENAME will contain something like:
- # 6.11.1-0_fbk0_rc13_509_g30d75cea12f7,13,1822,115075213798,-;netconsole selftest: netcons_gtJHM
- # key=value
-
- # Check if the file exists
- if [ ! -f "$TMPFILENAME" ]; then
- echo "FAIL: File was not generated." >&2
- exit "${ksft_fail}"
- fi
-
- if ! grep -q "${MSG}" "${TMPFILENAME}"; then
- echo "FAIL: ${MSG} not found in ${TMPFILENAME}" >&2
- cat "${TMPFILENAME}" >&2
- exit "${ksft_fail}"
- fi
-
- if ! grep -q "${USERDATA_KEY}=${USERDATA_VALUE}" "${TMPFILENAME}"; then
- echo "FAIL: ${USERDATA_KEY}=${USERDATA_VALUE} not found in ${TMPFILENAME}" >&2
- cat "${TMPFILENAME}" >&2
- exit "${ksft_fail}"
- fi
-
- # Delete the file once it is validated, otherwise keep it
- # for debugging purposes
- rm "${TMPFILENAME}"
- exit "${ksft_pass}"
-}
-
-function check_for_dependencies() {
- if [ "$(id -u)" -ne 0 ]; then
- echo "This test must be run as root" >&2
- exit "${ksft_skip}"
- fi
-
- if ! which socat > /dev/null ; then
- echo "SKIP: socat(1) is not available" >&2
- exit "${ksft_skip}"
- fi
-
- if ! which ip > /dev/null ; then
- echo "SKIP: ip(1) is not available" >&2
- exit "${ksft_skip}"
- fi
-
- if ! which udevadm > /dev/null ; then
- echo "SKIP: udevadm(1) is not available" >&2
- exit "${ksft_skip}"
- fi
-
- if [ ! -f "${NSIM_DEV_SYS_NEW}" ]; then
- echo "SKIP: file ${NSIM_DEV_SYS_NEW} does not exist. Check if CONFIG_NETDEVSIM is enabled" >&2
- exit "${ksft_skip}"
- fi
-
- if [ ! -d "${NETCONS_CONFIGFS}" ]; then
- echo "SKIP: directory ${NETCONS_CONFIGFS} does not exist. Check if NETCONSOLE_DYNAMIC is enabled" >&2
- exit "${ksft_skip}"
- fi
-
- if ip link show "${DSTIF}" 2> /dev/null; then
- echo "SKIP: interface ${DSTIF} exists in the system. Not overwriting it." >&2
- exit "${ksft_skip}"
- fi
-
- if ip addr list | grep -E "inet.*(${SRCIP}|${DSTIP})" 2> /dev/null; then
- echo "SKIP: IPs already in use. Skipping it" >&2
- exit "${ksft_skip}"
- fi
-}
-
-# ========== #
-# Start here #
-# ========== #
modprobe netdevsim 2> /dev/null || true
modprobe netconsole 2> /dev/null || true
diff --git a/tools/testing/selftests/drivers/net/netcons_overflow.sh b/tools/testing/selftests/drivers/net/netcons_overflow.sh
new file mode 100755
index 000000000000..29bad56448a2
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/netcons_overflow.sh
@@ -0,0 +1,67 @@
+#!/usr/bin/env bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test verifies that users can successfully create up to
+# MAX_USERDATA_ITEMS userdata entries without encountering any failures.
+#
+# Additionally, it tests for expected failure when attempting to exceed this
+# maximum limit.
+#
+# Author: Breno Leitao <leitao@debian.org>
+
+set -euo pipefail
+
+SCRIPTDIR=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "${SCRIPTDIR}"/lib/sh/lib_netcons.sh
+# This is coming from netconsole code. Check for it in drivers/net/netconsole.c
+MAX_USERDATA_ITEMS=16
+
+# Function to create userdata entries
+function create_userdata_max_entries() {
+ # All these keys should be created without any error
+ for i in $(seq $MAX_USERDATA_ITEMS)
+ do
+ # USERDATA_KEY is used by set_user_data
+ USERDATA_KEY="key"${i}
+ set_user_data
+ done
+}
+
+# Function to verify the entry limit
+function verify_entry_limit() {
+ # Allowing the test to fail without exiting, since the next command
+ # will fail
+ set +e
+ mkdir "${NETCONS_PATH}/userdata/key_that_will_fail" 2> /dev/null
+ ret="$?"
+ set -e
+ if [ "$ret" -eq 0 ];
+ then
+ echo "Adding more than ${MAX_USERDATA_ITEMS} entries in userdata should fail, but it didn't" >&2
+ ls "${NETCONS_PATH}/userdata/" >&2
+ exit "${ksft_fail}"
+ fi
+}
+
+# ========== #
+# Start here #
+# ========== #
+
+modprobe netdevsim 2> /dev/null || true
+modprobe netconsole 2> /dev/null || true
+
+# Check for basic system dependency and exit if not found
+check_for_dependencies
+
+# Remove the namespace, interfaces and netconsole target on exit
+trap cleanup EXIT
+# Create one namespace and two interfaces
+set_network
+# Create a dynamic target for netconsole
+create_dynamic_target
+# populate the maximum number of supported keys in userdata
+create_userdata_max_entries
+# Verify an additional entry is not allowed
+verify_entry_limit
+exit "${ksft_pass}"
diff --git a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
index fd13c8cfb7a8..b411fe66510f 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/tc-mq-visibility.sh
@@ -58,9 +58,12 @@ for root in mq mqprio; do
ethtool -L $NDEV combined 4
n_child_assert 4 "One real queue, rest default"
- # Graft some
- tcq replace parent 100:1 handle 204:
- n_child_assert 3 "Grafted"
+ # Remove real one
+ tcq del parent 100:4 handle 204:
+
+ # Replace default with pfifo
+ tcq replace parent 100:1 handle 205: pfifo limit 1000
+ n_child_assert 3 "Deleting real one, replacing default one with pfifo"
ethtool -L $NDEV combined 1
n_child_assert 1 "Grafted, one"
diff --git a/tools/testing/selftests/drivers/net/stats.py b/tools/testing/selftests/drivers/net/stats.py
index 031ac9def6c0..efcc1e10575b 100755
--- a/tools/testing/selftests/drivers/net/stats.py
+++ b/tools/testing/selftests/drivers/net/stats.py
@@ -2,12 +2,15 @@
# SPDX-License-Identifier: GPL-2.0
import errno
+import subprocess
+import time
from lib.py import ksft_run, ksft_exit, ksft_pr
-from lib.py import ksft_ge, ksft_eq, ksft_in, ksft_true, ksft_raises, KsftSkipEx, KsftXfailEx
+from lib.py import ksft_ge, ksft_eq, ksft_is, ksft_in, ksft_lt, ksft_true, ksft_raises
+from lib.py import KsftSkipEx, KsftXfailEx
from lib.py import ksft_disruptive
from lib.py import EthtoolFamily, NetdevFamily, RtnlFamily, NlError
from lib.py import NetDrvEnv
-from lib.py import ip, defer
+from lib.py import cmd, ip, defer
ethnl = EthtoolFamily()
netfam = NetdevFamily()
@@ -174,10 +177,95 @@ def check_down(cfg) -> None:
netfam.qstats_get({"ifindex": cfg.ifindex, "scope": "queue"}, dump=True)
+def __run_inf_loop(body):
+ body = body.strip()
+ if body[-1] != ';':
+ body += ';'
+
+ return subprocess.Popen(f"while true; do {body} done", shell=True,
+ stdout=subprocess.PIPE, stderr=subprocess.PIPE)
+
+
+def __stats_increase_sanely(old, new) -> None:
+ for k in old.keys():
+ ksft_ge(new[k], old[k])
+ ksft_lt(new[k] - old[k], 1 << 31, comment="likely wrapping error")
+
+
+def procfs_hammer(cfg) -> None:
+ """
+ Reading stats via procfs only holds the RCU lock, which is not an exclusive
+ lock, make sure drivers can handle parallel reads of stats.
+ """
+ one = __run_inf_loop("cat /proc/net/dev")
+ defer(one.kill)
+ two = __run_inf_loop("cat /proc/net/dev")
+ defer(two.kill)
+
+ time.sleep(1)
+ # Make sure the processes are running
+ ksft_is(one.poll(), None)
+ ksft_is(two.poll(), None)
+
+ rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ time.sleep(2)
+ rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ __stats_increase_sanely(rtstat1, rtstat2)
+ # defers will kill the loops
+
+
+@ksft_disruptive
+def procfs_downup_hammer(cfg) -> None:
+ """
+ Reading stats via procfs only holds the RCU lock, drivers often try
+ to sleep when reading the stats, or don't protect against races.
+ """
+ # Max out the queues, we'll flip between max and 1
+ channels = ethnl.channels_get({'header': {'dev-index': cfg.ifindex}})
+ if channels['combined-count'] == 0:
+ rx_type = 'rx'
+ else:
+ rx_type = 'combined'
+ cur_queue_cnt = channels[f'{rx_type}-count']
+ max_queue_cnt = channels[f'{rx_type}-max']
+
+ cmd(f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}")
+ defer(cmd, f"ethtool -L {cfg.ifname} {rx_type} {cur_queue_cnt}")
+
+ # Real test stats
+ stats = __run_inf_loop("cat /proc/net/dev")
+ defer(stats.kill)
+
+ ipset = f"ip link set dev {cfg.ifname}"
+ defer(ip, f"link set dev {cfg.ifname} up")
+ # The "echo -n 1" lets us count iterations below
+ updown = f"{ipset} down; sleep 0.05; {ipset} up; sleep 0.05; " + \
+ f"ethtool -L {cfg.ifname} {rx_type} 1; " + \
+ f"ethtool -L {cfg.ifname} {rx_type} {max_queue_cnt}; " + \
+ "echo -n 1"
+ updown = __run_inf_loop(updown)
+ kill_updown = defer(updown.kill)
+
+ time.sleep(1)
+ # Make sure the processes are running
+ ksft_is(stats.poll(), None)
+ ksft_is(updown.poll(), None)
+
+ rtstat1 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ # We're looking for crashes, give it extra time
+ time.sleep(9)
+ rtstat2 = rtnl.getlink({"ifi-index": cfg.ifindex})['stats64']
+ __stats_increase_sanely(rtstat1, rtstat2)
+
+ kill_updown.exec()
+ stdout, _ = updown.communicate(timeout=5)
+ ksft_pr("completed up/down cycles:", len(stdout.decode('utf-8')))
+
+
def main() -> None:
with NetDrvEnv(__file__, queue_count=100) as cfg:
ksft_run([check_pause, check_fec, pkt_byte_sum, qstat_by_ifindex,
- check_down],
+ check_down, procfs_hammer, procfs_downup_hammer],
args=(cfg, ))
ksft_exit()
diff --git a/tools/testing/selftests/exec/execveat.c b/tools/testing/selftests/exec/execveat.c
index 071e03532cba..8fb7395fd35b 100644
--- a/tools/testing/selftests/exec/execveat.c
+++ b/tools/testing/selftests/exec/execveat.c
@@ -23,9 +23,11 @@
#include "../kselftest.h"
-#define TESTS_EXPECTED 51
+#define TESTS_EXPECTED 54
#define TEST_NAME_LEN (PATH_MAX * 4)
+#define CHECK_COMM "CHECK_COMM"
+
static char longpath[2 * PATH_MAX] = "";
static char *envp[] = { "IN_TEST=yes", NULL, NULL };
static char *argv[] = { "execveat", "99", NULL };
@@ -237,6 +239,29 @@ static int check_execveat_pathmax(int root_dfd, const char *src, int is_script)
return fail;
}
+static int check_execveat_comm(int fd, char *argv0, char *expected)
+{
+ char buf[128], *old_env, *old_argv0;
+ int ret;
+
+ snprintf(buf, sizeof(buf), CHECK_COMM "=%s", expected);
+
+ old_env = envp[1];
+ envp[1] = buf;
+
+ old_argv0 = argv[0];
+ argv[0] = argv0;
+
+ ksft_print_msg("Check execveat(AT_EMPTY_PATH)'s comm is %s\n",
+ expected);
+ ret = check_execveat_invoked_rc(fd, "", AT_EMPTY_PATH, 0, 0);
+
+ envp[1] = old_env;
+ argv[0] = old_argv0;
+
+ return ret;
+}
+
static int run_tests(void)
{
int fail = 0;
@@ -389,6 +414,14 @@ static int run_tests(void)
fail += check_execveat_pathmax(root_dfd, "execveat", 0);
fail += check_execveat_pathmax(root_dfd, "script", 1);
+
+ /* /proc/pid/comm gives filename by default */
+ fail += check_execveat_comm(fd, "sentinel", "execveat");
+ /* /proc/pid/comm gives argv[0] when invoked via link */
+ fail += check_execveat_comm(fd_symlink, "sentinel", "execveat");
+ /* /proc/pid/comm gives filename if NULL is passed */
+ fail += check_execveat_comm(fd, NULL, "execveat");
+
return fail;
}
@@ -415,9 +448,13 @@ int main(int argc, char **argv)
int ii;
int rc;
const char *verbose = getenv("VERBOSE");
+ const char *check_comm = getenv(CHECK_COMM);
- if (argc >= 2) {
- /* If we are invoked with an argument, don't run tests. */
+ if (argc >= 2 || check_comm) {
+ /*
+ * If we are invoked with an argument, or no arguments but a
+ * command to check, don't run tests.
+ */
const char *in_test = getenv("IN_TEST");
if (verbose) {
@@ -426,6 +463,38 @@ int main(int argc, char **argv)
ksft_print_msg("\t[%d]='%s\n'", ii, argv[ii]);
}
+ /* If the tests wanted us to check the command, do so. */
+ if (check_comm) {
+ /* TASK_COMM_LEN == 16 */
+ char buf[32];
+ int fd, ret;
+
+ fd = open("/proc/self/comm", O_RDONLY);
+ if (fd < 0) {
+ ksft_perror("open() comm failed");
+ exit(1);
+ }
+
+ ret = read(fd, buf, sizeof(buf));
+ if (ret < 0) {
+ ksft_perror("read() comm failed");
+ close(fd);
+ exit(1);
+ }
+ close(fd);
+
+ // trim off the \n
+ buf[ret-1] = 0;
+
+ if (strcmp(buf, check_comm)) {
+ ksft_print_msg("bad comm, got: %s expected: %s\n",
+ buf, check_comm);
+ exit(1);
+ }
+
+ exit(0);
+ }
+
/* Check expected environment transferred. */
if (!in_test || strcmp(in_test, "yes") != 0) {
ksft_print_msg("no IN_TEST=yes in env\n");
diff --git a/tools/testing/selftests/nsfs/.gitignore b/tools/testing/selftests/filesystems/nsfs/.gitignore
index ed79ebdf286e..92a8249006d1 100644
--- a/tools/testing/selftests/nsfs/.gitignore
+++ b/tools/testing/selftests/filesystems/nsfs/.gitignore
@@ -1,3 +1,4 @@
# SPDX-License-Identifier: GPL-2.0-only
owner
pidns
+iterate_mntns
diff --git a/tools/testing/selftests/nsfs/Makefile b/tools/testing/selftests/filesystems/nsfs/Makefile
index dd9bd50b7b93..231aaa7dfd95 100644
--- a/tools/testing/selftests/nsfs/Makefile
+++ b/tools/testing/selftests/filesystems/nsfs/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-only
-TEST_GEN_PROGS := owner pidns
+TEST_GEN_PROGS := owner pidns iterate_mntns
CFLAGS := -Wall -Werror
-include ../lib.mk
+include ../../lib.mk
diff --git a/tools/testing/selftests/nsfs/config b/tools/testing/selftests/filesystems/nsfs/config
index 598d0a225fc9..598d0a225fc9 100644
--- a/tools/testing/selftests/nsfs/config
+++ b/tools/testing/selftests/filesystems/nsfs/config
diff --git a/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
new file mode 100644
index 000000000000..457cf76f3c5f
--- /dev/null
+++ b/tools/testing/selftests/filesystems/nsfs/iterate_mntns.c
@@ -0,0 +1,149 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "../../kselftest_harness.h"
+
+#define MNT_NS_COUNT 11
+#define MNT_NS_LAST_INDEX 10
+
+struct mnt_ns_info {
+ __u32 size;
+ __u32 nr_mounts;
+ __u64 mnt_ns_id;
+};
+
+#define MNT_NS_INFO_SIZE_VER0 16 /* size of first published struct */
+
+/* Get information about namespace. */
+#define NS_MNT_GET_INFO _IOR(0xb7, 10, struct mnt_ns_info)
+/* Get next namespace. */
+#define NS_MNT_GET_NEXT _IOR(0xb7, 11, struct mnt_ns_info)
+/* Get previous namespace. */
+#define NS_MNT_GET_PREV _IOR(0xb7, 12, struct mnt_ns_info)
+
+FIXTURE(iterate_mount_namespaces) {
+ int fd_mnt_ns[MNT_NS_COUNT];
+ __u64 mnt_ns_id[MNT_NS_COUNT];
+};
+
+FIXTURE_SETUP(iterate_mount_namespaces)
+{
+ for (int i = 0; i < MNT_NS_COUNT; i++)
+ self->fd_mnt_ns[i] = -EBADF;
+
+ /*
+ * Creating a new user namespace let's us guarantee that we only see
+ * mount namespaces that we did actually create.
+ */
+ ASSERT_EQ(unshare(CLONE_NEWUSER), 0);
+
+ for (int i = 0; i < MNT_NS_COUNT; i++) {
+ struct mnt_ns_info info = {};
+
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ self->fd_mnt_ns[i] = open("/proc/self/ns/mnt", O_RDONLY | O_CLOEXEC);
+ ASSERT_GE(self->fd_mnt_ns[i], 0);
+ ASSERT_EQ(ioctl(self->fd_mnt_ns[i], NS_MNT_GET_INFO, &info), 0);
+ self->mnt_ns_id[i] = info.mnt_ns_id;
+ }
+}
+
+FIXTURE_TEARDOWN(iterate_mount_namespaces)
+{
+ for (int i = 0; i < MNT_NS_COUNT; i++) {
+ if (self->fd_mnt_ns[i] < 0)
+ continue;
+ ASSERT_EQ(close(self->fd_mnt_ns[i]), 0);
+ }
+}
+
+TEST_F(iterate_mount_namespaces, iterate_all_forward)
+{
+ int fd_mnt_ns_cur, count = 0;
+
+ fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[0], F_DUPFD_CLOEXEC);
+ ASSERT_GE(fd_mnt_ns_cur, 0);
+
+ for (;; count++) {
+ struct mnt_ns_info info = {};
+ int fd_mnt_ns_next;
+
+ fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
+ if (fd_mnt_ns_next < 0 && errno == ENOENT)
+ break;
+ ASSERT_GE(fd_mnt_ns_next, 0);
+ ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+ fd_mnt_ns_cur = fd_mnt_ns_next;
+ }
+ ASSERT_EQ(count, MNT_NS_LAST_INDEX);
+}
+
+TEST_F(iterate_mount_namespaces, iterate_all_backwards)
+{
+ int fd_mnt_ns_cur, count = 0;
+
+ fd_mnt_ns_cur = fcntl(self->fd_mnt_ns[MNT_NS_LAST_INDEX], F_DUPFD_CLOEXEC);
+ ASSERT_GE(fd_mnt_ns_cur, 0);
+
+ for (;; count++) {
+ struct mnt_ns_info info = {};
+ int fd_mnt_ns_prev;
+
+ fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
+ if (fd_mnt_ns_prev < 0 && errno == ENOENT)
+ break;
+ ASSERT_GE(fd_mnt_ns_prev, 0);
+ ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+ fd_mnt_ns_cur = fd_mnt_ns_prev;
+ }
+ ASSERT_EQ(count, MNT_NS_LAST_INDEX);
+}
+
+TEST_F(iterate_mount_namespaces, iterate_forward)
+{
+ int fd_mnt_ns_cur;
+
+ ASSERT_EQ(setns(self->fd_mnt_ns[0], CLONE_NEWNS), 0);
+
+ fd_mnt_ns_cur = self->fd_mnt_ns[0];
+ for (int i = 1; i < MNT_NS_COUNT; i++) {
+ struct mnt_ns_info info = {};
+ int fd_mnt_ns_next;
+
+ fd_mnt_ns_next = ioctl(fd_mnt_ns_cur, NS_MNT_GET_NEXT, &info);
+ ASSERT_GE(fd_mnt_ns_next, 0);
+ ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+ fd_mnt_ns_cur = fd_mnt_ns_next;
+ ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
+ }
+}
+
+TEST_F(iterate_mount_namespaces, iterate_backward)
+{
+ int fd_mnt_ns_cur;
+
+ ASSERT_EQ(setns(self->fd_mnt_ns[MNT_NS_LAST_INDEX], CLONE_NEWNS), 0);
+
+ fd_mnt_ns_cur = self->fd_mnt_ns[MNT_NS_LAST_INDEX];
+ for (int i = MNT_NS_LAST_INDEX - 1; i >= 0; i--) {
+ struct mnt_ns_info info = {};
+ int fd_mnt_ns_prev;
+
+ fd_mnt_ns_prev = ioctl(fd_mnt_ns_cur, NS_MNT_GET_PREV, &info);
+ ASSERT_GE(fd_mnt_ns_prev, 0);
+ ASSERT_EQ(close(fd_mnt_ns_cur), 0);
+ fd_mnt_ns_cur = fd_mnt_ns_prev;
+ ASSERT_EQ(info.mnt_ns_id, self->mnt_ns_id[i]);
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/nsfs/owner.c b/tools/testing/selftests/filesystems/nsfs/owner.c
index 96a976c74550..96a976c74550 100644
--- a/tools/testing/selftests/nsfs/owner.c
+++ b/tools/testing/selftests/filesystems/nsfs/owner.c
diff --git a/tools/testing/selftests/nsfs/pidns.c b/tools/testing/selftests/filesystems/nsfs/pidns.c
index e3c772c6a7c7..e3c772c6a7c7 100644
--- a/tools/testing/selftests/nsfs/pidns.c
+++ b/tools/testing/selftests/filesystems/nsfs/pidns.c
diff --git a/tools/testing/selftests/filesystems/statmount/Makefile b/tools/testing/selftests/filesystems/statmount/Makefile
index 3af3136e35a4..14ee91a41650 100644
--- a/tools/testing/selftests/filesystems/statmount/Makefile
+++ b/tools/testing/selftests/filesystems/statmount/Makefile
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0-or-later
CFLAGS += -Wall -O2 -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS := statmount_test statmount_test_ns
+TEST_GEN_PROGS := statmount_test statmount_test_ns listmount_test
include ../../lib.mk
diff --git a/tools/testing/selftests/filesystems/statmount/listmount_test.c b/tools/testing/selftests/filesystems/statmount/listmount_test.c
new file mode 100644
index 000000000000..15f0834f7557
--- /dev/null
+++ b/tools/testing/selftests/filesystems/statmount/listmount_test.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "statmount.h"
+#include "../../kselftest_harness.h"
+
+#ifndef LISTMOUNT_REVERSE
+#define LISTMOUNT_REVERSE (1 << 0) /* List later mounts first */
+#endif
+
+#define LISTMNT_BUFFER 10
+
+/* Check that all mount ids are in increasing order. */
+TEST(listmount_forward)
+{
+ uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0;
+
+ for (;;) {
+ ssize_t nr_mounts;
+
+ nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id,
+ list, LISTMNT_BUFFER, 0);
+ ASSERT_GE(nr_mounts, 0);
+ if (nr_mounts == 0)
+ break;
+
+ for (size_t cur = 0; cur < nr_mounts; cur++) {
+ if (cur < nr_mounts - 1)
+ ASSERT_LT(list[cur], list[cur + 1]);
+ last_mnt_id = list[cur];
+ }
+ }
+}
+
+/* Check that all mount ids are in decreasing order. */
+TEST(listmount_backward)
+{
+ uint64_t list[LISTMNT_BUFFER], last_mnt_id = 0;
+
+ for (;;) {
+ ssize_t nr_mounts;
+
+ nr_mounts = listmount(LSMT_ROOT, 0, last_mnt_id,
+ list, LISTMNT_BUFFER, LISTMOUNT_REVERSE);
+ ASSERT_GE(nr_mounts, 0);
+ if (nr_mounts == 0)
+ break;
+
+ for (size_t cur = 0; cur < nr_mounts; cur++) {
+ if (cur < nr_mounts - 1)
+ ASSERT_GT(list[cur], list[cur + 1]);
+ last_mnt_id = list[cur];
+ }
+ }
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
new file mode 100644
index 000000000000..b4ad09237e2a
--- /dev/null
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/add_remove_fprobe_repeat.tc
@@ -0,0 +1,19 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+# description: Generic dynamic event - Repeating add/remove fprobe events
+# requires: dynamic_events "f[:[<group>/][<event>]] <func-name>[%return] [<args>]":README
+
+echo 0 > events/enable
+echo > dynamic_events
+
+PLACE=$FUNCTION_FORK
+REPEAT_TIMES=64
+
+for i in `seq 1 $REPEAT_TIMES`; do
+ echo "f:myevent $PLACE" >> dynamic_events
+ grep -q myevent dynamic_events
+ test -d events/fprobes/myevent
+ echo > dynamic_events
+done
+
+clear_trace
diff --git a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
index 61877d166451..c9425a34fae3 100644
--- a/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
+++ b/tools/testing/selftests/ftrace/test.d/dynevent/fprobe_syntax_errors.tc
@@ -16,9 +16,7 @@ aarch64)
REG=%r0 ;;
esac
-check_error 'f^100 vfs_read' # MAXACT_NO_KPROBE
-check_error 'f^1a111 vfs_read' # BAD_MAXACT
-check_error 'f^100000 vfs_read' # MAXACT_TOO_BIG
+check_error 'f^100 vfs_read' # BAD_MAXACT
check_error 'f ^non_exist_func' # BAD_PROBE_ADDR (enoent)
check_error 'f ^vfs_read+10' # BAD_PROBE_ADDR
diff --git a/tools/testing/selftests/kselftest/ktap_helpers.sh b/tools/testing/selftests/kselftest/ktap_helpers.sh
index 79a125eb24c2..05a461890671 100644
--- a/tools/testing/selftests/kselftest/ktap_helpers.sh
+++ b/tools/testing/selftests/kselftest/ktap_helpers.sh
@@ -7,6 +7,7 @@
KTAP_TESTNO=1
KTAP_CNT_PASS=0
KTAP_CNT_FAIL=0
+KTAP_CNT_XFAIL=0
KTAP_CNT_SKIP=0
KSFT_PASS=0
@@ -69,6 +70,16 @@ ktap_test_skip() {
KTAP_CNT_SKIP=$((KTAP_CNT_SKIP+1))
}
+ktap_test_xfail() {
+ description="$1"
+
+ result="ok"
+ directive="XFAIL"
+ __ktap_test "$result" "$description" "$directive"
+
+ KTAP_CNT_XFAIL=$((KTAP_CNT_XFAIL+1))
+}
+
ktap_test_fail() {
description="$1"
@@ -99,7 +110,7 @@ ktap_exit_fail_msg() {
ktap_finished() {
ktap_print_totals
- if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP)) -eq "$KSFT_NUM_TESTS" ]; then
+ if [ $((KTAP_CNT_PASS + KTAP_CNT_SKIP + KTAP_CNT_XFAIL)) -eq "$KSFT_NUM_TESTS" ]; then
exit "$KSFT_PASS"
else
exit "$KSFT_FAIL"
@@ -107,5 +118,5 @@ ktap_finished() {
}
ktap_print_totals() {
- echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:0 xpass:0 skip:$KTAP_CNT_SKIP error:0"
+ echo "# Totals: pass:$KTAP_CNT_PASS fail:$KTAP_CNT_FAIL xfail:$KTAP_CNT_XFAIL xpass:0 skip:$KTAP_CNT_SKIP error:0"
}
diff --git a/tools/testing/selftests/kvm/aarch64/set_id_regs.c b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
index a79b7f18452d..3a97c160b5fe 100644
--- a/tools/testing/selftests/kvm/aarch64/set_id_regs.c
+++ b/tools/testing/selftests/kvm/aarch64/set_id_regs.c
@@ -152,7 +152,6 @@ static const struct reg_ftr_bits ftr_id_aa64mmfr0_el1[] = {
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGENDEL0, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, SNSMEM, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, BIGEND, 0),
- REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, ASIDBITS, 0),
REG_FTR_BITS(FTR_LOWER_SAFE, ID_AA64MMFR0_EL1, PARANGE, 0),
REG_FTR_END,
};
diff --git a/tools/testing/selftests/kvm/s390x/ucontrol_test.c b/tools/testing/selftests/kvm/s390x/ucontrol_test.c
index 0c112319dab1..135ee22856cf 100644
--- a/tools/testing/selftests/kvm/s390x/ucontrol_test.c
+++ b/tools/testing/selftests/kvm/s390x/ucontrol_test.c
@@ -210,10 +210,13 @@ TEST_F(uc_kvm, uc_attr_mem_limit)
struct kvm_device_attr attr = {
.group = KVM_S390_VM_MEM_CTRL,
.attr = KVM_S390_VM_MEM_LIMIT_SIZE,
- .addr = (unsigned long)&limit,
+ .addr = (u64)&limit,
};
int rc;
+ rc = ioctl(self->vm_fd, KVM_HAS_DEVICE_ATTR, &attr);
+ EXPECT_EQ(0, rc);
+
rc = ioctl(self->vm_fd, KVM_GET_DEVICE_ATTR, &attr);
EXPECT_EQ(0, rc);
EXPECT_EQ(~0UL, limit);
@@ -635,4 +638,171 @@ TEST_F(uc_kvm, uc_skey)
uc_assert_diag44(self);
}
+static char uc_flic_b[PAGE_SIZE];
+static struct kvm_s390_io_adapter uc_flic_ioa = { .id = 0 };
+static struct kvm_s390_io_adapter_req uc_flic_ioam = { .id = 0 };
+static struct kvm_s390_ais_req uc_flic_asim = { .isc = 0 };
+static struct kvm_s390_ais_all uc_flic_asima = { .simm = 0 };
+static struct uc_flic_attr_test {
+ char *name;
+ struct kvm_device_attr a;
+ int hasrc;
+ int geterrno;
+ int seterrno;
+} uc_flic_attr_tests[] = {
+ {
+ .name = "KVM_DEV_FLIC_GET_ALL_IRQS",
+ .seterrno = EINVAL,
+ .a = {
+ .group = KVM_DEV_FLIC_GET_ALL_IRQS,
+ .addr = (u64)&uc_flic_b,
+ .attr = PAGE_SIZE,
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_ENQUEUE",
+ .geterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_ENQUEUE, },
+ },
+ {
+ .name = "KVM_DEV_FLIC_CLEAR_IRQS",
+ .geterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_CLEAR_IRQS, },
+ },
+ {
+ .name = "KVM_DEV_FLIC_ADAPTER_REGISTER",
+ .geterrno = EINVAL,
+ .a = {
+ .group = KVM_DEV_FLIC_ADAPTER_REGISTER,
+ .addr = (u64)&uc_flic_ioa,
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_ADAPTER_MODIFY",
+ .geterrno = EINVAL,
+ .seterrno = EINVAL,
+ .a = {
+ .group = KVM_DEV_FLIC_ADAPTER_MODIFY,
+ .addr = (u64)&uc_flic_ioam,
+ .attr = sizeof(uc_flic_ioam),
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_CLEAR_IO_IRQ",
+ .geterrno = EINVAL,
+ .seterrno = EINVAL,
+ .a = {
+ .group = KVM_DEV_FLIC_CLEAR_IO_IRQ,
+ .attr = 32,
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_AISM",
+ .geterrno = EINVAL,
+ .seterrno = ENOTSUP,
+ .a = {
+ .group = KVM_DEV_FLIC_AISM,
+ .addr = (u64)&uc_flic_asim,
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_AIRQ_INJECT",
+ .geterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_AIRQ_INJECT, },
+ },
+ {
+ .name = "KVM_DEV_FLIC_AISM_ALL",
+ .geterrno = ENOTSUP,
+ .seterrno = ENOTSUP,
+ .a = {
+ .group = KVM_DEV_FLIC_AISM_ALL,
+ .addr = (u64)&uc_flic_asima,
+ .attr = sizeof(uc_flic_asima),
+ },
+ },
+ {
+ .name = "KVM_DEV_FLIC_APF_ENABLE",
+ .geterrno = EINVAL,
+ .seterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_APF_ENABLE, },
+ },
+ {
+ .name = "KVM_DEV_FLIC_APF_DISABLE_WAIT",
+ .geterrno = EINVAL,
+ .seterrno = EINVAL,
+ .a = { .group = KVM_DEV_FLIC_APF_DISABLE_WAIT, },
+ },
+};
+
+TEST_F(uc_kvm, uc_flic_attrs)
+{
+ struct kvm_create_device cd = { .type = KVM_DEV_TYPE_FLIC };
+ struct kvm_device_attr attr;
+ u64 value;
+ int rc, i;
+
+ rc = ioctl(self->vm_fd, KVM_CREATE_DEVICE, &cd);
+ ASSERT_EQ(0, rc) TH_LOG("create device failed with err %s (%i)",
+ strerror(errno), errno);
+
+ for (i = 0; i < ARRAY_SIZE(uc_flic_attr_tests); i++) {
+ TH_LOG("test %s", uc_flic_attr_tests[i].name);
+ attr = (struct kvm_device_attr) {
+ .group = uc_flic_attr_tests[i].a.group,
+ .attr = uc_flic_attr_tests[i].a.attr,
+ .addr = uc_flic_attr_tests[i].a.addr,
+ };
+ if (attr.addr == 0)
+ attr.addr = (u64)&value;
+
+ rc = ioctl(cd.fd, KVM_HAS_DEVICE_ATTR, &attr);
+ EXPECT_EQ(uc_flic_attr_tests[i].hasrc, !!rc)
+ TH_LOG("expected dev attr missing %s",
+ uc_flic_attr_tests[i].name);
+
+ rc = ioctl(cd.fd, KVM_GET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(!!uc_flic_attr_tests[i].geterrno, !!rc)
+ TH_LOG("get dev attr rc not expected on %s %s (%i)",
+ uc_flic_attr_tests[i].name,
+ strerror(errno), errno);
+ if (uc_flic_attr_tests[i].geterrno)
+ EXPECT_EQ(uc_flic_attr_tests[i].geterrno, errno)
+ TH_LOG("get dev attr errno not expected on %s %s (%i)",
+ uc_flic_attr_tests[i].name,
+ strerror(errno), errno);
+
+ rc = ioctl(cd.fd, KVM_SET_DEVICE_ATTR, &attr);
+ EXPECT_EQ(!!uc_flic_attr_tests[i].seterrno, !!rc)
+ TH_LOG("set sev attr rc not expected on %s %s (%i)",
+ uc_flic_attr_tests[i].name,
+ strerror(errno), errno);
+ if (uc_flic_attr_tests[i].seterrno)
+ EXPECT_EQ(uc_flic_attr_tests[i].seterrno, errno)
+ TH_LOG("set dev attr errno not expected on %s %s (%i)",
+ uc_flic_attr_tests[i].name,
+ strerror(errno), errno);
+ }
+
+ close(cd.fd);
+}
+
+TEST_F(uc_kvm, uc_set_gsi_routing)
+{
+ struct kvm_irq_routing *routing = kvm_gsi_routing_create();
+ struct kvm_irq_routing_entry ue = {
+ .type = KVM_IRQ_ROUTING_S390_ADAPTER,
+ .gsi = 1,
+ .u.adapter = (struct kvm_irq_routing_s390_adapter) {
+ .ind_addr = 0,
+ },
+ };
+ int rc;
+
+ routing->entries[0] = ue;
+ routing->nr = 1;
+ rc = ioctl(self->vm_fd, KVM_SET_GSI_ROUTING, routing);
+ ASSERT_EQ(-1, rc) TH_LOG("err %s (%i)", strerror(errno), errno);
+ ASSERT_EQ(EINVAL, errno) TH_LOG("err %s (%i)", strerror(errno), errno);
+}
+
TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/livepatch/test-callbacks.sh b/tools/testing/selftests/livepatch/test-callbacks.sh
index 37bbc3fb2780..2a03deb26a12 100755
--- a/tools/testing/selftests/livepatch/test-callbacks.sh
+++ b/tools/testing/selftests/livepatch/test-callbacks.sh
@@ -259,7 +259,7 @@ $MOD_TARGET: ${MOD_TARGET}_init
% insmod test_modules/$MOD_LIVEPATCH.ko pre_patch_ret=-19
livepatch: enabling patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': initializing patching transition
-test_klp_callbacks_demo: pre_patch_callback: vmlinux
+$MOD_LIVEPATCH: pre_patch_callback: vmlinux
livepatch: pre-patch callback failed for object 'vmlinux'
livepatch: failed to enable patch '$MOD_LIVEPATCH'
livepatch: '$MOD_LIVEPATCH': canceling patching transition, going to unpatch
diff --git a/tools/testing/selftests/livepatch/test-sysfs.sh b/tools/testing/selftests/livepatch/test-sysfs.sh
index 2c91428d2997..58fe1d96997c 100755
--- a/tools/testing/selftests/livepatch/test-sysfs.sh
+++ b/tools/testing/selftests/livepatch/test-sysfs.sh
@@ -5,6 +5,8 @@
. $(dirname $0)/functions.sh
MOD_LIVEPATCH=test_klp_livepatch
+MOD_LIVEPATCH2=test_klp_callbacks_demo
+MOD_LIVEPATCH3=test_klp_syscall
setup_config
@@ -19,6 +21,8 @@ check_sysfs_rights "$MOD_LIVEPATCH" "enabled" "-rw-r--r--"
check_sysfs_value "$MOD_LIVEPATCH" "enabled" "1"
check_sysfs_rights "$MOD_LIVEPATCH" "force" "--w-------"
check_sysfs_rights "$MOD_LIVEPATCH" "replace" "-r--r--r--"
+check_sysfs_rights "$MOD_LIVEPATCH" "stack_order" "-r--r--r--"
+check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
check_sysfs_rights "$MOD_LIVEPATCH" "transition" "-r--r--r--"
check_sysfs_value "$MOD_LIVEPATCH" "transition" "0"
check_sysfs_rights "$MOD_LIVEPATCH" "vmlinux/patched" "-r--r--r--"
@@ -131,4 +135,71 @@ livepatch: '$MOD_LIVEPATCH': completing unpatching transition
livepatch: '$MOD_LIVEPATCH': unpatching complete
% rmmod $MOD_LIVEPATCH"
+start_test "sysfs test stack_order value"
+
+load_lp $MOD_LIVEPATCH
+
+check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
+
+load_lp $MOD_LIVEPATCH2
+
+check_sysfs_value "$MOD_LIVEPATCH2" "stack_order" "2"
+
+load_lp $MOD_LIVEPATCH3
+
+check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "3"
+
+disable_lp $MOD_LIVEPATCH2
+unload_lp $MOD_LIVEPATCH2
+
+check_sysfs_value "$MOD_LIVEPATCH" "stack_order" "1"
+check_sysfs_value "$MOD_LIVEPATCH3" "stack_order" "2"
+
+disable_lp $MOD_LIVEPATCH3
+unload_lp $MOD_LIVEPATCH3
+
+disable_lp $MOD_LIVEPATCH
+unload_lp $MOD_LIVEPATCH
+
+check_result "% insmod test_modules/$MOD_LIVEPATCH.ko
+livepatch: enabling patch '$MOD_LIVEPATCH'
+livepatch: '$MOD_LIVEPATCH': initializing patching transition
+livepatch: '$MOD_LIVEPATCH': starting patching transition
+livepatch: '$MOD_LIVEPATCH': completing patching transition
+livepatch: '$MOD_LIVEPATCH': patching complete
+% insmod test_modules/$MOD_LIVEPATCH2.ko
+livepatch: enabling patch '$MOD_LIVEPATCH2'
+livepatch: '$MOD_LIVEPATCH2': initializing patching transition
+$MOD_LIVEPATCH2: pre_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': starting patching transition
+livepatch: '$MOD_LIVEPATCH2': completing patching transition
+$MOD_LIVEPATCH2: post_patch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': patching complete
+% insmod test_modules/$MOD_LIVEPATCH3.ko
+livepatch: enabling patch '$MOD_LIVEPATCH3'
+livepatch: '$MOD_LIVEPATCH3': initializing patching transition
+livepatch: '$MOD_LIVEPATCH3': starting patching transition
+livepatch: '$MOD_LIVEPATCH3': completing patching transition
+livepatch: '$MOD_LIVEPATCH3': patching complete
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH2/enabled
+livepatch: '$MOD_LIVEPATCH2': initializing unpatching transition
+$MOD_LIVEPATCH2: pre_unpatch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH2': completing unpatching transition
+$MOD_LIVEPATCH2: post_unpatch_callback: vmlinux
+livepatch: '$MOD_LIVEPATCH2': unpatching complete
+% rmmod $MOD_LIVEPATCH2
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH3/enabled
+livepatch: '$MOD_LIVEPATCH3': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH3': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH3': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH3': unpatching complete
+% rmmod $MOD_LIVEPATCH3
+% echo 0 > $SYSFS_KLP_DIR/$MOD_LIVEPATCH/enabled
+livepatch: '$MOD_LIVEPATCH': initializing unpatching transition
+livepatch: '$MOD_LIVEPATCH': starting unpatching transition
+livepatch: '$MOD_LIVEPATCH': completing unpatching transition
+livepatch: '$MOD_LIVEPATCH': unpatching complete
+% rmmod $MOD_LIVEPATCH"
+
exit 0
diff --git a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
index 66dec47e3ca3..732e89fe99c0 100644
--- a/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
+++ b/tools/testing/selftests/lsm/lsm_set_self_attr_test.c
@@ -56,16 +56,15 @@ TEST(flags_zero_lsm_set_self_attr)
TEST(flags_overset_lsm_set_self_attr)
{
const long page_size = sysconf(_SC_PAGESIZE);
- char *ctx = calloc(page_size, 1);
+ struct lsm_ctx *ctx = calloc(page_size, 1);
__u32 size = page_size;
- struct lsm_ctx *tctx = (struct lsm_ctx *)ctx;
ASSERT_NE(NULL, ctx);
if (attr_lsm_count()) {
- ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, tctx, &size,
+ ASSERT_LE(1, lsm_get_self_attr(LSM_ATTR_CURRENT, ctx, &size,
0));
}
- ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, tctx,
+ ASSERT_EQ(-1, lsm_set_self_attr(LSM_ATTR_CURRENT | LSM_ATTR_PREV, ctx,
size, 0));
free(ctx);
diff --git a/tools/testing/selftests/mm/cow.c b/tools/testing/selftests/mm/cow.c
index 32c6ccc2a6be..1238e1c5aae1 100644
--- a/tools/testing/selftests/mm/cow.c
+++ b/tools/testing/selftests/mm/cow.c
@@ -758,7 +758,7 @@ static void do_run_with_base_page(test_fn fn, bool swapout)
}
/* Populate a base page. */
- memset(mem, 0, pagesize);
+ memset(mem, 1, pagesize);
if (swapout) {
madvise(mem, pagesize, MADV_PAGEOUT);
@@ -824,12 +824,12 @@ static void do_run_with_thp(test_fn fn, enum thp_run thp_run, size_t thpsize)
* Try to populate a THP. Touch the first sub-page and test if
* we get the last sub-page populated automatically.
*/
- mem[0] = 0;
+ mem[0] = 1;
if (!pagemap_is_populated(pagemap_fd, mem + thpsize - pagesize)) {
ksft_test_result_skip("Did not get a THP populated\n");
goto munmap;
}
- memset(mem, 0, thpsize);
+ memset(mem, 1, thpsize);
size = thpsize;
switch (thp_run) {
@@ -1012,7 +1012,7 @@ static void run_with_hugetlb(test_fn fn, const char *desc, size_t hugetlbsize)
}
/* Populate an huge page. */
- memset(mem, 0, hugetlbsize);
+ memset(mem, 1, hugetlbsize);
/*
* We need a total of two hugetlb pages to handle COW/unsharing
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index cb2fc601de66..73ee88d6b043 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -32,6 +32,7 @@ TEST_PROGS += ioam6.sh
TEST_PROGS += gro.sh
TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
+TEST_PROGS += cmsg_so_priority.sh
TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
TEST_PROGS += netns-name.sh
TEST_PROGS += nl_netdev.py
@@ -95,6 +96,7 @@ TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += fdb_flush.sh fdb_notify.sh
TEST_PROGS += fq_band_pktlimit.sh
TEST_PROGS += vlan_hw_filter.sh
+TEST_PROGS += vlan_bridge_binding.sh
TEST_PROGS += bpf_offload.py
TEST_PROGS += ipv6_route_update_soft_lockup.sh
TEST_PROGS += busy_poll_test.sh
diff --git a/tools/testing/selftests/net/busy_poller.c b/tools/testing/selftests/net/busy_poller.c
index 99b0e8c17fca..04c7ff577bb8 100644
--- a/tools/testing/selftests/net/busy_poller.c
+++ b/tools/testing/selftests/net/busy_poller.c
@@ -54,16 +54,16 @@ struct epoll_params {
#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
#endif
-static uint32_t cfg_port = 8000;
+static uint16_t cfg_port = 8000;
static struct in_addr cfg_bind_addr = { .s_addr = INADDR_ANY };
static char *cfg_outfile;
static int cfg_max_events = 8;
-static int cfg_ifindex;
+static uint32_t cfg_ifindex;
/* busy poll params */
static uint32_t cfg_busy_poll_usecs;
-static uint32_t cfg_busy_poll_budget;
-static uint32_t cfg_prefer_busy_poll;
+static uint16_t cfg_busy_poll_budget;
+static uint8_t cfg_prefer_busy_poll;
/* IRQ params */
static uint32_t cfg_defer_hard_irqs;
@@ -79,6 +79,7 @@ static void usage(const char *filepath)
static void parse_opts(int argc, char **argv)
{
+ unsigned long long tmp;
int ret;
int c;
@@ -86,31 +87,40 @@ static void parse_opts(int argc, char **argv)
usage(argv[0]);
while ((c = getopt(argc, argv, "p:m:b:u:P:g:o:d:r:s:i:")) != -1) {
+ /* most options take integer values, except o and b, so reduce
+ * code duplication a bit for the common case by calling
+ * strtoull here and leave bounds checking and casting per
+ * option below.
+ */
+ if (c != 'o' && c != 'b')
+ tmp = strtoull(optarg, NULL, 0);
+
switch (c) {
case 'u':
- cfg_busy_poll_usecs = strtoul(optarg, NULL, 0);
- if (cfg_busy_poll_usecs == ULONG_MAX ||
- cfg_busy_poll_usecs > UINT32_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT32_MAX)
error(1, ERANGE, "busy_poll_usecs too large");
+
+ cfg_busy_poll_usecs = (uint32_t)tmp;
break;
case 'P':
- cfg_prefer_busy_poll = strtoul(optarg, NULL, 0);
- if (cfg_prefer_busy_poll == ULONG_MAX ||
- cfg_prefer_busy_poll > 1)
+ if (tmp == ULLONG_MAX || tmp > 1)
error(1, ERANGE,
"prefer busy poll should be 0 or 1");
+
+ cfg_prefer_busy_poll = (uint8_t)tmp;
break;
case 'g':
- cfg_busy_poll_budget = strtoul(optarg, NULL, 0);
- if (cfg_busy_poll_budget == ULONG_MAX ||
- cfg_busy_poll_budget > UINT16_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
error(1, ERANGE,
"busy poll budget must be [0, UINT16_MAX]");
+
+ cfg_busy_poll_budget = (uint16_t)tmp;
break;
case 'p':
- cfg_port = strtoul(optarg, NULL, 0);
- if (cfg_port > UINT16_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT16_MAX)
error(1, ERANGE, "port must be <= 65535");
+
+ cfg_port = (uint16_t)tmp;
break;
case 'b':
ret = inet_aton(optarg, &cfg_bind_addr);
@@ -124,41 +134,39 @@ static void parse_opts(int argc, char **argv)
error(1, 0, "outfile invalid");
break;
case 'm':
- cfg_max_events = strtol(optarg, NULL, 0);
-
- if (cfg_max_events == LONG_MIN ||
- cfg_max_events == LONG_MAX ||
- cfg_max_events <= 0)
+ if (tmp == ULLONG_MAX || tmp > INT_MAX)
error(1, ERANGE,
- "max events must be > 0 and < LONG_MAX");
+ "max events must be > 0 and <= INT_MAX");
+
+ cfg_max_events = (int)tmp;
break;
case 'd':
- cfg_defer_hard_irqs = strtoul(optarg, NULL, 0);
-
- if (cfg_defer_hard_irqs == ULONG_MAX ||
- cfg_defer_hard_irqs > INT32_MAX)
+ if (tmp == ULLONG_MAX || tmp > INT32_MAX)
error(1, ERANGE,
"defer_hard_irqs must be <= INT32_MAX");
+
+ cfg_defer_hard_irqs = (uint32_t)tmp;
break;
case 'r':
- cfg_gro_flush_timeout = strtoull(optarg, NULL, 0);
-
- if (cfg_gro_flush_timeout == ULLONG_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
error(1, ERANGE,
- "gro_flush_timeout must be < ULLONG_MAX");
+ "gro_flush_timeout must be < UINT64_MAX");
+
+ cfg_gro_flush_timeout = (uint64_t)tmp;
break;
case 's':
- cfg_irq_suspend_timeout = strtoull(optarg, NULL, 0);
-
- if (cfg_irq_suspend_timeout == ULLONG_MAX)
+ if (tmp == ULLONG_MAX || tmp > UINT64_MAX)
error(1, ERANGE,
"irq_suspend_timeout must be < ULLONG_MAX");
+
+ cfg_irq_suspend_timeout = (uint64_t)tmp;
break;
case 'i':
- cfg_ifindex = strtoul(optarg, NULL, 0);
- if (cfg_ifindex == ULONG_MAX)
+ if (tmp == ULLONG_MAX || tmp > INT_MAX)
error(1, ERANGE,
- "ifindex must be < ULONG_MAX");
+ "ifindex must be <= INT_MAX");
+
+ cfg_ifindex = (int)tmp;
break;
}
}
@@ -215,7 +223,7 @@ static void setup_queue(void)
struct netdev_napi_set_req *set_req = NULL;
struct ynl_sock *ys;
struct ynl_error yerr;
- uint32_t napi_id;
+ uint32_t napi_id = 0;
ys = ynl_sock_create(&ynl_netdev_family, &yerr);
if (!ys)
@@ -277,8 +285,8 @@ static void run_poller(void)
* here
*/
epoll_params.busy_poll_usecs = cfg_busy_poll_usecs;
- epoll_params.busy_poll_budget = (uint16_t)cfg_busy_poll_budget;
- epoll_params.prefer_busy_poll = (uint8_t)cfg_prefer_busy_poll;
+ epoll_params.busy_poll_budget = cfg_busy_poll_budget;
+ epoll_params.prefer_busy_poll = cfg_prefer_busy_poll;
epoll_params.__pad = 0;
val = 1;
@@ -342,5 +350,9 @@ int main(int argc, char *argv[])
parse_opts(argc, argv);
setup_queue();
run_poller();
+
+ if (cfg_outfile)
+ free(cfg_outfile);
+
return 0;
}
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 876c2db02a63..bc314382e4e1 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -59,6 +59,7 @@ struct options {
unsigned int proto;
} sock;
struct option_cmsg_u32 mark;
+ struct option_cmsg_u32 priority;
struct {
bool ena;
unsigned int delay;
@@ -97,6 +98,8 @@ static void __attribute__((noreturn)) cs_usage(const char *bin)
"\n"
"\t\t-m val Set SO_MARK with given value\n"
"\t\t-M val Set SO_MARK via setsockopt\n"
+ "\t\t-P val Set SO_PRIORITY via setsockopt\n"
+ "\t\t-Q val Set SO_PRIORITY via cmsg\n"
"\t\t-d val Set SO_TXTIME with given delay (usec)\n"
"\t\t-t Enable time stamp reporting\n"
"\t\t-f val Set don't fragment via cmsg\n"
@@ -115,7 +118,7 @@ static void cs_parse_args(int argc, char *argv[])
{
int o;
- while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
+ while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:Q:")) != -1) {
switch (o) {
case 's':
opt.silent_send = true;
@@ -148,6 +151,10 @@ static void cs_parse_args(int argc, char *argv[])
opt.mark.ena = true;
opt.mark.val = atoi(optarg);
break;
+ case 'Q':
+ opt.priority.ena = true;
+ opt.priority.val = atoi(optarg);
+ break;
case 'M':
opt.sockopt.mark = atoi(optarg);
break;
@@ -253,6 +260,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
SOL_SOCKET, SO_MARK, &opt.mark);
ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
+ SOL_SOCKET, SO_PRIORITY, &opt.priority);
+ ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
SOL_IPV6, IPV6_DONTFRAG, &opt.v6.dontfrag);
ca_write_cmsg_u32(cbuf, cbuf_sz, &cmsg_len,
SOL_IPV6, IPV6_TCLASS, &opt.v6.tclass);
diff --git a/tools/testing/selftests/net/cmsg_so_priority.sh b/tools/testing/selftests/net/cmsg_so_priority.sh
new file mode 100755
index 000000000000..ee07d8653262
--- /dev/null
+++ b/tools/testing/selftests/net/cmsg_so_priority.sh
@@ -0,0 +1,151 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+readonly KSFT_SKIP=4
+
+IP4=192.0.2.1/24
+TGT4=192.0.2.2
+TGT4_RAW=192.0.2.3
+IP6=2001:db8::1/64
+TGT6=2001:db8::2
+TGT6_RAW=2001:db8::3
+PORT=1234
+TOTAL_TESTS=0
+FAILED_TESTS=0
+
+if ! command -v jq &> /dev/null; then
+ echo "SKIP cmsg_so_priroity.sh test: jq is not installed." >&2
+ exit "$KSFT_SKIP"
+fi
+
+check_result() {
+ ((TOTAL_TESTS++))
+ if [ "$1" -ne 0 ]; then
+ ((FAILED_TESTS++))
+ fi
+}
+
+cleanup()
+{
+ cleanup_ns $NS
+}
+
+trap cleanup EXIT
+
+setup_ns NS
+
+create_filter() {
+ local handle=$1
+ local vlan_prio=$2
+ local ip_type=$3
+ local proto=$4
+ local dst_ip=$5
+ local ip_proto
+
+ if [[ "$proto" == "u" ]]; then
+ ip_proto="udp"
+ elif [[ "$ip_type" == "ipv4" && "$proto" == "i" ]]; then
+ ip_proto="icmp"
+ elif [[ "$ip_type" == "ipv6" && "$proto" == "i" ]]; then
+ ip_proto="icmpv6"
+ fi
+
+ tc -n $NS filter add dev dummy1 \
+ egress pref 1 handle "$handle" proto 802.1q \
+ flower vlan_prio "$vlan_prio" vlan_ethtype "$ip_type" \
+ dst_ip "$dst_ip" ${ip_proto:+ip_proto $ip_proto} \
+ action pass
+}
+
+ip -n $NS link set dev lo up
+ip -n $NS link add name dummy1 up type dummy
+
+ip -n $NS link add link dummy1 name dummy1.10 up type vlan id 10 \
+ egress-qos-map 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+ip -n $NS address add $IP4 dev dummy1.10
+ip -n $NS address add $IP6 dev dummy1.10 nodad
+
+ip netns exec $NS sysctl -wq net.ipv4.ping_group_range='0 2147483647'
+
+ip -n $NS neigh add $TGT4 lladdr 00:11:22:33:44:55 nud permanent \
+ dev dummy1.10
+ip -n $NS neigh add $TGT6 lladdr 00:11:22:33:44:55 nud permanent \
+ dev dummy1.10
+ip -n $NS neigh add $TGT4_RAW lladdr 00:11:22:33:44:66 nud permanent \
+ dev dummy1.10
+ip -n $NS neigh add $TGT6_RAW lladdr 00:11:22:33:44:66 nud permanent \
+ dev dummy1.10
+
+tc -n $NS qdisc add dev dummy1 clsact
+
+FILTER_COUNTER=10
+
+for i in 4 6; do
+ for proto in u i r; do
+ echo "Test IPV$i, prot: $proto"
+ for priority in {0..7}; do
+ if [[ $i == 4 && $proto == "r" ]]; then
+ TGT=$TGT4_RAW
+ elif [[ $i == 6 && $proto == "r" ]]; then
+ TGT=$TGT6_RAW
+ elif [ $i == 4 ]; then
+ TGT=$TGT4
+ else
+ TGT=$TGT6
+ fi
+
+ handle="${FILTER_COUNTER}${priority}"
+
+ create_filter $handle $priority ipv$i $proto $TGT
+
+ pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+ | jq ".[] | select(.options.handle == ${handle}) | \
+ .options.actions[0].stats.packets")
+
+ if [[ $pkts == 0 ]]; then
+ check_result 0
+ else
+ echo "prio $priority: expected 0, got $pkts"
+ check_result 1
+ fi
+
+ ip netns exec $NS ./cmsg_sender -$i -Q $priority \
+ -p $proto $TGT $PORT
+
+ pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+ | jq ".[] | select(.options.handle == ${handle}) | \
+ .options.actions[0].stats.packets")
+ if [[ $pkts == 1 ]]; then
+ check_result 0
+ else
+ echo "prio $priority -Q: expected 1, got $pkts"
+ check_result 1
+ fi
+
+ ip netns exec $NS ./cmsg_sender -$i -P $priority \
+ -p $proto $TGT $PORT
+
+ pkts=$(tc -n $NS -j -s filter show dev dummy1 egress \
+ | jq ".[] | select(.options.handle == ${handle}) | \
+ .options.actions[0].stats.packets")
+ if [[ $pkts == 2 ]]; then
+ check_result 0
+ else
+ echo "prio $priority -P: expected 2, got $pkts"
+ check_result 1
+ fi
+ done
+ FILTER_COUNTER=$((FILTER_COUNTER + 10))
+ done
+done
+
+if [ $FAILED_TESTS -ne 0 ]; then
+ echo "FAIL - $FAILED_TESTS/$TOTAL_TESTS tests failed"
+ exit 1
+else
+ echo "OK - All $TOTAL_TESTS tests passed"
+ exit 0
+fi
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
index 1d7e756644bc..478af0aefa97 100755
--- a/tools/testing/selftests/net/cmsg_time.sh
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -34,13 +34,28 @@ BAD=0
TOTAL=0
check_result() {
+ local ret=$1
+ local got=$2
+ local exp=$3
+ local case=$4
+ local xfail=$5
+ local xf=
+ local inc=
+
+ if [ "$xfail" == "xfail" ]; then
+ xf="(XFAIL)"
+ inc=0
+ else
+ inc=1
+ fi
+
((TOTAL++))
- if [ $1 -ne 0 ]; then
- echo " Case $4 returned $1, expected 0"
- ((BAD++))
+ if [ $ret -ne 0 ]; then
+ echo " Case $case returned $ret, expected 0 $xf"
+ ((BAD+=inc))
elif [ "$2" != "$3" ]; then
- echo " Case $4 returned '$2', expected '$3'"
- ((BAD++))
+ echo " Case $case returned '$got', expected '$exp' $xf"
+ ((BAD+=inc))
fi
}
@@ -66,14 +81,14 @@ for i in "-4 $TGT4" "-6 $TGT6"; do
awk '/SND/ { if ($3 > 1000) print "OK"; }')
check_result $? "$ts" "OK" "$prot - TXTIME abs"
- [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000
+ [ "$KSFT_MACHINE_SLOW" = yes ] && xfail=xfail
- ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay |
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
awk '/SND/ {snd=$3}
/SCHED/ {sch=$3}
- END { if (snd - sch > '$((delay/2))') print "OK";
- else print snd, "-", sch, "<", '$((delay/2))'; }')
- check_result $? "$ts" "OK" "$prot - TXTIME rel"
+ END { if (snd - sch > 500) print "OK";
+ else print snd, "-", sch, "<", 500; }')
+ check_result $? "$ts" "OK" "$prot - TXTIME rel" $xfail
done
done
diff --git a/tools/testing/selftests/net/fdb_notify.sh b/tools/testing/selftests/net/fdb_notify.sh
index c03151e7791c..c159230c9b62 100755
--- a/tools/testing/selftests/net/fdb_notify.sh
+++ b/tools/testing/selftests/net/fdb_notify.sh
@@ -49,7 +49,7 @@ test_dup_vxlan_self()
{
ip_link_add br up type bridge vlan_filtering 1
ip_link_add vx up type vxlan id 2000 dstport 4789
- ip_link_master vx br
+ ip_link_set_master vx br
do_test_dup add "vxlan" dev vx self dst 192.0.2.1
do_test_dup del "vxlan" dev vx self dst 192.0.2.1
@@ -59,7 +59,7 @@ test_dup_vxlan_master()
{
ip_link_add br up type bridge vlan_filtering 1
ip_link_add vx up type vxlan id 2000 dstport 4789
- ip_link_master vx br
+ ip_link_set_master vx br
do_test_dup add "vxlan master" dev vx master
do_test_dup del "vxlan master" dev vx master
@@ -79,7 +79,7 @@ test_dup_macvlan_master()
ip_link_add br up type bridge vlan_filtering 1
ip_link_add dd up type dummy
ip_link_add mv up link dd type macvlan mode passthru
- ip_link_master mv br
+ ip_link_set_master mv br
do_test_dup add "macvlan master" dev mv self
do_test_dup del "macvlan master" dev mv self
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 1d58b3b87465..847936363a12 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -291,6 +291,37 @@ fib_rule6_test()
"$getnomatch" "iif dscp redirect to table" \
"iif dscp no redirect to table"
fi
+
+ fib_check_iproute_support "flowlabel" "flowlabel"
+ if [ $? -eq 0 ]; then
+ match="flowlabel 0xfffff"
+ getmatch="flowlabel 0xfffff"
+ getnomatch="flowlabel 0xf"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "flowlabel redirect to table" \
+ "flowlabel no redirect to table"
+
+ match="flowlabel 0xfffff"
+ getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
+ getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif flowlabel redirect to table" \
+ "iif flowlabel no redirect to table"
+
+ match="flowlabel 0x08000/0x08000"
+ getmatch="flowlabel 0xfffff"
+ getnomatch="flowlabel 0xf7fff"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "flowlabel masked redirect to table" \
+ "flowlabel masked no redirect to table"
+
+ match="flowlabel 0x08000/0x08000"
+ getmatch="from $SRC_IP6 iif $DEV flowlabel 0xfffff"
+ getnomatch="from $SRC_IP6 iif $DEV flowlabel 0xf7fff"
+ fib_rule6_test_match_n_redirect "$match" "$getmatch" \
+ "$getnomatch" "iif flowlabel masked redirect to table" \
+ "iif flowlabel masked no redirect to table"
+ fi
}
fib_rule6_vrf_test()
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index 7d885cff8d79..00bde7b6f39e 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -105,6 +105,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_bridge_1q_port_8472_ipv6.sh \
vxlan_bridge_1q_port_8472.sh \
vxlan_bridge_1q.sh \
+ vxlan_reserved.sh \
vxlan_symmetric_ipv6.sh \
vxlan_symmetric.sh
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1c8a26046589..2b5700b61ffa 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,7 +1,7 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding pvid_change"
NUM_NETIFS=4
source lib.sh
@@ -77,12 +77,16 @@ cleanup()
ping_ipv4()
{
- ping_test $h1 192.0.2.2
+ local msg=$1
+
+ ping_test $h1 192.0.2.2 "$msg"
}
ping_ipv6()
{
- ping6_test $h1 2001:db8:1::2
+ local msg=$1
+
+ ping6_test $h1 2001:db8:1::2 "$msg"
}
learning()
@@ -95,6 +99,21 @@ flooding()
flood_test $swp2 $h1 $h2
}
+pvid_change()
+{
+ # Test that the changing of the VLAN-aware PVID does not affect
+ # VLAN-unaware forwarding
+ bridge vlan add vid 3 dev $swp1 pvid untagged
+
+ ping_ipv4 " with bridge port $swp1 PVID changed"
+ ping_ipv6 " with bridge port $swp1 PVID changed"
+
+ bridge vlan del vid 3 dev $swp1
+
+ ping_ipv4 " with bridge port $swp1 PVID deleted"
+ ping_ipv6 " with bridge port $swp1 PVID deleted"
+}
+
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 7337f398f9cc..8de80acf249e 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -68,6 +68,7 @@ declare -A NETIFS=(
: "${REQUIRE_JQ:=yes}"
: "${REQUIRE_MZ:=yes}"
: "${REQUIRE_MTOOLS:=no}"
+: "${REQUIRE_TEAMD:=no}"
# Whether to override MAC addresses on interfaces participating in the test.
: "${STABLE_MAC_ADDRS:=no}"
@@ -321,6 +322,9 @@ fi
if [[ "$REQUIRE_MZ" = "yes" ]]; then
require_command $MZ
fi
+if [[ "$REQUIRE_TEAMD" = "yes" ]]; then
+ require_command $TEAMD
+fi
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
# https://github.com/troglobit/mtools
require_command msend
@@ -932,13 +936,6 @@ packets_rate()
echo $(((t1 - t0) / interval))
}
-mac_get()
-{
- local if_name=$1
-
- ip -j link show dev $if_name | jq -r '.[]["address"]'
-}
-
ether_addr_to_u64()
{
local addr="$1"
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
index fe4d7c906a70..a20d22d1df36 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1q_lag.sh
@@ -49,6 +49,7 @@ ALL_TESTS="
test_mirror_gretap_second
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=6
source lib.sh
source mirror_lib.sh
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
index 1261e6f46e34..ff7049582d35 100755
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lag_lacp.sh
@@ -53,6 +53,7 @@ ALL_TESTS="
test_mirror_gretap_second
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=6
source lib.sh
source mirror_lib.sh
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
index e064b946e821..16583a470ec3 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_1d_lag.sh
@@ -109,6 +109,7 @@ ALL_TESTS="
ping_ipv4
ping_ipv6
"
+REQUIRE_TEAMD="yes"
NUM_NETIFS=8
source lib.sh
diff --git a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
index f05ffe213c46..2a4cd1af1b85 100755
--- a/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
+++ b/tools/testing/selftests/net/forwarding/router_bridge_lag.sh
@@ -76,6 +76,7 @@
ping_ipv4
ping_ipv6
"}
+REQUIRE_TEAMD="yes"
NUM_NETIFS=8
: ${lib_dir:=.}
source $lib_dir/lib.sh
diff --git a/tools/testing/selftests/net/forwarding/vxlan_reserved.sh b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
new file mode 100755
index 000000000000..46c31794b91b
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/vxlan_reserved.sh
@@ -0,0 +1,352 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# +--------------------+
+# | H1 (vrf) |
+# | + $h1 |
+# | | 192.0.2.1/28 |
+# +----|---------------+
+# |
+# +----|--------------------------------+
+# | SW | |
+# | +--|------------------------------+ |
+# | | + $swp1 BR1 (802.1d) | |
+# | | | |
+# | | + vx1 (vxlan) | |
+# | | local 192.0.2.17 | |
+# | | id 1000 dstport $VXPORT | |
+# | +---------------------------------+ |
+# | |
+# | 192.0.2.32/28 via 192.0.2.18 |
+# | |
+# | + $rp1 |
+# | | 192.0.2.17/28 |
+# +--|----------------------------------+
+# |
+# +--|----------------------------------+
+# | | |
+# | + $rp2 |
+# | 192.0.2.18/28 |
+# | |
+# | VRP2 (vrf) |
+# +-------------------------------------+
+
+: ${VXPORT:=4789}
+: ${ALL_TESTS:="
+ default_test
+ plain_test
+ reserved_0_test
+ reserved_10_test
+ reserved_31_test
+ reserved_56_test
+ reserved_63_test
+ "}
+
+NUM_NETIFS=4
+source lib.sh
+
+h1_create()
+{
+ simple_if_init $h1 192.0.2.1/28
+ defer simple_if_fini $h1 192.0.2.1/28
+
+ tc qdisc add dev $h1 clsact
+ defer tc qdisc del dev $h1 clsact
+
+ tc filter add dev $h1 ingress pref 77 \
+ prot ip flower skip_hw ip_proto icmp action drop
+ defer tc filter del dev $h1 ingress pref 77
+}
+
+switch_create()
+{
+ ip_link_add br1 type bridge vlan_filtering 0 mcast_snooping 0
+ # Make sure the bridge uses the MAC address of the local port and not
+ # that of the VxLAN's device.
+ ip_link_set_addr br1 $(mac_get $swp1)
+ ip_link_set_up br1
+
+ ip_link_set_up $rp1
+ ip_addr_add $rp1 192.0.2.17/28
+ ip_route_add 192.0.2.32/28 nexthop via 192.0.2.18
+
+ ip_link_set_master $swp1 br1
+ ip_link_set_up $swp1
+}
+
+vrp2_create()
+{
+ simple_if_init $rp2 192.0.2.18/28
+ defer simple_if_fini $rp2 192.0.2.18/28
+}
+
+setup_prepare()
+{
+ h1=${NETIFS[p1]}
+ swp1=${NETIFS[p2]}
+
+ rp1=${NETIFS[p3]}
+ rp2=${NETIFS[p4]}
+
+ vrf_prepare
+ defer vrf_cleanup
+
+ forwarding_enable
+ defer forwarding_restore
+
+ h1_create
+ switch_create
+
+ vrp2_create
+}
+
+vxlan_header_bytes()
+{
+ local vni=$1; shift
+ local -a extra_bits=("$@")
+ local -a bits
+ local i
+
+ for ((i=0; i < 64; i++)); do
+ bits[i]=0
+ done
+
+ # Bit 4 is the I flag and is always on.
+ bits[4]=1
+
+ for i in ${extra_bits[@]}; do
+ bits[i]=1
+ done
+
+ # Bits 32..55 carry the VNI
+ local mask=0x800000
+ for ((i=0; i < 24; i++)); do
+ bits[$((i + 32))]=$(((vni & mask) != 0))
+ ((mask >>= 1))
+ done
+
+ local bytes
+ for ((i=0; i < 8; i++)); do
+ local byte=0
+ local j
+ for ((j=0; j < 8; j++)); do
+ local bit=${bits[8 * i + j]}
+ ((byte += bit << (7 - j)))
+ done
+ bytes+=$(printf %02x $byte):
+ done
+
+ echo ${bytes%:}
+}
+
+neg_bytes()
+{
+ local bytes=$1; shift
+
+ local -A neg=([0]=f [1]=e [2]=d [3]=c [4]=b [5]=a [6]=9 [7]=8
+ [8]=7 [9]=6 [a]=5 [b]=4 [c]=3 [d]=2 [e]=1 [f]=0 [:]=:)
+ local out
+ local i
+
+ for ((i=0; i < ${#bytes}; i++)); do
+ local c=${bytes:$i:1}
+ out+=${neg[$c]}
+ done
+ echo $out
+}
+
+vxlan_ping_do()
+{
+ local count=$1; shift
+ local dev=$1; shift
+ local next_hop_mac=$1; shift
+ local dest_ip=$1; shift
+ local dest_mac=$1; shift
+ local vni=$1; shift
+ local reserved_bits=$1; shift
+
+ local vxlan_header=$(vxlan_header_bytes $vni $reserved_bits)
+
+ $MZ $dev -c $count -d 100msec -q \
+ -b $next_hop_mac -B $dest_ip \
+ -t udp sp=23456,dp=$VXPORT,p=$(:
+ )"$vxlan_header:"$( : VXLAN
+ )"$dest_mac:"$( : ETH daddr
+ )"00:11:22:33:44:55:"$( : ETH saddr
+ )"08:00:"$( : ETH type
+ )"45:"$( : IP version + IHL
+ )"00:"$( : IP TOS
+ )"00:54:"$( : IP total length
+ )"99:83:"$( : IP identification
+ )"40:00:"$( : IP flags + frag off
+ )"40:"$( : IP TTL
+ )"01:"$( : IP proto
+ )"00:00:"$( : IP header csum
+ )"$(ipv4_to_bytes 192.0.2.3):"$( : IP saddr
+ )"$(ipv4_to_bytes 192.0.2.1):"$( : IP daddr
+ )"08:"$( : ICMP type
+ )"00:"$( : ICMP code
+ )"8b:f2:"$( : ICMP csum
+ )"1f:6a:"$( : ICMP request identifier
+ )"00:01:"$( : ICMP request seq. number
+ )"4f:ff:c5:5b:00:00:00:00:"$( : ICMP payload
+ )"6d:74:0b:00:00:00:00:00:"$( :
+ )"10:11:12:13:14:15:16:17:"$( :
+ )"18:19:1a:1b:1c:1d:1e:1f:"$( :
+ )"20:21:22:23:24:25:26:27:"$( :
+ )"28:29:2a:2b:2c:2d:2e:2f:"$( :
+ )"30:31:32:33:34:35:36:37"
+}
+
+vxlan_device_add()
+{
+ ip_link_add vx1 up type vxlan id 1000 \
+ local 192.0.2.17 dstport "$VXPORT" \
+ nolearning noudpcsum tos inherit ttl 100 "$@"
+ ip_link_set_master vx1 br1
+}
+
+vxlan_all_reserved_bits()
+{
+ local i
+
+ for ((i=0; i < 64; i++)); do
+ if ((i == 4 || i >= 32 && i < 56)); then
+ continue
+ fi
+ echo $i
+ done
+}
+
+vxlan_ping_vanilla()
+{
+ vxlan_ping_do 10 $rp2 $(mac_get $rp1) 192.0.2.17 $(mac_get $h1) 1000
+}
+
+vxlan_ping_reserved()
+{
+ for bit in $(vxlan_all_reserved_bits); do
+ vxlan_ping_do 1 $rp2 $(mac_get $rp1) \
+ 192.0.2.17 $(mac_get $h1) 1000 "$bit"
+ ((n++))
+ done
+}
+
+vxlan_ping_test()
+{
+ local what=$1; shift
+ local get_stat=$1; shift
+ local expect=$1; shift
+
+ RET=0
+
+ local t0=$($get_stat)
+
+ "$@"
+ check_err $? "Failure when running $@"
+
+ local t1=$($get_stat)
+ local delta=$((t1 - t0))
+
+ ((expect == delta))
+ check_err $? "Expected to capture $expect packets, got $delta."
+
+ log_test "$what"
+}
+
+__default_test_do()
+{
+ local n_allowed_bits=$1; shift
+ local what=$1; shift
+
+ vxlan_ping_test "$what: clean packets" \
+ "tc_rule_stats_get $h1 77 ingress" \
+ 10 vxlan_ping_vanilla
+
+ local t0=$(link_stats_get vx1 rx errors)
+ vxlan_ping_test "$what: mangled packets" \
+ "tc_rule_stats_get $h1 77 ingress" \
+ $n_allowed_bits vxlan_ping_reserved
+ local t1=$(link_stats_get vx1 rx errors)
+
+ RET=0
+ local expect=$((39 - n_allowed_bits))
+ local delta=$((t1 - t0))
+ ((expect == delta))
+ check_err $? "Expected $expect error packets, got $delta."
+ log_test "$what: drops reported"
+}
+
+default_test_do()
+{
+ vxlan_device_add
+ __default_test_do 0 "Default"
+}
+
+default_test()
+{
+ in_defer_scope \
+ default_test_do
+}
+
+plain_test_do()
+{
+ vxlan_device_add reserved_bits 0xf7ffffff000000ff
+ __default_test_do 0 "reserved_bits 0xf7ffffff000000ff"
+}
+
+plain_test()
+{
+ in_defer_scope \
+ plain_test_do
+}
+
+reserved_test()
+{
+ local bit=$1; shift
+
+ local allowed_bytes=$(vxlan_header_bytes 0xffffff $bit)
+ local reserved_bytes=$(neg_bytes $allowed_bytes)
+ local reserved_bits=${reserved_bytes//:/}
+
+ vxlan_device_add reserved_bits 0x$reserved_bits
+ __default_test_do 1 "reserved_bits 0x$reserved_bits"
+}
+
+reserved_0_test()
+{
+ in_defer_scope \
+ reserved_test 0
+}
+
+reserved_10_test()
+{
+ in_defer_scope \
+ reserved_test 10
+}
+
+reserved_31_test()
+{
+ in_defer_scope \
+ reserved_test 31
+}
+
+reserved_56_test()
+{
+ in_defer_scope \
+ reserved_test 56
+}
+
+reserved_63_test()
+{
+ in_defer_scope \
+ reserved_test 63
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index be4a30a0d02a..9b44a091802c 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -227,7 +227,8 @@ static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
attr->rta_len = RTA_LENGTH(size);
attr->rta_type = rta_type;
- memcpy(RTA_DATA(attr), payload, size);
+ if (payload)
+ memcpy(RTA_DATA(attr), payload, size);
return 0;
}
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
index 8994fec1c38f..0bd9a038a1f0 100644
--- a/tools/testing/selftests/net/lib.sh
+++ b/tools/testing/selftests/net/lib.sh
@@ -435,6 +435,13 @@ xfail_on_veth()
fi
}
+mac_get()
+{
+ local if_name=$1
+
+ ip -j link show dev $if_name | jq -r '.[]["address"]'
+}
+
kill_process()
{
local pid=$1; shift
@@ -451,7 +458,7 @@ ip_link_add()
defer ip link del dev "$name"
}
-ip_link_master()
+ip_link_set_master()
{
local member=$1; shift
local master=$1; shift
@@ -459,3 +466,62 @@ ip_link_master()
ip link set dev "$member" master "$master"
defer ip link set dev "$member" nomaster
}
+
+ip_link_set_addr()
+{
+ local name=$1; shift
+ local addr=$1; shift
+
+ local old_addr=$(mac_get "$name")
+ ip link set dev "$name" address "$addr"
+ defer ip link set dev "$name" address "$old_addr"
+}
+
+ip_link_is_up()
+{
+ local name=$1; shift
+
+ local state=$(ip -j link show "$name" |
+ jq -r '(.[].flags[] | select(. == "UP")) // "DOWN"')
+ [[ $state == "UP" ]]
+}
+
+ip_link_set_up()
+{
+ local name=$1; shift
+
+ if ! ip_link_is_up "$name"; then
+ ip link set dev "$name" up
+ defer ip link set dev "$name" down
+ fi
+}
+
+ip_link_set_down()
+{
+ local name=$1; shift
+
+ if ip_link_is_up "$name"; then
+ ip link set dev "$name" down
+ defer ip link set dev "$name" up
+ fi
+}
+
+ip_addr_add()
+{
+ local name=$1; shift
+
+ ip addr add dev "$name" "$@"
+ defer ip addr del dev "$name" "$@"
+}
+
+ip_route_add()
+{
+ ip route add "$@"
+ defer ip route del "$@"
+}
+
+bridge_vlan_add()
+{
+ bridge vlan add "$@"
+ defer bridge vlan del "$@"
+}
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
index 477ae76de93d..3efe005436cd 100644
--- a/tools/testing/selftests/net/lib/py/ksft.py
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -71,6 +71,11 @@ def ksft_in(a, b, comment=""):
_fail("Check failed", a, "not in", b, comment)
+def ksft_is(a, b, comment=""):
+ if a is not b:
+ _fail("Check failed", a, "is not", b, comment)
+
+
def ksft_ge(a, b, comment=""):
if a < b:
_fail("Check failed", a, "<", b, comment)
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
index 72590c3f90f1..9e3bcddcf3e8 100644
--- a/tools/testing/selftests/net/lib/py/utils.py
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -10,7 +10,9 @@ import time
class CmdExitFailure(Exception):
- pass
+ def __init__(self, msg, cmd_obj):
+ super().__init__(msg)
+ self.cmd = cmd_obj
class cmd:
@@ -48,7 +50,7 @@ class cmd:
if len(stderr) > 0 and stderr[-1] == "\n":
stderr = stderr[:-1]
raise CmdExitFailure("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
- (self.proc.args, stdout, stderr))
+ (self.proc.args, stdout, stderr), self)
class bkg(cmd):
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
index 076a7e8dc3eb..ad1e36baee2a 100644
--- a/tools/testing/selftests/net/lib/py/ynl.py
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -13,14 +13,14 @@ try:
SPEC_PATH = KSFT_DIR / "net/lib/specs"
sys.path.append(tools_full_path.as_posix())
- from net.lib.ynl.lib import YnlFamily, NlError
+ from net.lib.ynl.pyynl.lib import YnlFamily, NlError
else:
# Running in tree
tools_full_path = KSRC / "tools"
SPEC_PATH = KSRC / "Documentation/netlink/specs"
sys.path.append(tools_full_path.as_posix())
- from net.ynl.lib import YnlFamily, NlError
+ from net.ynl.pyynl.lib import YnlFamily, NlError
except ModuleNotFoundError as e:
ksft_pr("Failed importing `ynl` library from kernel sources")
ksft_pr(str(e))
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index 4209b9569039..414addef9a45 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -25,6 +25,8 @@
#include <sys/types.h>
#include <sys/mman.h>
+#include <arpa/inet.h>
+
#include <netdb.h>
#include <netinet/in.h>
@@ -1211,23 +1213,42 @@ static void parse_setsock_options(const char *name)
exit(1);
}
-void xdisconnect(int fd, int addrlen)
+void xdisconnect(int fd)
{
- struct sockaddr_storage empty;
+ socklen_t addrlen = sizeof(struct sockaddr_storage);
+ struct sockaddr_storage addr, empty;
int msec_sleep = 10;
- int queued = 1;
- int i;
+ void *raw_addr;
+ int i, cmdlen;
+ char cmd[128];
+
+ /* get the local address and convert it to string */
+ if (getsockname(fd, (struct sockaddr *)&addr, &addrlen) < 0)
+ xerror("getsockname");
+
+ if (addr.ss_family == AF_INET)
+ raw_addr = &(((struct sockaddr_in *)&addr)->sin_addr);
+ else if (addr.ss_family == AF_INET6)
+ raw_addr = &(((struct sockaddr_in6 *)&addr)->sin6_addr);
+ else
+ xerror("bad family");
+
+ strcpy(cmd, "ss -M | grep -q ");
+ cmdlen = strlen(cmd);
+ if (!inet_ntop(addr.ss_family, raw_addr, &cmd[cmdlen],
+ sizeof(cmd) - cmdlen))
+ xerror("inet_ntop");
shutdown(fd, SHUT_WR);
- /* while until the pending data is completely flushed, the later
+ /*
+ * wait until the pending data is completely flushed and all
+ * the MPTCP sockets reached the closed status.
* disconnect will bypass/ignore/drop any pending data.
*/
for (i = 0; ; i += msec_sleep) {
- if (ioctl(fd, SIOCOUTQ, &queued) < 0)
- xerror("can't query out socket queue: %d", errno);
-
- if (!queued)
+ /* closed socket are not listed by 'ss' */
+ if (system(cmd) != 0)
break;
if (i > poll_timeout)
@@ -1281,9 +1302,9 @@ again:
return ret;
if (cfg_truncate > 0) {
- xdisconnect(fd, peer->ai_addrlen);
+ xdisconnect(fd);
} else if (--cfg_repeat > 0) {
- xdisconnect(fd, peer->ai_addrlen);
+ xdisconnect(fd);
/* the socket could be unblocking at this point, we need the
* connect to be blocking
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index b48b4e56826a..5e3c56253274 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -137,7 +137,7 @@ TEST_GROUP=""
#shellcheck disable=SC2317
cleanup()
{
- rm -f "$cin_disconnect" "$cout_disconnect"
+ rm -f "$cin_disconnect"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
rm -f "$capout"
@@ -155,7 +155,6 @@ cin=$(mktemp)
cout=$(mktemp)
capout=$(mktemp)
cin_disconnect="$cin".disconnect
-cout_disconnect="$cout".disconnect
trap cleanup EXIT
mptcp_lib_ns_init ns1 ns2 ns3 ns4
@@ -445,12 +444,8 @@ do_transfer()
printf "(duration %05sms) " "${duration}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
- echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
- cat /tmp/${listener_ns}.out
- echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
- [ ${listener_ns} != ${connector_ns} ] && cat /tmp/${connector_ns}.out
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
+ "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
echo
cat "$capout"
@@ -587,7 +582,7 @@ make_file()
mptcp_lib_make_file $name 1024 $ksize
dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null
- echo "Created $name (size $(du -b "$name")) containing data sent by $who"
+ echo "Created $name (size $(stat -c "%s" "$name") B) containing data sent by $who"
}
run_tests_lo()
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index c07e2bd3a315..13a3b68181ee 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -1039,13 +1039,8 @@ do_transfer()
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
fail_test "client exit code $retc, server $rets"
- echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
- cat /tmp/${listener_ns}.out
- echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
- cat /tmp/${connector_ns}.out
-
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
+ "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
return 1
fi
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 975d4d4c862a..051e289d7967 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -107,6 +107,27 @@ mptcp_lib_pr_info() {
mptcp_lib_print_info "INFO: ${*}"
}
+# $1-2: listener/connector ns ; $3 port ; $4-5 listener/connector stat file
+mptcp_lib_pr_err_stats() {
+ local lns="${1}"
+ local cns="${2}"
+ local port="${3}"
+ local lstat="${4}"
+ local cstat="${5}"
+
+ echo -en "${MPTCP_LIB_COLOR_RED}"
+ {
+ printf "\nnetns %s (listener) socket stat for %d:\n" "${lns}" "${port}"
+ ip netns exec "${lns}" ss -Menitam -o "sport = :${port}"
+ cat "${lstat}"
+
+ printf "\nnetns %s (connector) socket stat for %d:\n" "${cns}" "${port}"
+ ip netns exec "${cns}" ss -Menitam -o "dport = :${port}"
+ [ "${lstat}" != "${cstat}" ] && cat "${cstat}"
+ } 1>&2
+ echo -en "${MPTCP_LIB_COLOR_RESET}"
+}
+
# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
# features using the last version of the kernel and the selftests to make sure
# a test is not being skipped by mistake.
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index 5e8d5b83e2d0..418a903c3a4d 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -169,6 +169,11 @@ do_transfer()
cmsg+=",TCPINQ"
fi
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat -n
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat -n
+
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
$mptcp_connect -t ${timeout_poll} -l -M 1 -p $port -s ${srv_proto} -c "${cmsg}" \
@@ -189,14 +194,16 @@ do_transfer()
wait $spid
local rets=$?
+ NSTAT_HISTORY=/tmp/${listener_ns}.nstat ip netns exec ${listener_ns} \
+ nstat | grep Tcp > /tmp/${listener_ns}.out
+ NSTAT_HISTORY=/tmp/${connector_ns}.nstat ip netns exec ${connector_ns} \
+ nstat | grep Tcp > /tmp/${connector_ns}.out
+
print_title "Transfer ${ip:2}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
mptcp_lib_pr_fail "client exit code $retc, server $rets"
- echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
-
- echo -e "\nnetns ${connector_ns} socket stat for ${port}:" 1>&2
- ip netns exec ${connector_ns} ss -Menita 1>&2 -o "dport = :$port"
+ mptcp_lib_pr_err_stats "${listener_ns}" "${connector_ns}" "${port}" \
+ "/tmp/${listener_ns}.out" "/tmp/${connector_ns}.out"
mptcp_lib_result_fail "transfer ${ip}"
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index 8fa77c8e9b65..9c2a415976cb 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -155,6 +155,11 @@ do_transfer()
sleep 1
fi
+ NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
+ nstat -n
+ NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
+ nstat -n
+
timeout ${timeout_test} \
ip netns exec ${ns3} \
./mptcp_connect -jt ${timeout_poll} -l -p $port -T $max_time \
@@ -180,25 +185,27 @@ do_transfer()
kill ${cappid_connector}
fi
+ NSTAT_HISTORY=/tmp/${ns3}.nstat ip netns exec ${ns3} \
+ nstat | grep Tcp > /tmp/${ns3}.out
+ NSTAT_HISTORY=/tmp/${ns1}.nstat ip netns exec ${ns1} \
+ nstat | grep Tcp > /tmp/${ns1}.out
+
cmp $sin $cout > /dev/null 2>&1
local cmps=$?
cmp $cin $sout > /dev/null 2>&1
local cmpc=$?
- printf "%-16s" " max $max_time "
if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
[ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
+ printf "%-16s" " max $max_time "
mptcp_lib_pr_ok
cat "$capout"
return 0
fi
- mptcp_lib_pr_fail
- echo "client exit code $retc, server $rets" 1>&2
- echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
- ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
- echo -e "\nnetns ${ns1} socket stat for $port:" 1>&2
- ip netns exec ${ns1} ss -nita 1>&2 -o "dport = :$port"
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
+ mptcp_lib_pr_err_stats "${ns3}" "${ns1}" "${port}" \
+ "/tmp/${ns3}.out" "/tmp/${ns1}.out"
ls -l $sin $cout
ls -l $cin $sout
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
index 93d9d914529b..93e8cb671c3d 100755
--- a/tools/testing/selftests/net/nl_netdev.py
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -18,6 +18,23 @@ def lo_check(nf) -> None:
ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
+def napi_list_check(nf) -> None:
+ with NetdevSimDev(queue_count=100) as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ ip(f"link set dev {nsim.ifname} up")
+
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100)
+
+ for q in [50, 0, 99]:
+ for i in range(4):
+ nsim.dfs_write("queue_reset", f"{q} {i}")
+ napis = nf.napi_get({'ifindex': nsim.ifindex}, dump=True)
+ ksft_eq(len(napis), 100,
+ comment=f"queue count after reset queue {q} mode {i}")
+
+
def page_pool_check(nf) -> None:
with NetdevSimDev() as nsimdev:
nsim = nsimdev.nsims[0]
@@ -89,7 +106,7 @@ def page_pool_check(nf) -> None:
def main() -> None:
nf = NetdevFamily()
- ksft_run([empty_check, lo_check, page_pool_check],
+ ksft_run([empty_check, lo_check, page_pool_check, napi_list_check],
args=(nf, ))
ksft_exit()
diff --git a/tools/testing/selftests/net/packetdrill/ksft_runner.sh b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
index 4071c133f29e..e15c43b7359b 100755
--- a/tools/testing/selftests/net/packetdrill/ksft_runner.sh
+++ b/tools/testing/selftests/net/packetdrill/ksft_runner.sh
@@ -23,7 +23,7 @@ if [ $# -ne 1 ]; then
ktap_exit_fail_msg "usage: $0 <script>"
exit "$KSFT_FAIL"
fi
-script="$1"
+script="$(basename $1)"
if [ -z "$(which packetdrill)" ]; then
ktap_skip_all "packetdrill not found in PATH"
@@ -31,16 +31,30 @@ if [ -z "$(which packetdrill)" ]; then
fi
declare -a optargs
+failfunc=ktap_test_fail
+
if [[ -n "${KSFT_MACHINE_SLOW}" ]]; then
optargs+=('--tolerance_usecs=14000')
+
+ # xfail tests that are known flaky with dbg config, not fixable.
+ # still run them for coverage (and expect 100% pass without dbg).
+ declare -ar xfail_list=(
+ "tcp_fast_recovery_prr-ss.*.pkt"
+ "tcp_timestamping.*.pkt"
+ "tcp_user_timeout_user-timeout-probe.pkt"
+ "tcp_zerocopy_epoll_.*.pkt"
+ "tcp_tcp_info_tcp-info-*-limited.pkt"
+ )
+ readonly xfail_regex="^($(printf '%s|' "${xfail_list[@]}"))$"
+ [[ "$script" =~ ${xfail_regex} ]] && failfunc=ktap_test_xfail
fi
ktap_print_header
ktap_set_plan 2
-unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
- && ktap_test_pass "ipv4" || ktap_test_fail "ipv4"
-unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $(basename $script) > /dev/null \
- && ktap_test_pass "ipv6" || ktap_test_fail "ipv6"
+unshare -n packetdrill ${ipv4_args[@]} ${optargs[@]} $script > /dev/null \
+ && ktap_test_pass "ipv4" || $failfunc "ipv4"
+unshare -n packetdrill ${ipv6_args[@]} ${optargs[@]} $script > /dev/null \
+ && ktap_test_pass "ipv6" || $failfunc "ipv6"
ktap_finished
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt
new file mode 100644
index 000000000000..38535701656e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-accept.pkt
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking accept.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+ +0...0.200 accept(3, ..., ...) = 4
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+
+ +.1 write(4, ..., 2000) = 2000
+ +0 > P. 1:2001(2000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt
new file mode 100644
index 000000000000..3692ef102381
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-connect.pkt
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking connect.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+
+ +.1...0.200 connect(3, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.1 < S. 0:0(0) ack 1 win 5792 <mss 1460,nop,wscale 2,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
new file mode 100644
index 000000000000..914eabab367a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-read.pkt
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking read.
+--tolerance_usecs=10000
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+ +0...0.100 read(4, ..., 2000) = 2000
+ +.1 < P. 1:2001(2000) ack 1 win 257
+ +0 > . 1:1(0) ack 2001
+
+ +.1...0.200 read(4, ..., 2000) = 2000
+ +.1 < P. 2001:4001(2000) ack 1 win 257
+ +0 > . 1:1(0) ack 4001
+
+ +.1 < P. 4001:6001(2000) ack 1 win 257
+ +0 > . 1:1(0) ack 6001
+ +0...0.000 read(4, ..., 1000) = 1000
+ +0...0.000 read(4, ..., 1000) = 1000
diff --git a/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt
new file mode 100644
index 000000000000..cec5a0725d95
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_blocking_blocking-write.pkt
@@ -0,0 +1,35 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test for blocking write.
+--tolerance_usecs=10000
+
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_min_tso_segs=10
+`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 50000 <mss 1000,nop,wscale 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 50000
+ +0 accept(3, ..., ...) = 4
+
+// Kernel doubles our value -> sk->sk_sndbuf is set to 42000
+ +0 setsockopt(4, SOL_SOCKET, SO_SNDBUF, [21000], 4) = 0
+ +0 getsockopt(4, SOL_SOCKET, SO_SNDBUF, [42000], [4]) = 0
+
+// A write of 60000 does not block.
+ +0...0.300 write(4, ..., 61000) = 61000 // this write() blocks
+
+ +.1 < . 1:1(0) ack 10001 win 50000
+
+ +.1 < . 1:1(0) ack 30001 win 50000
+
+// This ACK should wakeup the write(). An ACK of 35001 does not.
+ +.1 < . 1:1(0) ack 36001 win 50000
+
+// Reset to sysctls defaults.
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt
new file mode 100644
index 000000000000..8514d6bdbb6d
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-local-close-then-remote-fin.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test basic connection teardown where local process closes first:
+// the local process calls close() first, so we send a FIN, and receive an ACK.
+// Then we receive a FIN and ACK it.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +.01...0.011 connect(3, ..., ...) = 0
+ +0 > S 0:0(0) <...>
+ +0 < S. 0:0(0) ack 1 win 32768 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+
+ +0 write(3, ..., 1000) = 1000
+ +0 > P. 1:1001(1000) ack 1
+ +0 < . 1:1(0) ack 1001 win 257
+
+ +0 close(3) = 0
+ +0 > F. 1001:1001(0) ack 1
+ +0 < . 1:1(0) ack 1002 win 257
+
+ +0 < F. 1:1(0) ack 1002 win 257
+ +0 > . 1002:1002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt
new file mode 100644
index 000000000000..04103134bd99
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-on-syn-sent.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test to make sure no RST is being sent when close()
+// is called on a socket with SYN_SENT state.
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <...>
+
+// Application decideds to close the socket in SYN_SENT state
+// Make sure no RST is sent after close().
+ +0 close(3) = 0
+
+// Receive syn-ack to trigger the send side packet examination:
+// If a RESET were sent right after close(), it would have failed with
+// a mismatched timestamp.
+ +.1 < S. 0:0(0) ack 1 win 32000 <mss 1460,nop,wscale 7>
+ +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt
new file mode 100644
index 000000000000..5f3a2914213a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_close_close-remote-fin-then-close.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify behavior for the sequence: remote side sends FIN, then we close().
+// Since the remote side (client) closes first, we test our LAST_ACK code path.
+
+`./defaults.sh`
+
+// Initialize a server socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 257
+
+ +0 accept(3, ..., ...) = 4
+
+// Client closes first.
+ +.01 < F. 1:1(0) ack 1 win 257
+ +0 > . 1:1(0) ack 2
+
+// App notices that client closed.
+ +0 read(4, ..., 1000) = 0
+
+// Then we close.
+ +.01 close(4) = 0
+ +0 > F. 1:1(0) ack 2
+
+// Client ACKs our FIN.
+ +.01 < . 2:2(0) ack 2 win 257
+
+// Verify that we send RST in response to any incoming segments
+// (because the kernel no longer has any record of this socket).
+ +.01 < . 2:2(0) ack 2 win 257
+ +0 > R 2:2(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt
new file mode 100644
index 000000000000..643baf3267cf
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ecn_ecn-uses-ect0.pkt
@@ -0,0 +1,21 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test ECN: verify that Linux TCP ECN sending code uses ECT0 (not ECT1).
+//
+`./defaults.sh
+sysctl -q net.ipv4.tcp_ecn=1 # fully enabled
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+
+// ECN handshake: send EW flags in SYN packet, E flag in SYN-ACK response
++.002 ... 0.004 connect(4, ..., ...) = 0
+
+ +0 > SEW 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.002 < SE. 0:0(0) ack 1 win 32767 <mss 1000,nop,wscale 6,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+
+// Write 1 MSS.
++.002 write(4, ..., 1000) = 1000
+// Send 1 MSS with ect0.
+ +0 > [ect0] P. 1:1001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt
new file mode 100644
index 000000000000..f95b9b3c9fa1
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-large.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk. The large chunk itself should be packetized as
+// usual.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write another 10040B chunk with no coalescing options.
+ +0 send(4, ..., 10400, MSG_EOR) = 10400
+
+// Write a 2KB chunk. This chunk should not be appended to the packets created
+// the previous chunk.
+ +0 write(4, ..., 2000) = 2000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:20801(10800) ack 1
++.001 < . 1:1(0) ack 20801 win 514
+// This 2KB packet should be sent alone.
+ +0 > P. 20801:22801(2000) ack 1
++.001 < . 1:1(0) ack 22801 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt
new file mode 100644
index 000000000000..2ff66075288e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-retrans.pkt
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk. Also, when packets are retransmitted, they
+// will not be coalesce into the same skb.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write 10 400B chunks with no coalescing options.
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+// This chunk should not be appended to the skbs created for the previous chunk.
+ +0 write(4, ..., 10000) = 10000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:10801(800) ack 1
+// The 9 remaining 400B chunks should be sent as individual packets.
+ +0 > P. 10801:11201(400) ack 1
+ +0 > P. 11201:11601(400) ack 1
+ +0 > P. 11601:12001(400) ack 1
+ +0 > P. 12001:12401(400) ack 1
+ +0 > P. 12401:12801(400) ack 1
+ +0 > P. 12801:13201(400) ack 1
+ +0 > P. 13201:13601(400) ack 1
+ +0 > P. 13601:14001(400) ack 1
+ +0 > P. 14001:14401(400) ack 1
+// The last 10KB chunk should be sent separately.
+ +0 > P. 14401:24401(10000) ack 1
+
++.001 < . 1:1(0) ack 10401 win 514
++.001 < . 1:1(0) ack 10801 win 514
++.001 < . 1:1(0) ack 11201 win 514
++.001 < . 1:1(0) ack 11601 win 514
++.001 < . 1:1(0) ack 12001 win 514 <sack 13201:14401,nop,nop>
+// TCP should fill the hole but no coalescing should happen, and all
+// retransmissions should be sent out as individual packets.
+
+// Note : This is timeout based retransmit.
+// Do not put +0 here or flakes will come back.
++.004~+.008 > P. 12001:12401(400) ack 1
+
++.001 < . 1:1(0) ack 12401 win 514 <sack 13201:14401,nop,nop>
+ +0 > P. 12401:12801(400) ack 1
+ +0 > P. 12801:13201(400) ack 1
++.001 < . 1:1(0) ack 12801 win 514 <sack 13201:14401,nop,nop>
++.001 < . 1:1(0) ack 14401 win 514
++.001 < . 1:1(0) ack 24401 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt
new file mode 100644
index 000000000000..77039c5aac39
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-small.pkt
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write a 400B chunk with no coalescing options.
+ +0 send(4, ..., 400, MSG_EOR) = 400
+
+// This chunk should not be appended to the skbs created for the previous chunk.
+ +0 write(4, ..., 10000) = 10000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:10801(800) ack 1
+ +0 > P. 10801:20801(10000) ack 1
++.001 < . 1:1(0) ack 10401 win 514
++.001 < . 1:1(0) ack 10801 win 514
++.001 < . 1:1(0) ack 20801 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt
new file mode 100644
index 000000000000..dd5a06250595
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_eor_no-coalesce-subsequent.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP does not append any data from consequent writes to the tail
+// skb created for the chunk even though we have 10 back-to-back small
+// writes.
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+// Write a 10400B chunk to fill the ICW, and have a 400 byte skb sitting on
+// the tail.
+ +0 write(4, ..., 10400) = 10400
+
+// Write 10 400B chunks with no coalescing options.
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+ +0 send(4, ..., 400, MSG_EOR) = 400
+// This chunk should not be appended to the skbs created for the previous chunk.
+ +0 write(4, ..., 10000) = 10000
+
+ +0 > P. 1:10001(10000) ack 1
++.001 < . 1:1(0) ack 10001 win 514
+// Now we have enough room to send out the 2 x 400B packets out.
+ +0 > P. 10001:10801(800) ack 1
+// The 9 remaining 400B chunks should be sent as individual packets.
+ +0 > P. 10801:11201(400) ack 1
+ +0 > P. 11201:11601(400) ack 1
+ +0 > P. 11601:12001(400) ack 1
+ +0 > P. 12001:12401(400) ack 1
+ +0 > P. 12401:12801(400) ack 1
+ +0 > P. 12801:13201(400) ack 1
+ +0 > P. 13201:13601(400) ack 1
+ +0 > P. 13601:14001(400) ack 1
+ +0 > P. 14001:14401(400) ack 1
+// The last 10KB chunk should be sent separately.
+ +0 > P. 14401:24401(10000) ack 1
+
++.001 < . 1:1(0) ack 10401 win 514
++.001 < . 1:1(0) ack 10801 win 514
++.001 < . 1:1(0) ack 11201 win 514
++.001 < . 1:1(0) ack 11601 win 514
++.001 < . 1:1(0) ack 12001 win 514
++.001 < . 1:1(0) ack 12401 win 514
++.001 < . 1:1(0) ack 12801 win 514
++.001 < . 1:1(0) ack 13201 win 514
++.001 < . 1:1(0) ack 13601 win 514
++.001 < . 1:1(0) ack 14001 win 514
++.001 < . 1:1(0) ack 14401 win 514
++.001 < . 1:1(0) ack 24401 win 514
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt
new file mode 100644
index 000000000000..0d3c8077e830
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-10pkt-lost-1.pkt
@@ -0,0 +1,72 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation.
+// In this variant we test a simple case where in-flight == ssthresh
+// all the way through recovery, so during fast recovery we send one segment
+// for each segment SACKed/ACKed.
+
+// Set up config.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// RTT 100ms
+ +.1 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Send 10 data segments.
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1:1001.
+ +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:4001,nop,nop>
+// Enter fast recovery.
+ +0 > . 1:1001(1000) ack 1
+ +.01 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd
+assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh
+}%
+
+// Write some more, which we will send 1 MSS at a time,
+// as in-flight segments are SACKed or ACKed.
+ +.01 write(4, ..., 7000) = 7000
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:5001,nop,nop>
+ +0 > . 10001:11001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:6001,nop,nop>
+ +0 > . 11001:12001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:7001,nop,nop>
+ +0 > . 12001:13001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:8001,nop,nop>
+ +0 > . 13001:14001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:9001,nop,nop>
+ +0 > . 14001:15001(1000) ack 1
+
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:10001,nop,nop>
+ +0 > . 15001:16001(1000) ack 1
+
+ +.02 < . 1:1(0) ack 10001 win 320
+ +0 > P. 16001:17001(1000) ack 1
+// Leave fast recovery.
+ +.01 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_snd_cwnd == 7, tcpi_snd_cwnd
+assert tcpi_snd_ssthresh == 7, tcpi_snd_ssthresh
+}%
+
+ +.03 < . 1:1(0) ack 12001 win 320
+ +.02 < . 1:1(0) ack 14001 win 320
+ +.02 < . 1:1(0) ack 16001 win 320
+ +.02 < . 1:1(0) ack 17001 win 320
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt
new file mode 100644
index 000000000000..7842a10b6967
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost-1_4-11_16.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation. The sender sends 20 packets. Packet
+// 1 to 4, and 11 to 16 are dropped.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Write 20 data segments.
+ +0 write(4, ..., 20000) = 20000
+ +0 > P. 1:10001(10000) ack 1
+
+// Receive first DUPACK, entering PRR part
+ +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop>
+ +0 > . 10001:11001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop>
+ +0 > . 11001:12001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop>
+ +0 > . 1:1001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop>
+ +0 > . 3001:4001(1000) ack 1
+// Enter PRR CRB
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:11001,nop,nop>
+ +0 > . 12001:13001(1000) ack 1
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:12001,nop,nop>
+ +0 > . 13001:14001(1000) ack 1
+// Enter PRR slow start
+ +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:12001,nop,nop>
+ +0 > P. 14001:16001(2000) ack 1
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:12001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 > . 16001:17001(1000) ack 1
+// inflight reaches ssthresh, goes into packet conservation mode
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:13001,nop,nop>
+ +0 > . 17001:18001(1000) ack 1
++.002 < . 1:1(0) ack 1001 win 320 <sack 2001:14001,nop,nop>
+ +0 > . 18001:19001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt
new file mode 100644
index 000000000000..b66d7644c3b6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-30pkt-lost1_4.pkt
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation. The sender sends 20 packets. Packet
+// 1 to 4 are lost. The sender writes another 10 packets.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Send 20 data segments.
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1,2,3,4
+ +.01 < . 1:1(0) ack 1 win 320 <sack 4001:5001,nop,nop>
++.002 < . 1:1(0) ack 1 win 320 <sack 4001:6001,nop,nop>
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:7001,nop,nop>
+ +0 > . 1:1001(1000) ack 1
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:9001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
+ +0 < . 1:1(0) ack 1 win 320 <sack 4001:10001,nop,nop>
+ +0 > . 3001:4001(1000) ack 1
+
+// Receiver ACKs all data.
+ +.01 < . 1:1(0) ack 1001 win 320 <sack 4001:10001,nop,nop>
+ +0 < . 1:1(0) ack 2001 win 320 <sack 4001:10001,nop,nop>
+ +0 < . 1:1(0) ack 3001 win 320 <sack 4001:10001,nop,nop>
+ +0 < . 1:1(0) ack 10001 win 320
+
+// Writes another 10 packets, which the ssthresh*mss amount
+// should be sent right away
+ +.01 write(4, ..., 10000) = 10000
+ +0 > . 10001:17001(7000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt
new file mode 100644
index 000000000000..8e87bfecabb5
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_fast_recovery_prr-ss-ack-below-snd_una-cubic.pkt
@@ -0,0 +1,41 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test PRR-slowstart implementation.
+// In this variant we verify that the sender uses SACK info on an ACK
+// below snd_una.
+
+// Set up config.
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+// RTT 10ms
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Send 10 data segments.
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// Lost packet 1:1001,4001:5001,7001:8001.
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +0 < . 1:1(0) ack 1 win 320 <sack 1001:3001 8001:9001,nop,nop>
+ +0 > . 1:1001(1000) ack 1
+
++.012 < . 1:1(0) ack 4001 win 320 <sack 8001:9001,nop,nop>
+ +0 > . 4001:7001(3000) ack 1
+
+ +0 write(4, ..., 10000) = 10000
+
+// The following ACK was reordered - delayed so that it arrives with
+// an ACK field below snd_una. Here we check that the newly-SACKed
+// 2MSS at 5001:7001 cause us to send out 2 more MSS.
++.002 < . 1:1(0) ack 3001 win 320 <sack 5001:7001,nop,nop>
+ +0 > . 7001:8001(1000) ack 1
+ +0 > . 10001:11001(1000) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt
new file mode 100644
index 000000000000..96b01eb5b7a4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-no-sack.pkt
@@ -0,0 +1,53 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test RFC 3042 "Limited Transmit": "sending a new data segment in
+// response to each of the first two duplicate acknowledgments that
+// arrive at the sender".
+// This variation tests a receiver that doesn't support SACK.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Write some data, and send the initial congestion window.
+ +0 write(4, ..., 15000) = 15000
+ +0 > P. 1:10001(10000) ack 1
+
+// Limited transmit: on first dupack, send a new data segment.
+ +.11 < . 1:1(0) ack 1 win 320
+ +0 > . 10001:11001(1000) ack 1
+
+// Limited transmit: on second dupack, send a new data segment.
+ +.01 < . 1:1(0) ack 1 win 320
+ +0 > . 11001:12001(1000) ack 1
+
+// It turned out to be reordering, not loss.
+// We have one packet newly acked (1001:3001 were DUP-ACK'd)
+// So we revert state back to Open. Slow start cwnd from 10 to 11
+// and send 11 - 9 = 2 packets
+ +.01 < . 1:1(0) ack 3001 win 320
+ +0 > P. 12001:14001(2000) ack 1
+
+ +.02 < . 1:1(0) ack 5001 win 320
+ +0 > P. 14001:15001(1000) ack 1
+
+// Client gradually ACKs all data.
+ +.02 < . 1:1(0) ack 7001 win 320
+ +.02 < . 1:1(0) ack 9001 win 320
+ +.02 < . 1:1(0) ack 11001 win 320
+ +.02 < . 1:1(0) ack 13001 win 320
+ +.02 < . 1:1(0) ack 15001 win 320
+
+// Clean up.
+ +.17 close(4) = 0
+ +0 > F. 15001:15001(0) ack 1
+ +.1 < F. 1:1(0) ack 15002 win 257
+ +0 > . 15002:15002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt
new file mode 100644
index 000000000000..642da51ec3a4
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_limited_transmit_limited-transmit-sack.pkt
@@ -0,0 +1,50 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test RFC 3042 "Limited Transmit": "sending a new data segment in
+// response to each of the first two duplicate acknowledgments that
+// arrive at the sender".
+// This variation tests a receiver that supports SACK.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +.1 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 320
+ +0 accept(3, ..., ...) = 4
+
+// Write some data, and send the initial congestion window.
+ +0 write(4, ..., 15000) = 15000
+ +0 > P. 1:10001(10000) ack 1
+
+// Limited transmit: on first dupack, send a new data segment.
+ +.11 < . 1:1(0) ack 1 win 320 <sack 1001:2001,nop,nop>
+ +0 > . 10001:11001(1000) ack 1
+
+// Limited transmit: on second dupack, send a new data segment.
+ +.01 < . 1:1(0) ack 1 win 320 <sack 1001:3001,nop,nop>
+ +0 > . 11001:12001(1000) ack 1
+
+// It turned out to be reordering, not loss.
+ +.01 < . 1:1(0) ack 3001 win 320
+ +0 > P. 12001:14001(2000) ack 1
+
+ +.02 < . 1:1(0) ack 5001 win 320
+ +0 > P. 14001:15001(1000) ack 1
+
+// Client gradually ACKs all data.
+ +.02 < . 1:1(0) ack 7001 win 320
+ +.02 < . 1:1(0) ack 9001 win 320
+ +.02 < . 1:1(0) ack 11001 win 320
+ +.02 < . 1:1(0) ack 13001 win 320
+ +.02 < . 1:1(0) ack 15001 win 320
+
+// Clean up.
+ +.17 close(4) = 0
+ +0 > F. 15001:15001(0) ack 1
+ +.1 < F. 1:1(0) ack 15002 win 257
+ +0 > . 15002:15002(0) ack 2
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt
new file mode 100644
index 000000000000..7adae7a9ef4a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_https_client.pkt
@@ -0,0 +1,40 @@
+// SPDX-License-Identifier: GPL-2.0
+// This is a test inspired by an Android client app using SSL. This
+// test verifies using TCP_NODELAY would save application latency
+// (Perhaps even better with TCP_NAGLE).
+//
+`./defaults.sh
+ethtool -K tun0 tso off gso off
+./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
+ +0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+ +0 connect(4, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < S. 0:0(0) ack 1 win 5792 <mss 974,nop,nop,sackOK,nop,wscale 7>
+ +0 > . 1:1(0) ack 1
+
+// SSL handshake (resumed session)
+ +0 write(4, ..., 517) = 517
+ +0 > P. 1:518(517) ack 1
+ +.1 < . 1:1(0) ack 518 win 229
+
+ +0 < P. 1:144(143) ack 1 win 229
+ +0 > . 518:518(0) ack 144
+ +0 read(4, ..., 1000) = 143
+
+// Application POST header (51B) and body (2002B)
+ +0 write(4, ..., 51) = 51
+ +0 > P. 518:569(51) ack 144
+ +.03 write(4, ..., 2002) = 2002
+ +0 > . 569:1543(974) ack 144
+ +0 > P. 1543:2517(974) ack 144
+// Without disabling Nagle, this packet will not happen until the remote ACK.
+ +0 > P. 2517:2571(54) ack 144
+
+ +.1 < . 1:1(0) ack 2571 win 229
+
+// Reset sysctls
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt
new file mode 100644
index 000000000000..fa9c01813996
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sendmsg_msg_more.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test the MSG_MORE flag will correctly corks the tiny writes
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+// Disable Nagle by default on this socket.
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+// Test the basic case: MSG_MORE overwrites TCP_NODELAY and enables Nagle.
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 40}], msg_flags=0}, MSG_MORE) = 40
+ +.21~+.215 > P. 1:41(40) ack 1
+ +.01 < . 1:1(0) ack 41 win 257
+
+// Test unsetting MSG_MORE releases the packet
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 100}], msg_flags=0}, MSG_MORE) = 100
++.005 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 160}], msg_flags=0}, MSG_MORE) = 160
+ +.01 sendmsg(4, {msg_name(...)=...,
+ msg_iov(3)=[{..., 100}, {..., 200}, {..., 195}],
+ msg_flags=0}, MSG_MORE) = 495
++.008 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 5}], msg_flags=0}, 0) = 5
+ +0 > P. 41:801(760) ack 1
+ +.02 < . 1:1(0) ack 801 win 257
+
+
+// Test >MSS write will unleash MSS packets but hold on the remaining data.
+ +.1 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 3100}], msg_flags=0}, MSG_MORE) = 3100
+ +0 > . 801:3801(3000) ack 1
++.003 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 50}], msg_flags=0}, MSG_MORE) = 50
+
+ +.01 < . 1:1(0) ack 2801 win 257
+// Err... we relase the remaining right after the ACK? note that PUSH is reset
+ +0 > . 3801:3951(150) ack 1
+
+// Test we'll hold on the subsequent writes when inflight (3801:3951) > 0
++.001 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 1}], msg_flags=0}, MSG_MORE) = 1
++.002 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 2}], msg_flags=0}, MSG_MORE) = 2
++.003 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 3}], msg_flags=0}, MSG_MORE) = 3
++.004 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 4}], msg_flags=0}, MSG_MORE) = 4
+ +.02 < . 1:1(0) ack 3951 win 257
+ +0 > . 3951:3961(10) ack 1
+ +.02 < . 1:1(0) ack 3961 win 257
+
+
+// Test the case a MSG_MORE send followed by a write flushes the data
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{..., 20}], msg_flags=0}, MSG_MORE) = 20
+ +.05 write(4, ..., 20) = 20
+ +0 > P. 3961:4001(40) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt
new file mode 100644
index 000000000000..0ddec5f7dc1a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_nagle_sockopt_cork_nodelay.pkt
@@ -0,0 +1,43 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP_CORK and TCP_NODELAY sockopt behavior
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+// Set TCP_CORK sockopt to hold small packets
+ +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0
+
+ +0 write(4, ..., 40) = 40
+ +.05 write(4, ..., 40) = 40
+
+// Unset TCP_CORK should push pending bytes out
+ +.01 setsockopt(4, SOL_TCP, TCP_CORK, [0], 4) = 0
+ +0 > P. 1:81(80) ack 1
+ +.01 < . 1:1(0) ack 81 win 257
+
+// Set TCP_CORK sockopt to hold small packets
+ +0 setsockopt(4, SOL_TCP, TCP_CORK, [1], 4) = 0
+
+ +0 write(4, ..., 40) = 40
+ +.05 write(4, ..., 40) = 40
+
+// Set TCP_NODELAY sockopt should push pending bytes out
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+ +0 > P. 81:161(80) ack 1
+ +.01 < . 1:1(0) ack 161 win 257
+
+// Set MSG_MORE to hold small packets
+ +0 send(4, ..., 40, MSG_MORE) = 40
+ +.05 send(4, ..., 40, MSG_MORE) = 40
+
+// Set TCP_NODELAY sockopt should push pending bytes out
+ +.01 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+ +0 > . 161:241(80) ack 1
+ +.01 < . 1:1(0) ack 241 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt
new file mode 100644
index 000000000000..310ef31518da
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-route-refresh-ip-tos.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify that setsockopt calls that force a route refresh do not
+// cause problems matching SACKs with packets in the write queue.
+// This variant tests IP_TOS.
+
+`./defaults.sh`
+
+// Establish a connection.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_IP, IP_MTU_DISCOVER, [IP_PMTUDISC_DONT], 1) = 0
+ +0...0.010 connect(3, ..., ...) = 0
+
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 65535 <mss 1460,nop,wscale 2,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+
+ +.01 write(3, ..., 5840) = 5840
+ +0 > P. 1:5841(5840) ack 1
+ +.01 < . 1:1(0) ack 5841 win 65535
+
+ +.01 write(3, ..., 5840) = 5840
+ +0 > P. 5841:11681(5840) ack 1
+ +.01 < . 1:1(0) ack 11681 win 65535
+
+ +.01 write(3, ..., 14600) = 14600
+ +0 > P. 11681:26281(14600) ack 1
+
+// Try the socket option that we know can force a route refresh.
+ +0 setsockopt(3, SOL_IP, IP_TOS, [4], 1) = 0
+// Then revert to avoid routing/mangling/etc implications of that setting.
+ +0 setsockopt(3, SOL_IP, IP_TOS, [0], 1) = 0
+
+// Verify that we do not retransmit the SACKed segments.
+ +.01 < . 1:1(0) ack 13141 win 65535 <sack 16061:17521 20441:26281,nop,nop>
+ +0 > . 13141:16061(2920) ack 1
+ +0 > P. 17521:20441(2920) ack 1
+ +.01 < . 1:1(0) ack 26281 win 65535
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt
new file mode 100644
index 000000000000..f185e1ac57ea
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-2-6-8-3-9-nofack.pkt
@@ -0,0 +1,64 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests non-FACK SACK with SACKs coming in the order
+// 2 6 8 3 9, to test what happens when we get a new SACKed range
+// (for packet 3) that is on the right of an existing SACKed range
+// (for packet 2).
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+ +.1 < . 1:1(0) ack 1 win 257 <sack 2001:3001,nop,nop>
++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001,nop,nop>
++.001 < . 1:1(0) ack 1 win 257 <sack 2001:3001 6001:7001 8001:9001,nop,nop>
+
+// 3 SACKed packets, so we enter Fast Recovery.
+ +0 > . 1:1001(1000) ack 1
+ +0 %{ assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state }%
+ +0 %{ assert tcpi_lost == 6, tcpi_lost }%
+
+// SACK for 3001:4001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
++.007 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:9001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+ +0 %{ assert tcpi_reordering == 6, tcpi_reordering }% // 8001:9001 -> 3001:4001 is 6
+
+// SACK for 9001:10001.
+ +.01 < . 1:1(0) ack 1 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop>
+ +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+
+// ACK for 1:1001 as packets from t=0.303 arrive.
++.083 < . 1:1(0) ack 1001 win 257 <sack 2001:4001 6001:7001 8001:10001,nop,nop>
+ +0 %{ assert tcpi_lost == 4,tcpi_lost }%
+
+// ACK for 1:4001 as packets from t=0.310 arrive.
++.017 < . 1:1(0) ack 4001 win 257 <sack 6001:7001 8001:10001,nop,nop>
+ +0 %{ assert tcpi_lost == 3,tcpi_lost }%
+
+// ACK for 1:7001 as packets from t=0.320 arrive.
+ +.01 < . 1:1(0) ack 7001 win 257 <sack 8001:10001,nop,nop>
+
+// ACK for all data as packets from t=0.403 arrive.
+ +.1 < . 1:1(0) ack 10001 win 257
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt
new file mode 100644
index 000000000000..0093b4973934
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-3-4-8-9-fack.pkt
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests the case where we mark packets 0-4 lost, then
+// get a SACK for 3, and then a SACK for 4.
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// SACK for 7001:8001. Using RACK we delay the fast retransmit.
+ +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop>
+// RACK reordering timer
++.027 > . 1:1001(1000) ack 1
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_lost == 7, tcpi_lost # RACK thinks 1:7001 are lost
+assert tcpi_reordering == 3, tcpi_reordering
+}%
+
+// SACK for 3001:4001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:4001 7001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 %{
+assert tcpi_lost == 6, tcpi_lost # since 3001:4001 is no longer lost
+assert tcpi_reordering == 5, tcpi_reordering # 7001:8001 -> 3001:4001
+}%
+
+// SACK for 4001:5001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
+// It uses the RFC3517 algorithm to mark 1:3001 lost
+// because >=3 higher-sequence packets are SACKed.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:8001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
+ +0 %{
+assert tcpi_lost == 5,tcpi_lost # SACK/RFC3517 thinks 1:3001 are lost
+}%
+
+// SACK for 8001:9001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:9001,nop,nop>
+
+// SACK for 9001:10001.
++.002 < . 1:1(0) ack 1 win 257 <sack 3001:5001 7001:10001,nop,nop>
+ +0 > . 5001:6001(1000) ack 1
+
+// To simplify clean-up, say we get an ACK for all data.
+ +.1 < . 1:1(0) ack 10001 win 257
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt
new file mode 100644
index 000000000000..980a832dc81c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sack_sack-shift-sacked-7-5-6-8-9-fack.pkt
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test shifting of newly-SACKed ranges onto the previous already-SACKed skb.
+// This variant tests the case where we mark packets 0-4 lost, then
+// get a SACK for 5, and then a SACK for 6.
+
+`./defaults.sh`
+
+// Establish a connection and send 10 MSS.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.1 < . 1:1(0) ack 1 win 1024
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, ..., 10000) = 10000
+ +0 > P. 1:10001(10000) ack 1
+
+// SACK for 7001:8001. Using RACK we delay a fast retransmit.
+ +.1 < . 1:1(0) ack 1 win 257 <sack 7001:8001,nop,nop>
++.027 > . 1:1001(1000) ack 1
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Recovery, tcpi_ca_state
+assert tcpi_lost == 7,tcpi_lost # RACK thinks 1:7001 are lost
+assert tcpi_reordering == 3, tcpi_reordering
+}%
+
+// SACK for 5001:6001.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:6001 7001:8001,nop,nop>
+ +0 > . 1001:2001(1000) ack 1
+ +0 %{
+assert tcpi_lost == 6, tcpi_lost
+assert tcpi_reordering == 3, tcpi_reordering # 7001:8001 -> 5001:6001 is 3
+}%
+
+// SACK for 6001:7001.
+// This SACK for an adjacent range causes the sender to
+// shift the newly-SACKed range onto the previous skb.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:8001,nop,nop>
+ +0 > . 2001:3001(1000) ack 1
+ +0 %{ assert tcpi_lost == 5, tcpi_lost }%
+
+// SACK for 8001:9001.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:9001,nop,nop>
+ +0 > . 3001:4001(1000) ack 1
+
+// SACK for 9001:10001.
+ +0 < . 1:1(0) ack 1 win 257 <sack 5001:10001,nop,nop>
+ +0 > . 4001:5001(1000) ack 1
+
+// To simplify clean-up, say we get an ACK for all data.
+ +.1 < . 1:1(0) ack 10001 win 257
+ +0 %{
+assert tcpi_ca_state == TCP_CA_Open, tcpi_ca_state
+assert tcpi_unacked == 0, tcpi_unacked
+assert tcpi_sacked == 0, tcpi_sacked
+assert tcpi_lost == 0, tcpi_lost
+assert tcpi_retrans == 0, tcpi_retrans
+}%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt
new file mode 100644
index 000000000000..6740859a1360
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_sendfile_sendfile-simple.pkt
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+// Simplest possible test of open() and then sendfile().
+// We write some zeroes into a file (since packetdrill expects payloads
+// to be all zeroes) and then open() the file, then use sendfile()
+// and verify that the correct number of zeroes goes out.
+
+`./defaults.sh
+/bin/rm -f /tmp/testfile
+/bin/dd bs=1 count=5 if=/dev/zero of=/tmp/testfile status=none
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +0 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 open("/tmp/testfile", O_RDONLY) = 5
+ +0 sendfile(4, 5, [0], 5) = 5
+ +0 > P. 1:6(5) ack 1
diff --git a/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt
new file mode 100644
index 000000000000..0cbd43253236
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_splice_tcp_splice_loop_test.pkt
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh`
+
+// Initialize a server socket
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 setsockopt(3, SOL_IP, IP_FREEBIND, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Connection should get accepted
+ +0 < S 0:0(0) win 32972 <mss 1460,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <...>
+ +0 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+
+ +0 pipe([5, 6]) = 0
+ +0 < U. 1:101(100) ack 1 win 257 urg 100
+ +0 splice(4, NULL, 6, NULL, 99, 0) = 99
+ +0 splice(4, NULL, 6, NULL, 1, 0) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt
new file mode 100644
index 000000000000..8940726a3ec2
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_fastopen-invalid-buf-ptr.pkt
@@ -0,0 +1,42 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test TCP fastopen behavior with NULL as buffer pointer, but a non-zero
+// buffer length.
+`./defaults.sh
+./set_sysctls.py /proc/sys/net/ipv4/tcp_timestamps=0`
+
+// Cache warmup: send a Fast Open cookie request
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(3, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation is now in progress)
++0 > S 0:0(0) <mss 1460,nop,nop,sackOK,nop,wscale 8,FO,nop,nop>
++0 < S. 123:123(0) ack 1 win 14600 <mss 1460,nop,nop,sackOK,nop,wscale 6,FO abcd1234,nop,nop>
++0 > . 1:1(0) ack 1
++0 close(3) = 0
++0 > F. 1:1(0) ack 1
++0 < F. 1:1(0) ack 2 win 92
++0 > . 2:2(0) ack 2
+
+// Test with MSG_FASTOPEN without TCP_FASTOPEN_CONNECT.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 4
++0 fcntl(4, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 sendto(4, NULL, 1, MSG_FASTOPEN, ..., ...) = -1
++0 close(4) = 0
+
+// Test with TCP_FASTOPEN_CONNECT without MSG_FASTOPEN.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 5
++0 fcntl(5, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(5, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(5, ..., ...) = 0
++0 sendto(5, NULL, 1, 0, ..., ...) = -1
++0 close(5) = 0
+
+// Test with both TCP_FASTOPEN_CONNECT and MSG_FASTOPEN.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 6
++0 fcntl(6, F_SETFL, O_RDWR|O_NONBLOCK) = 0
++0 setsockopt(6, SOL_TCP, TCP_FASTOPEN_CONNECT, [1], 4) = 0
++0 connect(6, ..., ...) = 0
++0 sendto(6, NULL, 1, MSG_FASTOPEN, ..., ...) = -1
++0 close(6) = 0
+
+`/tmp/sysctl_restore_${PPID}.sh`
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
new file mode 100644
index 000000000000..b2b2cdf27e20
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_sendmsg-empty-iov.pkt
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we correctly skip zero-length IOVs.
+`./defaults.sh`
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_ZEROCOPY, [1], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 257
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0
+
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(4)=[{..., 0}, {..., 40}, {..., 0}, {..., 20}],
+ msg_flags=0}, 0) = 60
+ +0 > P. 1:61(60) ack 1
+ +.01 < . 1:1(0) ack 61 win 257
+
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(4)=[{..., 0}, {..., 0}, {..., 0}, {..., 0}],
+ msg_flags=0}, MSG_ZEROCOPY) = 0
+
+ +0 sendmsg(4, {msg_name(...)=...,
+ msg_iov(4)=[{..., 0}, {..., 10}, {..., 0}, {..., 50}],
+ msg_flags=0}, MSG_ZEROCOPY) = 60
+ +0 > P. 61:121(60) ack 1
+ +.01 < . 1:1(0) ack 121 win 257
diff --git a/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt
new file mode 100644
index 000000000000..59f5903f285c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_syscall_bad_arg_syscall-invalid-buf-ptr.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test kernel behavior with NULL as buffer pointer
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.2 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+
+ +0 write(4, NULL, 1000) = -1 EFAULT (Bad address)
+ +0 send(4, NULL, 1000, 0) = -1 EFAULT (Bad address)
+ +0 sendto(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address)
+
+ +0 < . 1:1001(1000) ack 1 win 200
+ +0 read(4, NULL, 1000) = -1 EFAULT (Bad address)
+ +0 recv(4, NULL, 1000, 0) = -1 EFAULT (Bad address)
+ +0 recvfrom(4, NULL, 1000, 0, ..., ...) = -1 EFAULT (Bad address)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt
new file mode 100644
index 000000000000..d7fdb43a8e89
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-last_data_recv.pkt
@@ -0,0 +1,20 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tcpi_last_data_recv for active session
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
++0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
++0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
++.030 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8>
++0 > . 1:1(0) ack 1
+
++1 %{ assert 990 <= tcpi_last_data_recv <= 1010, tcpi_last_data_recv }%
+
++0 < . 1:1001(1000) ack 1 win 300
++0 > . 1:1(0) ack 1001
+
++0 %{ assert tcpi_last_data_recv <= 10, tcpi_last_data_recv }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt
new file mode 100644
index 000000000000..a9bcd46f6cb6
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-rwnd-limited.pkt
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test rwnd limited time in tcp_info for client side.
+
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+
+// Server advertises 0 receive window.
+ +.01 < S. 0:0(0) ack 1 win 0 <mss 1000,nop,nop,sackOK>
+
+ +0 > . 1:1(0) ack 1
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking
+
+// Make sure that initial rwnd limited time is 0.
+ +0 %{ assert tcpi_rwnd_limited == 0, tcpi_rwnd_limited }%
+
+// Receive window limited time starts here.
+ +0 write(3, ..., 1000) = 1000
+
+// Check that rwnd limited time in tcp_info is around 0.1s.
+ +.1 %{ assert 98000 <= tcpi_rwnd_limited <= 110000, tcpi_rwnd_limited }%
+
+// Server opens the receive window.
+ +.1 < . 1:1(0) ack 1 win 2000
+
+// Check that rwnd limited time in tcp_info is around 0.2s.
+ +0 %{ assert 198000 <= tcpi_rwnd_limited <= 210000, tcpi_rwnd_limited }%
+
+ +0 > P. 1:1001(1000) ack 1
+
+// Server advertises a very small receive window.
+ +.03 < . 1:1(0) ack 1001 win 10
+
+// Receive window limited time starts again.
+ +0 write(3, ..., 1000) = 1000
+
+// Server opens the receive window again.
+ +.1 < . 1:1(0) ack 1001 win 2000
+// Check that rwnd limited time in tcp_info is around 0.3s
+// and busy time is 0.3 + 0.03 (server opened small window temporarily).
+ +0 %{ assert 298000 <= tcpi_rwnd_limited <= 310000, tcpi_rwnd_limited;\
+ assert 328000 <= tcpi_busy_time <= 340000, tcpi_busy_time;\
+}%
+
+ +0 > P. 1001:2001(1000) ack 1
+ +.02 < . 1:1(0) ack 2001 win 2000
+ +0 %{ assert 348000 <= tcpi_busy_time <= 360000, tcpi_busy_time }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt
new file mode 100644
index 000000000000..f0de2acd0f8e
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_tcp_info_tcp-info-sndbuf-limited.pkt
@@ -0,0 +1,38 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test send-buffer-limited time in tcp_info for client side.
+`./defaults.sh`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 10000 <mss 1000,sackOK,nop,nop,nop,wscale 8>
+ +0 > . 1:1(0) ack 1
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [10000], 4) = 0
+ +0 getsockopt(3, SOL_SOCKET, SO_SNDBUF, [20000], [4]) = 0
+
+ +.09...0.14 write(3, ..., 150000) = 150000
+
+ +.01 < . 1:1(0) ack 10001 win 10000
+
+ +.01 < . 1:1(0) ack 30001 win 10000
+
+// cwnd goes from 40(60KB) to 80(120KB), and that we hit the tiny sndbuf limit 10KB
+ +.01 < . 1:1(0) ack 70001 win 10000
+
+ +.02 < . 1:1(0) ack 95001 win 10000
+ +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited; \
+ assert 49000 <= tcpi_busy_time <= 52000, tcpi_busy_time; \
+ assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }%
+
+// This ack frees up enough buffer so we are no longer
+// buffer limited (socket flag SOCK_NOSPACE is cleared)
+ +.02 < . 1:1(0) ack 150001 win 10000
+ +0 %{ assert 19000 <= tcpi_sndbuf_limited <= 21000, tcpi_sndbuf_limited;\
+ assert 69000 <= tcpi_busy_time <= 73000, tcpi_busy_time;\
+ assert 0 == tcpi_rwnd_limited, tcpi_rwnd_limited }%
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt
new file mode 100644
index 000000000000..2087ec0c746a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_client-only-last-byte.pkt
@@ -0,0 +1,92 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that tx timestamping sends timestamps only for
+// the last byte of each sendmsg.
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 20000 <mss 1000,nop,nop,sackOK>
+ +0 > . 1:1(0) ack 1
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+ +0 fcntl(3, F_SETFL, O_RDWR) = 0 // set back to blocking
+
+ +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+ +0 write(3, ..., 11000) = 11000
+ +0 > P. 1:10001(10000) ack 1
+ +.01 < . 1:1(0) ack 10001 win 4000
+ +0 > P. 10001:11001(1000) ack 1
+ +.01 < . 1:1(0) ack 11001 win 4000
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+ +0 %{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the last byte should be received almost immediately
+// once 10001 is acked at t=20ms.
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...)
+// is called after when SYN is acked. So, we expect the last byte of the first
+// chunk to have a timestamp key of 10999 (i.e., 11000 - 1).
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=10999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the last byte should be received almost immediately
+// once 10001 is acked at t=20ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=10999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the last byte should be received at t=30ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=30000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=10999}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt
new file mode 100644
index 000000000000..876024a31110
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_partial.pkt
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tx timestamping for partial writes (IPv4).
+`./defaults.sh
+`
+
+// Create a socket and set it to non-blocking.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 fcntl(3, F_GETFL) = 0x2 (flags O_RDWR)
+ +0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+// Establish connection and verify that there was no error.
+ +0 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+ +0 > S 0:0(0) <mss 1460,sackOK,TS val 100 ecr 0,nop,wscale 8>
+ +.01 < S. 0:0(0) ack 1 win 2000 <mss 1000,sackOK,TS val 700 ecr 100,nop,wscale 7>
+ +0 > . 1:1(0) ack 1 <nop,nop,TS val 200 ecr 700>
+ +0 getsockopt(3, SOL_SOCKET, SO_ERROR, [0], [4]) = 0
+
+ +0 setsockopt(3, SOL_SOCKET, SO_SNDBUF, [1000], 4) = 0
+ +0 setsockopt(3, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// We have a partial write.
+ +0 write(3, ..., 10000) = 2964
+ +0 > . 1:989(988) ack 1 <nop,nop,TS val 110 ecr 700>
+ +0 > P. 989:1977(988) ack 1 <nop,nop,TS val 110 ecr 700>
+ +.01 < . 1:1(0) ack 1977 win 92 <nop,nop,TS val 800 ecr 200>
+ +0 > P. 1977:2965(988) ack 1 <nop,nop,TS val 114 ecr 800>
+ +.01 < . 1:1(0) ack 2965 win 92 <nop,nop,TS val 800 ecr 200>
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+ +0 %{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately
+// after the first ack at t=20ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=2963}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the first chunk should be received almost immediately
+// after the first ack at t=20ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=2963}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the first chunk should be received after the last ack at
+// t=30ms.
+ +0 recvmsg(3, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=30000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=2963}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt
new file mode 100644
index 000000000000..84d94780e6be
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_timestamping_server.pkt
@@ -0,0 +1,145 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test tx timestamping for server-side (IPv4).
+`./defaults.sh
+`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop,nop,wscale 10>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK,nop,wscale 8>
+ +.01 < . 1:1(0) ack 1 win 514
+
+ +0 accept(3, ..., ...) = 4
+ +0 setsockopt(4, SOL_SOCKET, SO_TIMESTAMPING,
+ [SOF_TIMESTAMPING_TX_SCHED | SOF_TIMESTAMPING_TX_SOFTWARE |
+ SOF_TIMESTAMPING_TX_ACK | SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_ID], 4) = 0
+
+// Write two 2KB chunks.
+// setsockopt(..., [SOF_TIMESTAMPING_SOFTWARE | SOF_TIMESTAMPING_OPT_ID], ...)
+// is called after when SYN is acked. So, we expect the last byte of the first
+// and the second chunks to have timestamp keys of 1999 (i.e., 2000 - 1) and
+// 3999 (i.e., 4000 - 1) respectively.
+ +0 write(4, ..., 2000) = 2000
+ +0 write(4, ..., 2000) = 2000
+ +0 > P. 1:2001(2000) ack 1
+ +0 > P. 2001:4001(2000) ack 1
+ +.01 < . 1:1(0) ack 2001 win 514
+ +.01 < . 1:1(0) ack 4001 win 514
+
+// Make sure that internal TCP timestamps are not overwritten and we have sane
+// RTT measurement.
+ +0 %{
+assert 5000 <= tcpi_rtt <= 20000, 'srtt=%d us' % tcpi_rtt
+}%
+
+// SCM_TSTAMP_SCHED for the first chunk should be received almost immediately
+// after write at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=1999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the first chunk should be received almost immediately
+// after write at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=1999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SCHED for the second chunk should be received almost immediately
+// after that at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SCHED,
+ ee_data=3999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_SND for the second chunk should be received almost immediately
+// after that at t=10ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=10000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_SND,
+ ee_data=3999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the first chunk should be received at t=20ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=20000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=1999}}
+ ]}, MSG_ERRQUEUE) = 0
+// SCM_TSTAMP_ACK for the second chunk should be received at t=30ms.
+ +0 recvmsg(4, {msg_name(...)=...,
+ msg_iov(1)=[{...,0}],
+ msg_flags=MSG_ERRQUEUE|MSG_TRUNC,
+ msg_control=[
+ {cmsg_level=SOL_SOCKET,
+ cmsg_type=SCM_TIMESTAMPING,
+ cmsg_data={scm_sec=0,scm_nsec=30000000}},
+ {cmsg_level=CMSG_LEVEL_IP,
+ cmsg_type=CMSG_TYPE_RECVERR,
+ cmsg_data={ee_errno=ENOMSG,
+ ee_origin=SO_EE_ORIGIN_TIMESTAMPING,
+ ee_type=0,
+ ee_code=0,
+ ee_info=SCM_TSTAMP_ACK,
+ ee_data=3999}}
+ ]}, MSG_ERRQUEUE) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt
new file mode 100644
index 000000000000..e61424a7bd0a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_fin_tsval.pkt
@@ -0,0 +1,23 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we send FIN packet with correct TSval
+--tcp_ts_tick_usecs=1000
+--tolerance_usecs=7000
+
+`./defaults.sh`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+ +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+ +1 close(4) = 0
+// Check that FIN TSval is updated properly, one second has passed since last sent packet.
+ +0 > F. 1:1(0) ack 1 <nop,nop,TS val 1200 ecr 200>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt
new file mode 100644
index 000000000000..174ce9a1bfc0
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_invalid_ack.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we reject TS val updates on a packet with invalid ACK sequence
+
+`./defaults.sh
+`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +.1 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+ +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+// bad packet with high tsval (its ACK sequence is above our sndnxt)
+ +0 < F. 1:1(0) ack 9999 win 20000 <nop,nop,TS val 200000 ecr 100>
+
+
+ +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100>
+ +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt
new file mode 100644
index 000000000000..2e3b3bb7493a
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_ts_recent_reset_tsval.pkt
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+// Test that we send RST packet with correct TSval
+--tcp_ts_tick_usecs=1000
+
+`./defaults.sh`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +0 < S 0:0(0) win 20000 <mss 1000,sackOK,TS val 100 ecr 0>
+ +0 > S. 0:0(0) ack 1 <mss 1460,sackOK,TS val 100 ecr 100>
+ +.1 < . 1:1(0) ack 1 win 20000 <nop,nop,TS val 200 ecr 100>
+ +0 accept(3, ..., ...) = 4
+
+ +0 < . 1:1001(1000) ack 1 win 20000 <nop,nop,TS val 201 ecr 100>
+ +0 > . 1:1(0) ack 1001 <nop,nop,TS val 200 ecr 201>
+
+ +1 close(4) = 0
+// Check that RST TSval is updated properly, one second has passed since last sent packet.
+ +0 > R. 1:1(0) ack 1001 <nop,nop,TS val 1200 ecr 201>
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
new file mode 100644
index 000000000000..183051ba0cae
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user-timeout-probe.pkt
@@ -0,0 +1,37 @@
+// SPDX-License-Identifier: GPL-2.0
+
+`./defaults.sh`
+
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+
+ +0 < S 0:0(0) win 0 <mss 1460>
+ +0 > S. 0:0(0) ack 1 <mss 1460>
+
+ +.1 < . 1:1(0) ack 1 win 65530
+ +0 accept(3, ..., ...) = 4
+
+ +0 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+ +0 write(4, ..., 24) = 24
+ +0 > P. 1:25(24) ack 1
+ +.1 < . 1:1(0) ack 25 win 65530
+ +0 %{ assert tcpi_probes == 0, tcpi_probes; \
+ assert tcpi_backoff == 0, tcpi_backoff }%
+
+// install a qdisc dropping all packets
+ +0 `tc qdisc delete dev tun0 root 2>/dev/null ; tc qdisc add dev tun0 root pfifo limit 0`
+ +0 write(4, ..., 24) = 24
+ // When qdisc is congested we retry every 500ms
+ // (TCP_RESOURCE_PROBE_INTERVAL) and therefore
+ // we retry 6 times before hitting 3s timeout.
+ // First verify that the connection is alive:
++3.250 write(4, ..., 24) = 24
+ // Now verify that shortly after that the socket is dead:
+ +.100 write(4, ..., 24) = -1 ETIMEDOUT (Connection timed out)
+
+ +0 %{ assert tcpi_probes == 6, tcpi_probes; \
+ assert tcpi_backoff == 0, tcpi_backoff }%
+ +0 close(4) = 0
diff --git a/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt
new file mode 100644
index 000000000000..2efe02bfba9c
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_user_timeout_user_timeout.pkt
@@ -0,0 +1,32 @@
+// SPDX-License-Identifier: GPL-2.0
+`./defaults.sh`
+
+// Initialize connection
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+ +0 < S 0:0(0) win 32792 <mss 1000,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +.1 < . 1:1(0) ack 1 win 32792
+
+
+ +0 accept(3, ..., ...) = 4
+
+// Okay, we received nothing, and decide to close this idle socket.
+// We set TCP_USER_TIMEOUT to 3 seconds because really it is not worth
+// trying hard to cleanly close this flow, at the price of keeping
+// a TCP structure in kernel for about 1 minute !
+ +2 setsockopt(4, SOL_TCP, TCP_USER_TIMEOUT, [3000], 4) = 0
+ +0 close(4) = 0
+
+ +0 > F. 1:1(0) ack 1
+ +.3~+.400 > F. 1:1(0) ack 1
+ +.3~+.400 > F. 1:1(0) ack 1
+ +.6~+.800 > F. 1:1(0) ack 1
+
+// We finally receive something from the peer, but it is way too late
+// Our socket vanished because TCP_USER_TIMEOUT was really small
+ +0 < . 1:2(1) ack 1 win 32792
+ +0 > R 1:1(0)
diff --git a/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt
new file mode 100644
index 000000000000..8bd60226ccfc
--- /dev/null
+++ b/tools/testing/selftests/net/packetdrill/tcp_validate_validate-established-no-flags.pkt
@@ -0,0 +1,24 @@
+// SPDX-License-Identifier: GPL-2.0
+// Verify that established connections drop a segment without the ACK flag set.
+
+`./defaults.sh`
+
+// Create a socket.
+ 0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
+ +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
+ +0 bind(3, ..., ...) = 0
+ +0 listen(3, 1) = 0
+
+// Establish a connection.
+ +0 < S 0:0(0) win 20000 <mss 1000,sackOK,nop,nop>
+ +0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,sackOK>
+ +.01 < . 1:1(0) ack 1 win 20000
+ +0 accept(3, ..., ...) = 4
+
+// Receive a segment with no flags set, verify that it's not enqueued.
+ +.01 < - 1:1001(1000) win 20000
+ +0 ioctl(4, SIOCINQ, [0]) = 0
+
+// Receive a segment with ACK flag set, verify that it is enqueued.
+ +.01 < . 1:1001(1000) ack 1 win 20000
+ +0 ioctl(4, SIOCINQ, [1000]) = 0
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 1a706d03bb6b..9a85f93c33d8 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -44,9 +44,11 @@ struct tls_crypto_info_keys {
};
static void tls_crypto_info_init(uint16_t tls_version, uint16_t cipher_type,
- struct tls_crypto_info_keys *tls12)
+ struct tls_crypto_info_keys *tls12,
+ char key_generation)
{
- memset(tls12, 0, sizeof(*tls12));
+ memset(tls12, key_generation, sizeof(*tls12));
+ memset(tls12, 0, sizeof(struct tls_crypto_info));
switch (cipher_type) {
case TLS_CIPHER_CHACHA20_POLY1305:
@@ -275,7 +277,7 @@ TEST_F(tls_basic, recseq_wrap)
if (self->notls)
SKIP(return, "no TLS support");
- tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0);
memset(&tls12.aes128.rec_seq, 0xff, sizeof(tls12.aes128.rec_seq));
ASSERT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
@@ -391,7 +393,7 @@ FIXTURE_SETUP(tls)
SKIP(return, "Unsupported cipher in FIPS mode");
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
- &tls12);
+ &tls12, 0);
ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
@@ -1175,7 +1177,7 @@ TEST_F(tls, bidir)
struct tls_crypto_info_keys tls12;
tls_crypto_info_init(variant->tls_version, variant->cipher_type,
- &tls12);
+ &tls12, 0);
ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
tls12.len);
@@ -1614,7 +1616,7 @@ TEST_F(tls, getsockopt)
EXPECT_EQ(get.crypto_info.cipher_type, variant->cipher_type);
/* get the full crypto_info */
- tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &expect, 0);
len = expect.len;
memrnd(&get, sizeof(get));
EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &get, &len), 0);
@@ -1668,6 +1670,464 @@ TEST_F(tls, recv_efault)
EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0);
}
+#define TLS_RECORD_TYPE_HANDSHAKE 0x16
+/* key_update, length 1, update_not_requested */
+static const char key_update_msg[] = "\x18\x00\x00\x01\x00";
+static void tls_send_keyupdate(struct __test_metadata *_metadata, int fd)
+{
+ size_t len = sizeof(key_update_msg);
+
+ EXPECT_EQ(tls_send_cmsg(fd, TLS_RECORD_TYPE_HANDSHAKE,
+ (char *)key_update_msg, len, 0),
+ len);
+}
+
+static void tls_recv_keyupdate(struct __test_metadata *_metadata, int fd, int flags)
+{
+ char buf[100];
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, fd, TLS_RECORD_TYPE_HANDSHAKE, buf, sizeof(buf), flags),
+ sizeof(key_update_msg));
+ EXPECT_EQ(memcmp(buf, key_update_msg, sizeof(key_update_msg)), 0);
+}
+
+/* set the key to 0 then 1 for RX, immediately to 1 for TX */
+TEST_F(tls_basic, rekey_rx)
+{
+ struct tls_crypto_info_keys tls12_0, tls12_1;
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_0, 0);
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_1, 1);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_0, tls12_0.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len);
+ EXPECT_EQ(ret, 0);
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+/* set the key to 0 then 1 for TX, immediately to 1 for RX */
+TEST_F(tls_basic, rekey_tx)
+{
+ struct tls_crypto_info_keys tls12_0, tls12_1;
+ char const *test_str = "test_message";
+ int send_len = strlen(test_str) + 1;
+ char buf[20];
+ int ret;
+
+ if (self->notls)
+ return;
+
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_0, 0);
+ tls_crypto_info_init(TLS_1_3_VERSION, TLS_CIPHER_AES_GCM_128,
+ &tls12_1, 1);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_0, tls12_0.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_1, tls12_1.len);
+ ASSERT_EQ(ret, 0);
+
+ ret = setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_1, tls12_1.len);
+ EXPECT_EQ(ret, 0);
+
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
+}
+
+TEST_F(tls, rekey)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ char const *test_str_2 = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ /* initial send/recv */
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* send after rekey */
+ send_len = strlen(test_str_2) + 1;
+ EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len);
+
+ /* can't receive the KeyUpdate without a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* recv blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* recv non-blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* update RX key */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ /* recv after rekey */
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0);
+}
+
+TEST_F(tls, rekey_fail)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ char const *test_str_2 = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+
+ /* initial send/recv */
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+
+ if (variant->tls_version != TLS_1_3_VERSION) {
+ /* just check that rekey is not supported and return */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EBUSY);
+ return;
+ }
+
+ /* successful update */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* invalid update: change of version */
+ tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* invalid update (RX socket): change of version */
+ tls_crypto_info_init(TLS_1_2_VERSION, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* invalid update: change of cipher */
+ if (variant->cipher_type == TLS_CIPHER_AES_GCM_256)
+ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_CHACHA20_POLY1305, &tls12, 1);
+ else
+ tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_256, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* send after rekey, the invalid updates shouldn't have an effect */
+ send_len = strlen(test_str_2) + 1;
+ EXPECT_EQ(send(self->fd, test_str_2, send_len, 0), send_len);
+
+ /* can't receive the KeyUpdate without a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), -1);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* recv blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), 0), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* recv non-blocking -> -EKEYEXPIRED */
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_DONTWAIT), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* update RX key */
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ /* recv after rekey */
+ EXPECT_NE(recv(self->cfd, buf, send_len, 0), -1);
+ EXPECT_EQ(memcmp(buf, test_str_2, send_len), 0);
+}
+
+TEST_F(tls, rekey_peek)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ EXPECT_EQ(recv(self->cfd, buf, send_len, 0), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ /* can't receive the KeyUpdate without a control message */
+ EXPECT_EQ(recv(self->cfd, buf, send_len, MSG_PEEK), -1);
+
+ /* peek KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* update RX key */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+}
+
+TEST_F(tls, splice_rekey)
+{
+ int send_len = TLS_PAYLOAD_MAX_LEN / 2;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ struct tls_crypto_info_keys tls12;
+ int p[2];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ memrnd(mem_send, sizeof(mem_send));
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(send(self->fd, mem_send, send_len, 0), send_len);
+
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+
+ /* can't splice the KeyUpdate */
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1);
+ EXPECT_EQ(errno, EINVAL);
+
+ /* peek KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, MSG_PEEK);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* can't splice before updating the key */
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), -1);
+ EXPECT_EQ(errno, EKEYEXPIRED);
+
+ /* update RX key */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
+}
+
+TEST_F(tls, rekey_peek_splice)
+{
+ char const *test_str_1 = "test_message_before_rekey";
+ struct tls_crypto_info_keys tls12;
+ int send_len;
+ char buf[100];
+ char mem_recv[TLS_PAYLOAD_MAX_LEN];
+ int p[2];
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ ASSERT_GE(pipe(p), 0);
+
+ send_len = strlen(test_str_1) + 1;
+ EXPECT_EQ(send(self->fd, test_str_1, send_len, 0), send_len);
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, test_str_1, send_len), 0);
+
+ EXPECT_EQ(splice(self->cfd, NULL, p[1], NULL, TLS_PAYLOAD_MAX_LEN, 0), send_len);
+ EXPECT_EQ(read(p[0], mem_recv, send_len), send_len);
+ EXPECT_EQ(memcmp(mem_recv, test_str_1, send_len), 0);
+}
+
+TEST_F(tls, rekey_getsockopt)
+{
+ struct tls_crypto_info_keys tls12;
+ struct tls_crypto_info_keys tls12_get;
+ socklen_t len;
+
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->fd, SOL_TLS, TLS_TX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+
+ len = tls12.len;
+ EXPECT_EQ(getsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12_get, &len), 0);
+ EXPECT_EQ(len, tls12.len);
+ EXPECT_EQ(memcmp(&tls12_get, &tls12, tls12.len), 0);
+}
+
+TEST_F(tls, rekey_poll_pending)
+{
+ char const *test_str = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ struct pollfd pfd = { };
+ int send_len;
+ int ret;
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ /* send immediately after rekey */
+ send_len = strlen(test_str) + 1;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ /* key hasn't been updated, expect cfd to be non-readable */
+ pfd.fd = self->cfd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 0), 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ int pid2, status;
+
+ /* wait before installing the new key */
+ sleep(1);
+
+ /* update RX key while poll() is sleeping */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ pid2 = wait(&status);
+ EXPECT_EQ(pid2, ret);
+ EXPECT_EQ(status, 0);
+ } else {
+ pfd.fd = self->cfd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 5000), 1);
+
+ exit(!__test_passed(_metadata));
+ }
+}
+
+TEST_F(tls, rekey_poll_delay)
+{
+ char const *test_str = "test_message_after_rekey";
+ struct tls_crypto_info_keys tls12;
+ struct pollfd pfd = { };
+ int send_len;
+ int ret;
+
+ if (variant->tls_version != TLS_1_3_VERSION)
+ return;
+
+ /* update TX key */
+ tls_send_keyupdate(_metadata, self->fd);
+ tls_crypto_info_init(variant->tls_version, variant->cipher_type, &tls12, 1);
+ EXPECT_EQ(setsockopt(self->fd, SOL_TLS, TLS_TX, &tls12, tls12.len), 0);
+
+ /* get KeyUpdate */
+ tls_recv_keyupdate(_metadata, self->cfd, 0);
+
+ ret = fork();
+ ASSERT_GE(ret, 0);
+
+ if (ret) {
+ int pid2, status;
+
+ /* wait before installing the new key */
+ sleep(1);
+
+ /* update RX key while poll() is sleeping */
+ EXPECT_EQ(setsockopt(self->cfd, SOL_TLS, TLS_RX, &tls12, tls12.len), 0);
+
+ sleep(1);
+ send_len = strlen(test_str) + 1;
+ EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
+
+ pid2 = wait(&status);
+ EXPECT_EQ(pid2, ret);
+ EXPECT_EQ(status, 0);
+ } else {
+ pfd.fd = self->cfd;
+ pfd.events = POLLIN;
+ EXPECT_EQ(poll(&pfd, 1, 5000), 1);
+ exit(!__test_passed(_metadata));
+ }
+}
+
FIXTURE(tls_err)
{
int fd, cfd;
@@ -1696,7 +2156,7 @@ FIXTURE_SETUP(tls_err)
int ret;
tls_crypto_info_init(variant->tls_version, TLS_CIPHER_AES_GCM_128,
- &tls12);
+ &tls12, 0);
ulp_sock_pair(_metadata, &self->fd, &self->cfd, &self->notls);
ulp_sock_pair(_metadata, &self->fd2, &self->cfd2, &self->notls);
@@ -2118,7 +2578,7 @@ TEST(tls_v6ops) {
int sfd, ret, fd;
socklen_t len, len2;
- tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12);
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_128, &tls12, 0);
addr.sin6_family = AF_INET6;
addr.sin6_addr = in6addr_any;
@@ -2177,7 +2637,7 @@ TEST(prequeue) {
len = sizeof(addr);
memrnd(buf, sizeof(buf));
- tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12);
+ tls_crypto_info_init(TLS_1_2_VERSION, TLS_CIPHER_AES_GCM_256, &tls12, 0);
addr.sin_family = AF_INET;
addr.sin_addr.s_addr = htonl(INADDR_ANY);
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
index 640bc43452fa..88fa1d53ba2b 100755
--- a/tools/testing/selftests/net/udpgso_bench.sh
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -92,6 +92,9 @@ run_udp() {
echo "udp"
run_in_netns ${args}
+ echo "udp sendmmsg"
+ run_in_netns ${args} -m
+
echo "udp gso"
run_in_netns ${args} -S 0
diff --git a/tools/testing/selftests/net/vlan_bridge_binding.sh b/tools/testing/selftests/net/vlan_bridge_binding.sh
new file mode 100755
index 000000000000..e7cb8c678bde
--- /dev/null
+++ b/tools/testing/selftests/net/vlan_bridge_binding.sh
@@ -0,0 +1,256 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ALL_TESTS="
+ test_binding_on
+ test_binding_off
+ test_binding_toggle_on
+ test_binding_toggle_off
+ test_binding_toggle_on_when_upper_down
+ test_binding_toggle_off_when_upper_down
+ test_binding_toggle_on_when_lower_down
+ test_binding_toggle_off_when_lower_down
+"
+
+setup_prepare()
+{
+ local port
+
+ ip_link_add br up type bridge vlan_filtering 1
+
+ for port in d1 d2 d3; do
+ ip_link_add $port type veth peer name r$port
+ ip_link_set_up $port
+ ip_link_set_up r$port
+ ip_link_set_master $port br
+ done
+
+ bridge_vlan_add vid 11 dev br self
+ bridge_vlan_add vid 11 dev d1 master
+
+ bridge_vlan_add vid 12 dev br self
+ bridge_vlan_add vid 12 dev d2 master
+
+ bridge_vlan_add vid 13 dev br self
+ bridge_vlan_add vid 13 dev d1 master
+ bridge_vlan_add vid 13 dev d2 master
+
+ bridge_vlan_add vid 14 dev br self
+ bridge_vlan_add vid 14 dev d1 master
+ bridge_vlan_add vid 14 dev d2 master
+ bridge_vlan_add vid 14 dev d3 master
+}
+
+operstate_is()
+{
+ local dev=$1; shift
+ local expect=$1; shift
+
+ local operstate=$(ip -j link show $dev | jq -r .[].operstate)
+ if [[ $operstate == UP ]]; then
+ operstate=1
+ elif [[ $operstate == DOWN || $operstate == LOWERLAYERDOWN ]]; then
+ operstate=0
+ fi
+ echo -n $operstate
+ [[ $operstate == $expect ]]
+}
+
+check_operstate()
+{
+ local dev=$1; shift
+ local expect=$1; shift
+ local operstate
+
+ operstate=$(busywait 1000 \
+ operstate_is "$dev" "$expect")
+ check_err $? "Got operstate of $operstate, expected $expect"
+}
+
+add_one_vlan()
+{
+ local link=$1; shift
+ local id=$1; shift
+
+ ip_link_add $link.$id link $link type vlan id $id "$@"
+}
+
+add_vlans()
+{
+ add_one_vlan br 11 "$@"
+ add_one_vlan br 12 "$@"
+ add_one_vlan br 13 "$@"
+ add_one_vlan br 14 "$@"
+}
+
+set_vlans()
+{
+ ip link set dev br.11 "$@"
+ ip link set dev br.12 "$@"
+ ip link set dev br.13 "$@"
+ ip link set dev br.14 "$@"
+}
+
+down_netdevs()
+{
+ local dev
+
+ for dev in "$@"; do
+ ip_link_set_down $dev
+ done
+}
+
+check_operstates()
+{
+ local opst_11=$1; shift
+ local opst_12=$1; shift
+ local opst_13=$1; shift
+ local opst_14=$1; shift
+
+ check_operstate br.11 $opst_11
+ check_operstate br.12 $opst_12
+ check_operstate br.13 $opst_13
+ check_operstate br.14 $opst_14
+}
+
+do_test_binding()
+{
+ local inject=$1; shift
+ local what=$1; shift
+ local opsts_d1=$1; shift
+ local opsts_d2=$1; shift
+ local opsts_d12=$1; shift
+ local opsts_d123=$1; shift
+
+ RET=0
+
+ defer_scope_push
+ down_netdevs d1
+ $inject
+ check_operstates $opsts_d1
+ defer_scope_pop
+
+ defer_scope_push
+ down_netdevs d2
+ $inject
+ check_operstates $opsts_d2
+ defer_scope_pop
+
+ defer_scope_push
+ down_netdevs d1 d2
+ $inject
+ check_operstates $opsts_d12
+ defer_scope_pop
+
+ defer_scope_push
+ down_netdevs d1 d2 d3
+ $inject
+ check_operstates $opsts_d123
+ defer_scope_pop
+
+ log_test "Test bridge_binding $what"
+}
+
+do_test_binding_on()
+{
+ local inject=$1; shift
+ local what=$1; shift
+
+ do_test_binding "$inject" "$what" \
+ "0 1 1 1" \
+ "1 0 1 1" \
+ "0 0 0 1" \
+ "0 0 0 0"
+}
+
+do_test_binding_off()
+{
+ local inject=$1; shift
+ local what=$1; shift
+
+ do_test_binding "$inject" "$what" \
+ "1 1 1 1" \
+ "1 1 1 1" \
+ "1 1 1 1" \
+ "0 0 0 0"
+}
+
+test_binding_on()
+{
+ add_vlans bridge_binding on
+ set_vlans up
+ do_test_binding_on : "on"
+}
+
+test_binding_off()
+{
+ add_vlans bridge_binding off
+ set_vlans up
+ do_test_binding_off : "off"
+}
+
+test_binding_toggle_on()
+{
+ add_vlans bridge_binding off
+ set_vlans up
+ set_vlans type vlan bridge_binding on
+ do_test_binding_on : "off->on"
+}
+
+test_binding_toggle_off()
+{
+ add_vlans bridge_binding on
+ set_vlans up
+ set_vlans type vlan bridge_binding off
+ do_test_binding_off : "on->off"
+}
+
+dfr_set_binding_on()
+{
+ set_vlans type vlan bridge_binding on
+ defer set_vlans type vlan bridge_binding off
+}
+
+dfr_set_binding_off()
+{
+ set_vlans type vlan bridge_binding off
+ defer set_vlans type vlan bridge_binding on
+}
+
+test_binding_toggle_on_when_lower_down()
+{
+ add_vlans bridge_binding off
+ set_vlans up
+ do_test_binding_on dfr_set_binding_on "off->on when lower down"
+}
+
+test_binding_toggle_off_when_lower_down()
+{
+ add_vlans bridge_binding on
+ set_vlans up
+ do_test_binding_off dfr_set_binding_off "on->off when lower down"
+}
+
+test_binding_toggle_on_when_upper_down()
+{
+ add_vlans bridge_binding off
+ set_vlans type vlan bridge_binding on
+ set_vlans up
+ do_test_binding_on : "off->on when upper down"
+}
+
+test_binding_toggle_off_when_upper_down()
+{
+ add_vlans bridge_binding on
+ set_vlans type vlan bridge_binding off
+ set_vlans up
+ do_test_binding_off : "on->off when upper down"
+}
+
+trap defer_scopes_cleanup EXIT
+setup_prepare
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/ynl.mk b/tools/testing/selftests/net/ynl.mk
index d43afe243779..12e7cae251be 100644
--- a/tools/testing/selftests/net/ynl.mk
+++ b/tools/testing/selftests/net/ynl.mk
@@ -31,7 +31,8 @@ $(OUTPUT)/libynl.a: $(YNL_SPECS) $(OUTPUT)/.libynl-$(YNL_GENS_HASH).sig
$(Q)cp $(top_srcdir)/tools/net/ynl/libynl.a $(OUTPUT)/libynl.a
EXTRA_CLEAN += \
- $(top_srcdir)/tools/net/ynl/lib/__pycache__ \
+ $(top_srcdir)/tools/net/ynl/pyynl/__pycache__ \
+ $(top_srcdir)/tools/net/ynl/pyynl/lib/__pycache__ \
$(top_srcdir)/tools/net/ynl/lib/*.[ado] \
$(OUTPUT)/.libynl-*.sig \
$(OUTPUT)/libynl.a
diff --git a/tools/testing/selftests/pid_namespace/.gitignore b/tools/testing/selftests/pid_namespace/.gitignore
index 93ab9d7e5b7e..5118f0f3edf4 100644
--- a/tools/testing/selftests/pid_namespace/.gitignore
+++ b/tools/testing/selftests/pid_namespace/.gitignore
@@ -1 +1,2 @@
+pid_max
regression_enomem
diff --git a/tools/testing/selftests/pid_namespace/Makefile b/tools/testing/selftests/pid_namespace/Makefile
index 9286a1d22cd3..b972f55d07ae 100644
--- a/tools/testing/selftests/pid_namespace/Makefile
+++ b/tools/testing/selftests/pid_namespace/Makefile
@@ -1,7 +1,7 @@
# SPDX-License-Identifier: GPL-2.0
CFLAGS += -g $(KHDR_INCLUDES)
-TEST_GEN_PROGS = regression_enomem
+TEST_GEN_PROGS = regression_enomem pid_max
LOCAL_HDRS += $(selfdir)/pidfd/pidfd.h
diff --git a/tools/testing/selftests/pid_namespace/pid_max.c b/tools/testing/selftests/pid_namespace/pid_max.c
new file mode 100644
index 000000000000..51c414faabb0
--- /dev/null
+++ b/tools/testing/selftests/pid_namespace/pid_max.c
@@ -0,0 +1,358 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#define _GNU_SOURCE
+#include <assert.h>
+#include <errno.h>
+#include <fcntl.h>
+#include <linux/types.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/wait.h>
+
+#include "../kselftest_harness.h"
+#include "../pidfd/pidfd.h"
+
+#define __STACK_SIZE (8 * 1024 * 1024)
+static pid_t do_clone(int (*fn)(void *), void *arg, int flags)
+{
+ char *stack;
+ pid_t ret;
+
+ stack = malloc(__STACK_SIZE);
+ if (!stack)
+ return -ENOMEM;
+
+#ifdef __ia64__
+ ret = __clone2(fn, stack, __STACK_SIZE, flags | SIGCHLD, arg);
+#else
+ ret = clone(fn, stack + __STACK_SIZE, flags | SIGCHLD, arg);
+#endif
+ free(stack);
+ return ret;
+}
+
+static int pid_max_cb(void *data)
+{
+ int fd, ret;
+ pid_t pid;
+
+ ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to make rootfs private mount\n");
+ return -1;
+ }
+
+ umount2("/proc", MNT_DETACH);
+
+ ret = mount("proc", "/proc", "proc", 0, NULL);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to mount proc\n");
+ return -1;
+ }
+
+ fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
+ if (fd < 0) {
+ fprintf(stderr, "%m - Failed to open pid_max\n");
+ return -1;
+ }
+
+ ret = write(fd, "500", sizeof("500") - 1);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to write pid_max\n");
+ return -1;
+ }
+
+ for (int i = 0; i < 501; i++) {
+ pid = fork();
+ if (pid == 0)
+ exit(EXIT_SUCCESS);
+ wait_for_pid(pid);
+ if (pid > 500) {
+ fprintf(stderr, "Managed to create pid number beyond limit\n");
+ return -1;
+ }
+ }
+
+ return 0;
+}
+
+static int pid_max_nested_inner(void *data)
+{
+ int fret = -1;
+ pid_t pids[2];
+ int fd, i, ret;
+
+ ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to make rootfs private mount\n");
+ return fret;
+ }
+
+ umount2("/proc", MNT_DETACH);
+
+ ret = mount("proc", "/proc", "proc", 0, NULL);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to mount proc\n");
+ return fret;
+ }
+
+ fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
+ if (fd < 0) {
+ fprintf(stderr, "%m - Failed to open pid_max\n");
+ return fret;
+ }
+
+ ret = write(fd, "500", sizeof("500") - 1);
+ close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to write pid_max\n");
+ return fret;
+ }
+
+ pids[0] = fork();
+ if (pids[0] < 0) {
+ fprintf(stderr, "Failed to create first new process\n");
+ return fret;
+ }
+
+ if (pids[0] == 0)
+ exit(EXIT_SUCCESS);
+
+ pids[1] = fork();
+ wait_for_pid(pids[0]);
+ if (pids[1] >= 0) {
+ if (pids[1] == 0)
+ exit(EXIT_SUCCESS);
+ wait_for_pid(pids[1]);
+
+ fprintf(stderr, "Managed to create process even though ancestor pid namespace had a limit\n");
+ return fret;
+ }
+
+ /* Now make sure that we wrap pids at 400. */
+ for (i = 0; i < 510; i++) {
+ pid_t pid;
+
+ pid = fork();
+ if (pid < 0)
+ return fret;
+
+ if (pid == 0)
+ exit(EXIT_SUCCESS);
+
+ wait_for_pid(pid);
+ if (pid >= 500) {
+ fprintf(stderr, "Managed to create process with pid %d beyond configured limit\n", pid);
+ return fret;
+ }
+ }
+
+ return 0;
+}
+
+static int pid_max_nested_outer(void *data)
+{
+ int fret = -1, nr_procs = 400;
+ pid_t pids[1000];
+ int fd, i, ret;
+ pid_t pid;
+
+ ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to make rootfs private mount\n");
+ return fret;
+ }
+
+ umount2("/proc", MNT_DETACH);
+
+ ret = mount("proc", "/proc", "proc", 0, NULL);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to mount proc\n");
+ return fret;
+ }
+
+ fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
+ if (fd < 0) {
+ fprintf(stderr, "%m - Failed to open pid_max\n");
+ return fret;
+ }
+
+ ret = write(fd, "400", sizeof("400") - 1);
+ close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to write pid_max\n");
+ return fret;
+ }
+
+ /*
+ * Create 397 processes. This leaves room for do_clone() (398) and
+ * one more 399. So creating another process needs to fail.
+ */
+ for (nr_procs = 0; nr_procs < 396; nr_procs++) {
+ pid = fork();
+ if (pid < 0)
+ goto reap;
+
+ if (pid == 0)
+ exit(EXIT_SUCCESS);
+
+ pids[nr_procs] = pid;
+ }
+
+ pid = do_clone(pid_max_nested_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
+ if (pid < 0) {
+ fprintf(stderr, "%m - Failed to clone nested pidns\n");
+ goto reap;
+ }
+
+ if (wait_for_pid(pid)) {
+ fprintf(stderr, "%m - Nested pid_max failed\n");
+ goto reap;
+ }
+
+ fret = 0;
+
+reap:
+ for (int i = 0; i < nr_procs; i++)
+ wait_for_pid(pids[i]);
+
+ return fret;
+}
+
+static int pid_max_nested_limit_inner(void *data)
+{
+ int fret = -1, nr_procs = 400;
+ int fd, ret;
+ pid_t pid;
+ pid_t pids[1000];
+
+ ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to make rootfs private mount\n");
+ return fret;
+ }
+
+ umount2("/proc", MNT_DETACH);
+
+ ret = mount("proc", "/proc", "proc", 0, NULL);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to mount proc\n");
+ return fret;
+ }
+
+ fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
+ if (fd < 0) {
+ fprintf(stderr, "%m - Failed to open pid_max\n");
+ return fret;
+ }
+
+ ret = write(fd, "500", sizeof("500") - 1);
+ close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to write pid_max\n");
+ return fret;
+ }
+
+ for (nr_procs = 0; nr_procs < 500; nr_procs++) {
+ pid = fork();
+ if (pid < 0)
+ break;
+
+ if (pid == 0)
+ exit(EXIT_SUCCESS);
+
+ pids[nr_procs] = pid;
+ }
+
+ if (nr_procs >= 400) {
+ fprintf(stderr, "Managed to create processes beyond the configured outer limit\n");
+ goto reap;
+ }
+
+ fret = 0;
+
+reap:
+ for (int i = 0; i < nr_procs; i++)
+ wait_for_pid(pids[i]);
+
+ return fret;
+}
+
+static int pid_max_nested_limit_outer(void *data)
+{
+ int fd, ret;
+ pid_t pid;
+
+ ret = mount("", "/", NULL, MS_PRIVATE | MS_REC, 0);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to make rootfs private mount\n");
+ return -1;
+ }
+
+ umount2("/proc", MNT_DETACH);
+
+ ret = mount("proc", "/proc", "proc", 0, NULL);
+ if (ret) {
+ fprintf(stderr, "%m - Failed to mount proc\n");
+ return -1;
+ }
+
+ fd = open("/proc/sys/kernel/pid_max", O_RDWR | O_CLOEXEC | O_NOCTTY);
+ if (fd < 0) {
+ fprintf(stderr, "%m - Failed to open pid_max\n");
+ return -1;
+ }
+
+ ret = write(fd, "400", sizeof("400") - 1);
+ close(fd);
+ if (ret < 0) {
+ fprintf(stderr, "%m - Failed to write pid_max\n");
+ return -1;
+ }
+
+ pid = do_clone(pid_max_nested_limit_inner, NULL, CLONE_NEWPID | CLONE_NEWNS);
+ if (pid < 0) {
+ fprintf(stderr, "%m - Failed to clone nested pidns\n");
+ return -1;
+ }
+
+ if (wait_for_pid(pid)) {
+ fprintf(stderr, "%m - Nested pid_max failed\n");
+ return -1;
+ }
+
+ return 0;
+}
+
+TEST(pid_max_simple)
+{
+ pid_t pid;
+
+
+ pid = do_clone(pid_max_cb, NULL, CLONE_NEWPID | CLONE_NEWNS);
+ ASSERT_GT(pid, 0);
+ ASSERT_EQ(0, wait_for_pid(pid));
+}
+
+TEST(pid_max_nested_limit)
+{
+ pid_t pid;
+
+ pid = do_clone(pid_max_nested_limit_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
+ ASSERT_GT(pid, 0);
+ ASSERT_EQ(0, wait_for_pid(pid));
+}
+
+TEST(pid_max_nested)
+{
+ pid_t pid;
+
+ pid = do_clone(pid_max_nested_outer, NULL, CLONE_NEWPID | CLONE_NEWNS);
+ ASSERT_GT(pid, 0);
+ ASSERT_EQ(0, wait_for_pid(pid));
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/.gitignore b/tools/testing/selftests/pidfd/.gitignore
index 973198a3ec3d..bf92481f925c 100644
--- a/tools/testing/selftests/pidfd/.gitignore
+++ b/tools/testing/selftests/pidfd/.gitignore
@@ -6,3 +6,5 @@ pidfd_wait
pidfd_fdinfo_test
pidfd_getfd_test
pidfd_setns_test
+pidfd_file_handle_test
+pidfd_bind_mount
diff --git a/tools/testing/selftests/pidfd/Makefile b/tools/testing/selftests/pidfd/Makefile
index d731e3e76d5b..301343a11b62 100644
--- a/tools/testing/selftests/pidfd/Makefile
+++ b/tools/testing/selftests/pidfd/Makefile
@@ -2,7 +2,8 @@
CFLAGS += -g $(KHDR_INCLUDES) -pthread -Wall
TEST_GEN_PROGS := pidfd_test pidfd_fdinfo_test pidfd_open_test \
- pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test
+ pidfd_poll_test pidfd_wait pidfd_getfd_test pidfd_setns_test \
+ pidfd_file_handle_test pidfd_bind_mount
include ../lib.mk
diff --git a/tools/testing/selftests/pidfd/pidfd.h b/tools/testing/selftests/pidfd/pidfd.h
index 88d6830ee004..0b96ac4b8ce5 100644
--- a/tools/testing/selftests/pidfd/pidfd.h
+++ b/tools/testing/selftests/pidfd/pidfd.h
@@ -12,11 +12,11 @@
#include <stdlib.h>
#include <string.h>
#include <syscall.h>
-#include <sys/mount.h>
#include <sys/types.h>
#include <sys/wait.h>
#include "../kselftest.h"
+#include "../clone3/clone3_selftests.h"
#ifndef P_PIDFD
#define P_PIDFD 3
@@ -68,6 +68,11 @@
#define PIDFD_SKIP 3
#define PIDFD_XFAIL 4
+static inline int sys_waitid(int which, pid_t pid, siginfo_t *info, int options)
+{
+ return syscall(__NR_waitid, which, pid, info, options, NULL);
+}
+
static inline int wait_for_pid(pid_t pid)
{
int status, ret;
@@ -114,4 +119,37 @@ static inline int sys_memfd_create(const char *name, unsigned int flags)
return syscall(__NR_memfd_create, name, flags);
}
+static inline pid_t create_child(int *pidfd, unsigned flags)
+{
+ struct __clone_args args = {
+ .flags = CLONE_PIDFD | flags,
+ .exit_signal = SIGCHLD,
+ .pidfd = ptr_to_u64(pidfd),
+ };
+
+ return sys_clone3(&args, sizeof(struct __clone_args));
+}
+
+static inline ssize_t read_nointr(int fd, void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = read(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
+static inline ssize_t write_nointr(int fd, const void *buf, size_t count)
+{
+ ssize_t ret;
+
+ do {
+ ret = write(fd, buf, count);
+ } while (ret < 0 && errno == EINTR);
+
+ return ret;
+}
+
#endif /* __PIDFD_H */
diff --git a/tools/testing/selftests/pidfd/pidfd_bind_mount.c b/tools/testing/selftests/pidfd/pidfd_bind_mount.c
new file mode 100644
index 000000000000..7822dd080258
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_bind_mount.c
@@ -0,0 +1,188 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+// Copyright (c) 2024 Christian Brauner <brauner@kernel.org>
+
+#define _GNU_SOURCE
+#include <fcntl.h>
+#include <limits.h>
+#include <sched.h>
+#include <stdio.h>
+#include <string.h>
+#include <linux/fs.h>
+#include <sys/ioctl.h>
+#include <sys/stat.h>
+#include <sys/mount.h>
+#include <unistd.h>
+
+#include "pidfd.h"
+#include "../kselftest_harness.h"
+
+#ifndef __NR_open_tree
+ #if defined __alpha__
+ #define __NR_open_tree 538
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_open_tree 4428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_open_tree 6428
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_open_tree 5428
+ #endif
+ #elif defined __ia64__
+ #define __NR_open_tree (428 + 1024)
+ #else
+ #define __NR_open_tree 428
+ #endif
+#endif
+
+#ifndef __NR_move_mount
+ #if defined __alpha__
+ #define __NR_move_mount 539
+ #elif defined _MIPS_SIM
+ #if _MIPS_SIM == _MIPS_SIM_ABI32 /* o32 */
+ #define __NR_move_mount 4429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_NABI32 /* n32 */
+ #define __NR_move_mount 6429
+ #endif
+ #if _MIPS_SIM == _MIPS_SIM_ABI64 /* n64 */
+ #define __NR_move_mount 5429
+ #endif
+ #elif defined __ia64__
+ #define __NR_move_mount (428 + 1024)
+ #else
+ #define __NR_move_mount 429
+ #endif
+#endif
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_F_EMPTY_PATH 0x00000004 /* Empty from path permitted */
+#endif
+
+#ifndef MOVE_MOUNT_F_EMPTY_PATH
+#define MOVE_MOUNT_T_EMPTY_PATH 0x00000040 /* Empty to path permitted */
+#endif
+
+static inline int sys_move_mount(int from_dfd, const char *from_pathname,
+ int to_dfd, const char *to_pathname,
+ unsigned int flags)
+{
+ return syscall(__NR_move_mount, from_dfd, from_pathname, to_dfd,
+ to_pathname, flags);
+}
+
+#ifndef OPEN_TREE_CLONE
+#define OPEN_TREE_CLONE 1
+#endif
+
+#ifndef OPEN_TREE_CLOEXEC
+#define OPEN_TREE_CLOEXEC O_CLOEXEC
+#endif
+
+#ifndef AT_RECURSIVE
+#define AT_RECURSIVE 0x8000 /* Apply to the entire subtree */
+#endif
+
+static inline int sys_open_tree(int dfd, const char *filename, unsigned int flags)
+{
+ return syscall(__NR_open_tree, dfd, filename, flags);
+}
+
+FIXTURE(pidfd_bind_mount) {
+ char template[PATH_MAX];
+ int fd_tmp;
+ int pidfd;
+ struct stat st1;
+ struct stat st2;
+ __u32 gen1;
+ __u32 gen2;
+ bool must_unmount;
+};
+
+FIXTURE_SETUP(pidfd_bind_mount)
+{
+ self->fd_tmp = -EBADF;
+ self->must_unmount = false;
+ ASSERT_EQ(unshare(CLONE_NEWNS), 0);
+ ASSERT_LE(snprintf(self->template, PATH_MAX, "%s", P_tmpdir "/pidfd_bind_mount_XXXXXX"), PATH_MAX);
+ self->fd_tmp = mkstemp(self->template);
+ ASSERT_GE(self->fd_tmp, 0);
+ self->pidfd = sys_pidfd_open(getpid(), 0);
+ ASSERT_GE(self->pidfd, 0);
+ ASSERT_GE(fstat(self->pidfd, &self->st1), 0);
+ ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen1), 0);
+}
+
+FIXTURE_TEARDOWN(pidfd_bind_mount)
+{
+ ASSERT_EQ(close(self->fd_tmp), 0);
+ if (self->must_unmount)
+ ASSERT_EQ(umount2(self->template, 0), 0);
+ ASSERT_EQ(unlink(self->template), 0);
+}
+
+/*
+ * Test that a detached mount can be created for a pidfd and then
+ * attached to the filesystem hierarchy.
+ */
+TEST_F(pidfd_bind_mount, bind_mount)
+{
+ int fd_tree;
+
+ fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH);
+ ASSERT_GE(fd_tree, 0);
+
+ ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ self->must_unmount = true;
+
+ ASSERT_EQ(close(fd_tree), 0);
+}
+
+/* Test that a pidfd can be reopened through procfs. */
+TEST_F(pidfd_bind_mount, reopen)
+{
+ int pidfd;
+ char proc_path[PATH_MAX];
+
+ sprintf(proc_path, "/proc/self/fd/%d", self->pidfd);
+ pidfd = open(proc_path, O_RDONLY | O_NOCTTY | O_CLOEXEC);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_GE(fstat(self->pidfd, &self->st2), 0);
+ ASSERT_EQ(ioctl(self->pidfd, FS_IOC_GETVERSION, &self->gen2), 0);
+
+ ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino);
+ ASSERT_TRUE(self->gen1 == self->gen2);
+
+ ASSERT_EQ(close(pidfd), 0);
+}
+
+/*
+ * Test that a detached mount can be created for a pidfd and then
+ * attached to the filesystem hierarchy and reopened.
+ */
+TEST_F(pidfd_bind_mount, bind_mount_reopen)
+{
+ int fd_tree, fd_pidfd_mnt;
+
+ fd_tree = sys_open_tree(self->pidfd, "", OPEN_TREE_CLONE | OPEN_TREE_CLOEXEC | AT_EMPTY_PATH);
+ ASSERT_GE(fd_tree, 0);
+
+ ASSERT_EQ(move_mount(fd_tree, "", self->fd_tmp, "", MOVE_MOUNT_F_EMPTY_PATH | MOVE_MOUNT_T_EMPTY_PATH), 0);
+ self->must_unmount = true;
+
+ fd_pidfd_mnt = openat(-EBADF, self->template, O_RDONLY | O_NOCTTY | O_CLOEXEC);
+ ASSERT_GE(fd_pidfd_mnt, 0);
+
+ ASSERT_GE(fstat(fd_tree, &self->st2), 0);
+ ASSERT_EQ(ioctl(fd_pidfd_mnt, FS_IOC_GETVERSION, &self->gen2), 0);
+
+ ASSERT_TRUE(self->st1.st_dev == self->st2.st_dev && self->st1.st_ino == self->st2.st_ino);
+ ASSERT_TRUE(self->gen1 == self->gen2);
+
+ ASSERT_EQ(close(fd_tree), 0);
+ ASSERT_EQ(close(fd_pidfd_mnt), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_file_handle_test.c b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
new file mode 100644
index 000000000000..439b9c6c0457
--- /dev/null
+++ b/tools/testing/selftests/pidfd/pidfd_file_handle_test.c
@@ -0,0 +1,503 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/types.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <syscall.h>
+#include <sys/prctl.h>
+#include <sys/wait.h>
+#include <unistd.h>
+#include <sys/socket.h>
+#include <linux/kcmp.h>
+#include <sys/stat.h>
+
+#include "pidfd.h"
+#include "../kselftest_harness.h"
+
+FIXTURE(file_handle)
+{
+ pid_t pid;
+ int pidfd;
+
+ pid_t child_pid1;
+ int child_pidfd1;
+
+ pid_t child_pid2;
+ int child_pidfd2;
+
+ pid_t child_pid3;
+ int child_pidfd3;
+};
+
+FIXTURE_SETUP(file_handle)
+{
+ int ret;
+ int ipc_sockets[2];
+ char c;
+
+ self->pid = getpid();
+ self->pidfd = sys_pidfd_open(self->pid, 0);
+ ASSERT_GE(self->pidfd, 0);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ self->child_pid1 = create_child(&self->child_pidfd1, CLONE_NEWUSER);
+ EXPECT_GE(self->child_pid1, 0);
+
+ if (self->child_pid1 == 0) {
+ close(ipc_sockets[0]);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ close(ipc_sockets[1]);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ close(ipc_sockets[0]);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ self->child_pid2 = create_child(&self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid2, 0);
+
+ if (self->child_pid2 == 0) {
+ close(ipc_sockets[0]);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ close(ipc_sockets[1]);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ close(ipc_sockets[0]);
+
+ ret = socketpair(AF_LOCAL, SOCK_STREAM | SOCK_CLOEXEC, 0, ipc_sockets);
+ EXPECT_EQ(ret, 0);
+
+ self->child_pid3 = create_child(&self->child_pidfd3, CLONE_NEWUSER | CLONE_NEWPID);
+ EXPECT_GE(self->child_pid3, 0);
+
+ if (self->child_pid3 == 0) {
+ close(ipc_sockets[0]);
+
+ if (write_nointr(ipc_sockets[1], "1", 1) < 0)
+ _exit(EXIT_FAILURE);
+
+ close(ipc_sockets[1]);
+
+ pause();
+ _exit(EXIT_SUCCESS);
+ }
+
+ close(ipc_sockets[1]);
+ ASSERT_EQ(read_nointr(ipc_sockets[0], &c, 1), 1);
+ close(ipc_sockets[0]);
+}
+
+FIXTURE_TEARDOWN(file_handle)
+{
+ EXPECT_EQ(close(self->pidfd), 0);
+
+ EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd1, SIGKILL, NULL, 0), 0);
+ if (self->child_pidfd1 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd1));
+
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0);
+
+ EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd2, SIGKILL, NULL, 0), 0);
+ if (self->child_pidfd2 >= 0)
+ EXPECT_EQ(0, close(self->child_pidfd2));
+
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0);
+
+ if (self->child_pidfd3 >= 0) {
+ EXPECT_EQ(sys_pidfd_send_signal(self->child_pidfd3, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(0, close(self->child_pidfd3));
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0);
+ }
+}
+
+/*
+ * Test that we can decode a pidfs file handle in the same pid
+ * namespace.
+ */
+TEST_F(file_handle, file_handle_same_pidns)
+{
+ int mnt_id;
+ struct file_handle *fh;
+ int pidfd = -EBADF;
+ struct stat st1, st2;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->child_pidfd1, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
+
+ ASSERT_EQ(fstat(self->child_pidfd1, &st1), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ free(fh);
+}
+
+/*
+ * Test that we can decode a pidfs file handle from a child pid
+ * namespace.
+ */
+TEST_F(file_handle, file_handle_child_pidns)
+{
+ int mnt_id;
+ struct file_handle *fh;
+ int pidfd = -EBADF;
+ struct stat st1, st2;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
+
+ ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, O_CLOEXEC);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, O_NONBLOCK);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ free(fh);
+}
+
+/*
+ * Test that we fail to decode a pidfs file handle from an ancestor
+ * child pid namespace.
+ */
+TEST_F(file_handle, file_handle_foreign_pidns)
+{
+ int mnt_id;
+ struct file_handle *fh;
+ pid_t pid;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->pidfd, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
+
+ ASSERT_EQ(setns(self->child_pidfd2, CLONE_NEWUSER | CLONE_NEWPID), 0);
+
+ pid = fork();
+ ASSERT_GE(pid, 0);
+
+ if (pid == 0) {
+ int pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ if (pidfd >= 0) {
+ TH_LOG("Managed to open pidfd outside of the caller's pid namespace hierarchy");
+ _exit(1);
+ }
+ _exit(0);
+ }
+
+ ASSERT_EQ(wait_for_pid(pid), 0);
+
+ free(fh);
+}
+
+/*
+ * Test that we can decode a pidfs file handle of a process that has
+ * exited but not been reaped.
+ */
+TEST_F(file_handle, pid_has_exited)
+{
+ int mnt_id, pidfd, child_pidfd3;
+ struct file_handle *fh;
+ struct stat st1, st2;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
+
+ ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ child_pidfd3 = self->child_pidfd3;
+ self->child_pidfd3 = -EBADF;
+ EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(close(child_pidfd3), 0);
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED | WNOWAIT), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0);
+}
+
+/*
+ * Test that we fail to decode a pidfs file handle of a process that has
+ * already been reaped.
+ */
+TEST_F(file_handle, pid_has_been_reaped)
+{
+ int mnt_id, pidfd, child_pidfd3;
+ struct file_handle *fh;
+ struct stat st1, st2;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->child_pidfd3, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
+
+ ASSERT_EQ(fstat(self->child_pidfd3, &st1), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+
+ child_pidfd3 = self->child_pidfd3;
+ self->child_pidfd3 = -EBADF;
+ EXPECT_EQ(sys_pidfd_send_signal(child_pidfd3, SIGKILL, NULL, 0), 0);
+ EXPECT_EQ(close(child_pidfd3), 0);
+ EXPECT_EQ(sys_waitid(P_PID, self->child_pid3, NULL, WEXITED), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ ASSERT_LT(pidfd, 0);
+}
+
+/*
+ * Test valid flags to open a pidfd file handle. Note, that
+ * PIDFD_NONBLOCK is defined as O_NONBLOCK and O_NONBLOCK is an alias to
+ * O_NDELAY. Also note that PIDFD_THREAD is an alias for O_EXCL.
+ */
+TEST_F(file_handle, open_by_handle_at_valid_flags)
+{
+ int mnt_id;
+ struct file_handle *fh;
+ int pidfd = -EBADF;
+ struct stat st1, st2;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
+
+ ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh,
+ O_RDONLY |
+ O_WRONLY |
+ O_RDWR |
+ O_NONBLOCK |
+ O_NDELAY |
+ O_CLOEXEC |
+ O_EXCL);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+}
+
+/*
+ * Test that invalid flags passed to open a pidfd file handle are
+ * rejected.
+ */
+TEST_F(file_handle, open_by_handle_at_invalid_flags)
+{
+ int mnt_id;
+ struct file_handle *fh;
+ int pidfd = -EBADF;
+ static const struct invalid_pidfs_file_handle_flags {
+ int oflag;
+ const char *oflag_name;
+ } invalid_pidfs_file_handle_flags[] = {
+ { FASYNC, "FASYNC" },
+ { O_CREAT, "O_CREAT" },
+ { O_NOCTTY, "O_NOCTTY" },
+ { O_CREAT, "O_CREAT" },
+ { O_TRUNC, "O_TRUNC" },
+ { O_APPEND, "O_APPEND" },
+ { O_SYNC, "O_SYNC" },
+ { O_DSYNC, "O_DSYNC" },
+ { O_DIRECT, "O_DIRECT" },
+ { O_DIRECTORY, "O_DIRECTORY" },
+ { O_NOFOLLOW, "O_NOFOLLOW" },
+ { O_NOATIME, "O_NOATIME" },
+ { O_PATH, "O_PATH" },
+ { O_TMPFILE, "O_TMPFILE" },
+ /*
+ * O_LARGEFILE is added implicitly by
+ * open_by_handle_at() so pidfs simply masks it off.
+ */
+ };
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH), 0);
+
+ for (int i = 0; i < ARRAY_SIZE(invalid_pidfs_file_handle_flags); i++) {
+ pidfd = open_by_handle_at(self->pidfd, fh, invalid_pidfs_file_handle_flags[i].oflag);
+ ASSERT_LT(pidfd, 0) {
+ TH_LOG("open_by_handle_at() succeeded with invalid flags: %s", invalid_pidfs_file_handle_flags[i].oflag_name);
+ }
+ }
+}
+
+/* Test that lookup fails. */
+TEST_F(file_handle, lookup_must_fail)
+{
+ int mnt_id;
+ struct file_handle *fh;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, AT_EMPTY_PATH), 0);
+ ASSERT_EQ(errno, ENOTDIR);
+ ASSERT_NE(name_to_handle_at(self->child_pidfd2, "lookup-is-not-possible-with-pidfs", fh, &mnt_id, 0), 0);
+ ASSERT_EQ(errno, ENOTDIR);
+}
+
+#ifndef AT_HANDLE_CONNECTABLE
+#define AT_HANDLE_CONNECTABLE 0x002
+#endif
+
+/*
+ * Test that AT_HANDLE_CONNECTABLE is rejected. Connectable file handles
+ * don't make sense for pidfs. Note that currently AT_HANDLE_CONNECTABLE
+ * is rejected because it is incompatible with AT_EMPTY_PATH which is
+ * required with pidfds as we don't support lookup.
+ */
+TEST_F(file_handle, invalid_name_to_handle_at_flags)
+{
+ int mnt_id;
+ struct file_handle *fh;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_NE(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_CONNECTABLE), 0);
+}
+
+#ifndef AT_HANDLE_FID
+#define AT_HANDLE_FID 0x200
+#endif
+
+/*
+ * Test that a request with AT_HANDLE_FID always leads to decodable file
+ * handle as pidfs always provides export operations.
+ */
+TEST_F(file_handle, valid_name_to_handle_at_flags)
+{
+ int mnt_id, pidfd;
+ struct file_handle *fh;
+ struct stat st1, st2;
+
+ fh = malloc(sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ ASSERT_NE(fh, NULL);
+ memset(fh, 0, sizeof(struct file_handle) + MAX_HANDLE_SZ);
+ fh->handle_bytes = MAX_HANDLE_SZ;
+
+ ASSERT_EQ(name_to_handle_at(self->child_pidfd2, "", fh, &mnt_id, AT_EMPTY_PATH | AT_HANDLE_FID), 0);
+
+ ASSERT_EQ(fstat(self->child_pidfd2, &st1), 0);
+
+ pidfd = open_by_handle_at(self->pidfd, fh, 0);
+ ASSERT_GE(pidfd, 0);
+
+ ASSERT_EQ(fstat(pidfd, &st2), 0);
+ ASSERT_TRUE(st1.st_dev == st2.st_dev && st1.st_ino == st2.st_ino);
+
+ ASSERT_EQ(close(pidfd), 0);
+}
+
+TEST_HARNESS_MAIN
diff --git a/tools/testing/selftests/pidfd/pidfd_setns_test.c b/tools/testing/selftests/pidfd/pidfd_setns_test.c
index 7c2a4349170a..222f8131283b 100644
--- a/tools/testing/selftests/pidfd/pidfd_setns_test.c
+++ b/tools/testing/selftests/pidfd/pidfd_setns_test.c
@@ -19,7 +19,6 @@
#include <linux/ioctl.h>
#include "pidfd.h"
-#include "../clone3/clone3_selftests.h"
#include "../kselftest_harness.h"
#ifndef PIDFS_IOCTL_MAGIC
@@ -118,22 +117,6 @@ FIXTURE(current_nsset)
int child_pidfd_derived_nsfds2[PIDFD_NS_MAX];
};
-static int sys_waitid(int which, pid_t pid, int options)
-{
- return syscall(__NR_waitid, which, pid, NULL, options, NULL);
-}
-
-pid_t create_child(int *pidfd, unsigned flags)
-{
- struct __clone_args args = {
- .flags = CLONE_PIDFD | flags,
- .exit_signal = SIGCHLD,
- .pidfd = ptr_to_u64(pidfd),
- };
-
- return sys_clone3(&args, sizeof(struct clone_args));
-}
-
static bool switch_timens(void)
{
int fd, ret;
@@ -150,28 +133,6 @@ static bool switch_timens(void)
return ret == 0;
}
-static ssize_t read_nointr(int fd, void *buf, size_t count)
-{
- ssize_t ret;
-
- do {
- ret = read(fd, buf, count);
- } while (ret < 0 && errno == EINTR);
-
- return ret;
-}
-
-static ssize_t write_nointr(int fd, const void *buf, size_t count)
-{
- ssize_t ret;
-
- do {
- ret = write(fd, buf, count);
- } while (ret < 0 && errno == EINTR);
-
- return ret;
-}
-
FIXTURE_SETUP(current_nsset)
{
int i, proc_fd, ret;
@@ -229,7 +190,7 @@ FIXTURE_SETUP(current_nsset)
_exit(EXIT_SUCCESS);
}
- ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED | WNOWAIT), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED | WNOWAIT), 0);
self->pidfd = sys_pidfd_open(self->pid, 0);
EXPECT_GE(self->pidfd, 0) {
@@ -432,9 +393,9 @@ FIXTURE_TEARDOWN(current_nsset)
EXPECT_EQ(0, close(self->child_pidfd1));
if (self->child_pidfd2 >= 0)
EXPECT_EQ(0, close(self->child_pidfd2));
- ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, WEXITED), 0);
- ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, WEXITED), 0);
- ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, WEXITED), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid_exited, NULL, WEXITED), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid1, NULL, WEXITED), 0);
+ ASSERT_EQ(sys_waitid(P_PID, self->child_pid2, NULL, WEXITED), 0);
}
static int preserve_ns(const int pid, const char *ns)
diff --git a/tools/testing/selftests/pidfd/pidfd_wait.c b/tools/testing/selftests/pidfd/pidfd_wait.c
index 0dcb8365ddc3..1e2d49751cde 100644
--- a/tools/testing/selftests/pidfd/pidfd_wait.c
+++ b/tools/testing/selftests/pidfd/pidfd_wait.c
@@ -26,22 +26,11 @@
#define SKIP(s, ...) XFAIL(s, ##__VA_ARGS__)
#endif
-static pid_t sys_clone3(struct clone_args *args)
-{
- return syscall(__NR_clone3, args, sizeof(struct clone_args));
-}
-
-static int sys_waitid(int which, pid_t pid, siginfo_t *info, int options,
- struct rusage *ru)
-{
- return syscall(__NR_waitid, which, pid, info, options, ru);
-}
-
TEST(wait_simple)
{
int pidfd = -1;
pid_t parent_tid = -1;
- struct clone_args args = {
+ struct __clone_args args = {
.parent_tid = ptr_to_u64(&parent_tid),
.pidfd = ptr_to_u64(&pidfd),
.flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
@@ -55,7 +44,7 @@ TEST(wait_simple)
pidfd = open("/proc/self", O_DIRECTORY | O_RDONLY | O_CLOEXEC);
ASSERT_GE(pidfd, 0);
- pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
ASSERT_NE(pid, 0);
EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
@@ -63,18 +52,18 @@ TEST(wait_simple)
pidfd = open("/dev/null", O_RDONLY | O_CLOEXEC);
ASSERT_GE(pidfd, 0);
- pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
ASSERT_NE(pid, 0);
EXPECT_EQ(close(pidfd), 0);
pidfd = -1;
- pid = sys_clone3(&args);
+ pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
if (pid == 0)
exit(EXIT_SUCCESS);
- pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ pid = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
ASSERT_GE(pid, 0);
ASSERT_EQ(WIFEXITED(info.si_status), true);
ASSERT_EQ(WEXITSTATUS(info.si_status), 0);
@@ -89,7 +78,7 @@ TEST(wait_states)
{
int pidfd = -1;
pid_t parent_tid = -1;
- struct clone_args args = {
+ struct __clone_args args = {
.parent_tid = ptr_to_u64(&parent_tid),
.pidfd = ptr_to_u64(&pidfd),
.flags = CLONE_PIDFD | CLONE_PARENT_SETTID,
@@ -102,7 +91,7 @@ TEST(wait_states)
};
ASSERT_EQ(pipe(pfd), 0);
- pid = sys_clone3(&args);
+ pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
if (pid == 0) {
@@ -117,28 +106,28 @@ TEST(wait_states)
}
close(pfd[0]);
- ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0);
ASSERT_EQ(info.si_signo, SIGCHLD);
ASSERT_EQ(info.si_code, CLD_STOPPED);
ASSERT_EQ(info.si_pid, parent_tid);
ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
- ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED, NULL), 0);
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WCONTINUED), 0);
ASSERT_EQ(write(pfd[1], "C", 1), 1);
close(pfd[1]);
ASSERT_EQ(info.si_signo, SIGCHLD);
ASSERT_EQ(info.si_code, CLD_CONTINUED);
ASSERT_EQ(info.si_pid, parent_tid);
- ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED, NULL), 0);
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WUNTRACED), 0);
ASSERT_EQ(info.si_signo, SIGCHLD);
ASSERT_EQ(info.si_code, CLD_STOPPED);
ASSERT_EQ(info.si_pid, parent_tid);
ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGKILL, NULL, 0), 0);
- ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0);
ASSERT_EQ(info.si_signo, SIGCHLD);
ASSERT_EQ(info.si_code, CLD_KILLED);
ASSERT_EQ(info.si_pid, parent_tid);
@@ -151,7 +140,7 @@ TEST(wait_nonblock)
int pidfd;
unsigned int flags = 0;
pid_t parent_tid = -1;
- struct clone_args args = {
+ struct __clone_args args = {
.parent_tid = ptr_to_u64(&parent_tid),
.flags = CLONE_PARENT_SETTID,
.exit_signal = SIGCHLD,
@@ -173,12 +162,12 @@ TEST(wait_nonblock)
SKIP(return, "Skipping PIDFD_NONBLOCK test");
}
- ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
ASSERT_LT(ret, 0);
ASSERT_EQ(errno, ECHILD);
EXPECT_EQ(close(pidfd), 0);
- pid = sys_clone3(&args);
+ pid = sys_clone3(&args, sizeof(args));
ASSERT_GE(pid, 0);
if (pid == 0) {
@@ -201,7 +190,7 @@ TEST(wait_nonblock)
* Callers need to see EAGAIN/EWOULDBLOCK with non-blocking pidfd when
* child processes exist but none have exited.
*/
- ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL);
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED);
ASSERT_LT(ret, 0);
ASSERT_EQ(errno, EAGAIN);
@@ -210,19 +199,19 @@ TEST(wait_nonblock)
* WNOHANG raised explicitly when child processes exist but none have
* exited.
*/
- ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG, NULL);
+ ret = sys_waitid(P_PIDFD, pidfd, &info, WEXITED | WNOHANG);
ASSERT_EQ(ret, 0);
ASSERT_EQ(fcntl(pidfd, F_SETFL, (flags & ~O_NONBLOCK)), 0);
- ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED, NULL), 0);
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WSTOPPED), 0);
ASSERT_EQ(info.si_signo, SIGCHLD);
ASSERT_EQ(info.si_code, CLD_STOPPED);
ASSERT_EQ(info.si_pid, parent_tid);
ASSERT_EQ(sys_pidfd_send_signal(pidfd, SIGCONT, NULL, 0), 0);
- ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED, NULL), 0);
+ ASSERT_EQ(sys_waitid(P_PIDFD, pidfd, &info, WEXITED), 0);
ASSERT_EQ(info.si_signo, SIGCHLD);
ASSERT_EQ(info.si_code, CLD_EXITED);
ASSERT_EQ(info.si_pid, parent_tid);
diff --git a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
index 580fcac0a09f..b71ef8a493ed 100644
--- a/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
+++ b/tools/testing/selftests/powerpc/benchmarks/gettimeofday.c
@@ -20,7 +20,7 @@ static int test_gettimeofday(void)
gettimeofday(&tv_end, NULL);
}
- timersub(&tv_start, &tv_end, &tv_diff);
+ timersub(&tv_end, &tv_start, &tv_diff);
printf("time = %.6f\n", tv_diff.tv_sec + (tv_diff.tv_usec) * 1e-6);
diff --git a/tools/testing/selftests/powerpc/include/pkeys.h b/tools/testing/selftests/powerpc/include/pkeys.h
index 51729d9a7111..3a0129467de6 100644
--- a/tools/testing/selftests/powerpc/include/pkeys.h
+++ b/tools/testing/selftests/powerpc/include/pkeys.h
@@ -35,10 +35,18 @@
#define __NR_pkey_alloc 384
#define __NR_pkey_free 385
+#ifndef NT_PPC_PKEY
+#define NT_PPC_PKEY 0x110
+#endif
+
#define PKEY_BITS_PER_PKEY 2
#define NR_PKEYS 32
#define PKEY_BITS_MASK ((1UL << PKEY_BITS_PER_PKEY) - 1)
+#define AMR_BITS_PER_PKEY 2
+#define PKEY_REG_BITS (sizeof(u64) * 8)
+#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+
inline unsigned long pkeyreg_get(void)
{
return mfspr(SPRN_AMR);
diff --git a/tools/testing/selftests/powerpc/ptrace/core-pkey.c b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
index f6da4cb30cd6..f061434af452 100644
--- a/tools/testing/selftests/powerpc/ptrace/core-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/core-pkey.c
@@ -16,26 +16,7 @@
#include <unistd.h>
#include "ptrace.h"
#include "child.h"
-
-#ifndef __NR_pkey_alloc
-#define __NR_pkey_alloc 384
-#endif
-
-#ifndef __NR_pkey_free
-#define __NR_pkey_free 385
-#endif
-
-#ifndef NT_PPC_PKEY
-#define NT_PPC_PKEY 0x110
-#endif
-
-#ifndef PKEY_DISABLE_EXECUTE
-#define PKEY_DISABLE_EXECUTE 0x4
-#endif
-
-#define AMR_BITS_PER_PKEY 2
-#define PKEY_REG_BITS (sizeof(u64) * 8)
-#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+#include "pkeys.h"
#define CORE_FILE_LIMIT (5 * 1024 * 1024) /* 5 MB should be enough */
@@ -61,16 +42,6 @@ struct shared_info {
time_t core_time;
};
-static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
-{
- return syscall(__NR_pkey_alloc, flags, init_access_rights);
-}
-
-static int sys_pkey_free(int pkey)
-{
- return syscall(__NR_pkey_free, pkey);
-}
-
static int increase_core_file_limit(void)
{
struct rlimit rlim;
diff --git a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
index d89474377f11..fc633014424f 100644
--- a/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
+++ b/tools/testing/selftests/powerpc/ptrace/ptrace-pkey.c
@@ -7,26 +7,7 @@
*/
#include "ptrace.h"
#include "child.h"
-
-#ifndef __NR_pkey_alloc
-#define __NR_pkey_alloc 384
-#endif
-
-#ifndef __NR_pkey_free
-#define __NR_pkey_free 385
-#endif
-
-#ifndef NT_PPC_PKEY
-#define NT_PPC_PKEY 0x110
-#endif
-
-#ifndef PKEY_DISABLE_EXECUTE
-#define PKEY_DISABLE_EXECUTE 0x4
-#endif
-
-#define AMR_BITS_PER_PKEY 2
-#define PKEY_REG_BITS (sizeof(u64) * 8)
-#define pkeyshift(pkey) (PKEY_REG_BITS - ((pkey + 1) * AMR_BITS_PER_PKEY))
+#include "pkeys.h"
static const char user_read[] = "[User Read (Running)]";
static const char user_write[] = "[User Write (Running)]";
@@ -61,11 +42,6 @@ struct shared_info {
unsigned long invalid_uamor;
};
-static int sys_pkey_alloc(unsigned long flags, unsigned long init_access_rights)
-{
- return syscall(__NR_pkey_alloc, flags, init_access_rights);
-}
-
static int child(struct shared_info *info)
{
unsigned long reg;
diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c
index 81d3069ffb84..f348f54914a9 100644
--- a/tools/testing/selftests/powerpc/vphn/test-vphn.c
+++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c
@@ -275,7 +275,7 @@ static struct test {
}
},
{
- /* Parse a 32-bit value split accross two consecutives 64-bit
+ /* Parse a 32-bit value split across two consecutives 64-bit
* input values.
*/
"vphn: 16-bit value followed by 2 x 32-bit values",
diff --git a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
index 134cdef5a6e0..48a8052d5dae 100755
--- a/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
+++ b/tools/testing/selftests/rcutorture/bin/kvm-remote.sh
@@ -181,10 +181,11 @@ done
# Function to check for presence of a file on the specified system.
# Complain if the system cannot be reached, and retry after a wait.
-# Currently just waits forever if a machine disappears.
+# Currently just waits 15 minutes if a machine disappears.
#
# Usage: checkremotefile system pathname
checkremotefile () {
+ local nsshfails=0
local ret
local sleeptime=60
@@ -195,6 +196,11 @@ checkremotefile () {
if test "$ret" -eq 255
then
echo " ---" ssh failure to $1 checking for file $2, retry after $sleeptime seconds. `date` | tee -a "$oldrun/remote-log"
+ nsshfails=$((nsshfails+1))
+ if ((nsshfails > 15))
+ then
+ return 255
+ fi
elif test "$ret" -eq 0
then
return 0
@@ -268,12 +274,23 @@ echo All batches started. `date` | tee -a "$oldrun/remote-log"
for i in $systems
do
echo " ---" Waiting for $i `date` | tee -a "$oldrun/remote-log"
- while checkremotefile "$i" "$resdir/$ds/remote.run"
+ while :
do
+ checkremotefile "$i" "$resdir/$ds/remote.run"
+ ret=$?
+ if test "$ret" -eq 1
+ then
+ echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
+ ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
+ break;
+ fi
+ if test "$ret" -eq 255
+ then
+ echo System $i persistent ssh failure, lost results `date` | tee -a "$oldrun/remote-log"
+ break;
+ fi
sleep 30
done
- echo " ---" Collecting results from $i `date` | tee -a "$oldrun/remote-log"
- ( cd "$oldrun"; ssh -o BatchMode=yes $i "cd $rundir; tar -czf - kvm-remote-*.sh.out */console.log */kvm-test-1-run*.sh.out */qemu[_-]pid */qemu-retval */qemu-affinity; rm -rf $T > /dev/null 2>&1" | tar -xzf - )
done
( kvm-end-run-stats.sh "$oldrun" "$starttime"; echo $? > $T/exitcode ) | tee -a "$oldrun/remote-log"
diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
index 8e50bfd4b710..90318591dae2 100644
--- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot
@@ -5,3 +5,4 @@ rcutree.gp_cleanup_delay=3
rcutree.kthread_prio=2
threadirqs
rcutree.use_softirq=0
+rcutorture.preempt_duration=10
diff --git a/tools/testing/selftests/riscv/abi/pointer_masking.c b/tools/testing/selftests/riscv/abi/pointer_masking.c
index dee41b7ee3e3..059d2e87eb1f 100644
--- a/tools/testing/selftests/riscv/abi/pointer_masking.c
+++ b/tools/testing/selftests/riscv/abi/pointer_masking.c
@@ -185,8 +185,20 @@ static void test_fork_exec(void)
}
}
+static bool pwrite_wrapper(int fd, void *buf, size_t count, const char *msg)
+{
+ int ret = pwrite(fd, buf, count, 0);
+
+ if (ret != count) {
+ ksft_perror(msg);
+ return false;
+ }
+ return true;
+}
+
static void test_tagged_addr_abi_sysctl(void)
{
+ char *err_pwrite_msg = "failed to write to /proc/sys/abi/tagged_addr_disabled\n";
char value;
int fd;
@@ -200,14 +212,18 @@ static void test_tagged_addr_abi_sysctl(void)
}
value = '1';
- pwrite(fd, &value, 1, 0);
- ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == -EINVAL,
- "sysctl disabled\n");
+ if (!pwrite_wrapper(fd, &value, 1, "write '1'"))
+ ksft_test_result_fail(err_pwrite_msg);
+ else
+ ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == -EINVAL,
+ "sysctl disabled\n");
value = '0';
- pwrite(fd, &value, 1, 0);
- ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == 0,
- "sysctl enabled\n");
+ if (!pwrite_wrapper(fd, &value, 1, "write '0'"))
+ ksft_test_result_fail(err_pwrite_msg);
+ else
+ ksft_test_result(set_tagged_addr_ctrl(min_pmlen, true) == 0,
+ "sysctl enabled\n");
set_tagged_addr_ctrl(0, false);
diff --git a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
index 1dd94197da30..6174ffe016dc 100644
--- a/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
+++ b/tools/testing/selftests/riscv/vector/v_initval_nolibc.c
@@ -25,6 +25,8 @@ int main(void)
unsigned long vl;
char *datap, *tmp;
+ ksft_set_plan(1);
+
datap = malloc(MAX_VSIZE);
if (!datap) {
ksft_test_result_fail("fail to allocate memory for size = %d\n", MAX_VSIZE);
@@ -63,6 +65,8 @@ int main(void)
}
free(datap);
+
+ ksft_test_result_pass("tests for v_initval_nolibc pass\n");
ksft_exit_pass();
return 0;
}
diff --git a/tools/testing/selftests/riscv/vector/vstate_prctl.c b/tools/testing/selftests/riscv/vector/vstate_prctl.c
index 895177f6bf4c..40b3bffcbb40 100644
--- a/tools/testing/selftests/riscv/vector/vstate_prctl.c
+++ b/tools/testing/selftests/riscv/vector/vstate_prctl.c
@@ -76,6 +76,8 @@ int main(void)
long flag, expected;
long rc;
+ ksft_set_plan(1);
+
pair.key = RISCV_HWPROBE_KEY_IMA_EXT_0;
rc = riscv_hwprobe(&pair, 1, 0, NULL, 0);
if (rc < 0) {
diff --git a/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py
new file mode 100755
index 000000000000..0f44a6199495
--- /dev/null
+++ b/tools/testing/selftests/tc-testing/scripts/sfq_rejects_limit_1.py
@@ -0,0 +1,21 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+#
+# Script that checks that SFQ rejects a limit of 1 at the kernel
+# level. We can't use iproute2's tc because it does not accept a limit
+# of 1.
+
+import sys
+import os
+
+from pyroute2 import IPRoute
+from pyroute2.netlink.exceptions import NetlinkError
+
+ip = IPRoute()
+ifidx = ip.link_lookup(ifname=sys.argv[1])
+
+try:
+ ip.tc('add', 'sfq', ifidx, limit=1)
+ sys.exit(1)
+except NetlinkError:
+ sys.exit(0)
diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
index 996448afe31b..91d120548bf5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/flow.json
@@ -78,10 +78,10 @@
"setup": [
"$TC qdisc add dev $DEV1 ingress"
],
- "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0xff",
+ "cmdUnderTest": "$TC filter add dev $DEV1 parent ffff: handle 1 prio 1 protocol ip flow map key dst rshift 0x1f",
"expExitCode": "0",
"verifyCmd": "$TC filter get dev $DEV1 parent ffff: handle 1 protocol ip prio 1 flow",
- "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 255 baseclass",
+ "matchPattern": "filter parent ffff: protocol ip pref 1 flow chain [0-9]+ handle 0x1 map keys dst rshift 31 baseclass",
"matchCount": "1",
"teardown": [
"$TC qdisc del dev $DEV1 ingress"
diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
index 16d51936b385..50e8d72781cb 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/sfq.json
@@ -208,5 +208,25 @@
"teardown": [
"$TC qdisc del dev $DUMMY handle 1: root"
]
+ },
+ {
+ "id": "4d6f",
+ "name": "Check that limit of 1 is rejected",
+ "category": [
+ "qdisc",
+ "sfq"
+ ],
+ "plugins": {
+ "requires": "nsPlugin"
+ },
+ "setup": [
+ ],
+ "cmdUnderTest": "./scripts/sfq_rejects_limit_1.py $DUMMY",
+ "expExitCode": "0",
+ "verifyCmd": "$TC qdisc show dev $DUMMY",
+ "matchPattern": "sfq",
+ "matchCount": "0",
+ "teardown": [
+ ]
}
]
diff --git a/tools/testing/shared/linux/maple_tree.h b/tools/testing/shared/linux/maple_tree.h
index 06c89bdcc515..f67d47d32857 100644
--- a/tools/testing/shared/linux/maple_tree.h
+++ b/tools/testing/shared/linux/maple_tree.h
@@ -2,6 +2,6 @@
#define atomic_t int32_t
#define atomic_inc(x) uatomic_inc(x)
#define atomic_read(x) uatomic_read(x)
-#define atomic_set(x, y) do {} while (0)
+#define atomic_set(x, y) uatomic_set(x, y)
#define U8_MAX UCHAR_MAX
#include "../../../../include/linux/maple_tree.h"
diff --git a/tools/testing/vma/linux/atomic.h b/tools/testing/vma/linux/atomic.h
index e01f66f98982..3e1b6adc027b 100644
--- a/tools/testing/vma/linux/atomic.h
+++ b/tools/testing/vma/linux/atomic.h
@@ -6,7 +6,7 @@
#define atomic_t int32_t
#define atomic_inc(x) uatomic_inc(x)
#define atomic_read(x) uatomic_read(x)
-#define atomic_set(x, y) do {} while (0)
+#define atomic_set(x, y) uatomic_set(x, y)
#define U8_MAX UCHAR_MAX
#endif /* _LINUX_ATOMIC_H */
diff --git a/tools/testing/vma/vma.c b/tools/testing/vma/vma.c
index 8fab5e13c7c3..9bcf1736bf18 100644
--- a/tools/testing/vma/vma.c
+++ b/tools/testing/vma/vma.c
@@ -89,7 +89,7 @@ static struct vm_area_struct *alloc_and_link_vma(struct mm_struct *mm,
* begun. Linking to the tree will have caused this to be incremented,
* which means we will get a false positive otherwise.
*/
- vma->vm_lock_seq = -1;
+ vma->vm_lock_seq = UINT_MAX;
return vma;
}
@@ -214,7 +214,7 @@ static bool vma_write_started(struct vm_area_struct *vma)
int seq = vma->vm_lock_seq;
/* We reset after each check. */
- vma->vm_lock_seq = -1;
+ vma->vm_lock_seq = UINT_MAX;
/* The vma_start_write() stub simply increments this value. */
return seq > -1;
diff --git a/tools/testing/vma/vma_internal.h b/tools/testing/vma/vma_internal.h
index e76ff579e1fd..1d9fc97b8e80 100644
--- a/tools/testing/vma/vma_internal.h
+++ b/tools/testing/vma/vma_internal.h
@@ -241,7 +241,7 @@ struct vm_area_struct {
* counter reuse can only lead to occasional unnecessary use of the
* slowpath.
*/
- int vm_lock_seq;
+ unsigned int vm_lock_seq;
struct vma_lock *vm_lock;
#endif
@@ -416,7 +416,7 @@ static inline bool vma_lock_alloc(struct vm_area_struct *vma)
return false;
init_rwsem(&vma->vm_lock->lock);
- vma->vm_lock_seq = -1;
+ vma->vm_lock_seq = UINT_MAX;
return true;
}
diff --git a/tools/testing/vsock/README b/tools/testing/vsock/README
index 84ee217ba8ee..680ce666ceb5 100644
--- a/tools/testing/vsock/README
+++ b/tools/testing/vsock/README
@@ -36,6 +36,21 @@ Invoke test binaries in both directions as follows:
--control-port=1234 \
--peer-cid=3
+Some tests are designed to produce kernel memory leaks. Leaks detection,
+however, is deferred to Kernel Memory Leak Detector. It is recommended to enable
+kmemleak (CONFIG_DEBUG_KMEMLEAK=y) and explicitly trigger a scan after each test
+suite run, e.g.
+
+ # echo clear > /sys/kernel/debug/kmemleak
+ # $TEST_BINARY ...
+ # echo "wait for any grace periods" && sleep 2
+ # echo scan > /sys/kernel/debug/kmemleak
+ # echo "wait for kmemleak" && sleep 5
+ # echo scan > /sys/kernel/debug/kmemleak
+ # cat /sys/kernel/debug/kmemleak
+
+For more information see Documentation/dev-tools/kmemleak.rst.
+
vsock_perf utility
-------------------
'vsock_perf' is a simple tool to measure vsock performance. It works in
diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c
index 34e9dac0a105..7058dc614c25 100644
--- a/tools/testing/vsock/util.c
+++ b/tools/testing/vsock/util.c
@@ -401,7 +401,7 @@ void recv_buf(int fd, void *buf, size_t len, int flags, ssize_t expected_ret)
*/
void send_byte(int fd, int expected_ret, int flags)
{
- const uint8_t byte = 'A';
+ static const uint8_t byte = 'A';
send_buf(fd, &byte, sizeof(byte), flags, expected_ret);
}
@@ -420,7 +420,7 @@ void recv_byte(int fd, int expected_ret, int flags)
recv_buf(fd, &byte, sizeof(byte), flags, expected_ret);
if (byte != 'A') {
- fprintf(stderr, "unexpected byte read %c\n", byte);
+ fprintf(stderr, "unexpected byte read 0x%02x\n", byte);
exit(EXIT_FAILURE);
}
}
@@ -486,8 +486,7 @@ void list_tests(const struct test_case *test_cases)
exit(EXIT_FAILURE);
}
-void skip_test(struct test_case *test_cases, size_t test_cases_len,
- const char *test_id_str)
+static unsigned long parse_test_id(const char *test_id_str, size_t test_cases_len)
{
unsigned long test_id;
char *endptr = NULL;
@@ -505,9 +504,35 @@ void skip_test(struct test_case *test_cases, size_t test_cases_len,
exit(EXIT_FAILURE);
}
+ return test_id;
+}
+
+void skip_test(struct test_case *test_cases, size_t test_cases_len,
+ const char *test_id_str)
+{
+ unsigned long test_id = parse_test_id(test_id_str, test_cases_len);
test_cases[test_id].skip = true;
}
+void pick_test(struct test_case *test_cases, size_t test_cases_len,
+ const char *test_id_str)
+{
+ static bool skip_all = true;
+ unsigned long test_id;
+
+ if (skip_all) {
+ unsigned long i;
+
+ for (i = 0; i < test_cases_len; ++i)
+ test_cases[i].skip = true;
+
+ skip_all = false;
+ }
+
+ test_id = parse_test_id(test_id_str, test_cases_len);
+ test_cases[test_id].skip = false;
+}
+
unsigned long hash_djb2(const void *data, size_t len)
{
unsigned long hash = 5381;
diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h
index ba84d296d8b7..e62f46b2b92a 100644
--- a/tools/testing/vsock/util.h
+++ b/tools/testing/vsock/util.h
@@ -62,6 +62,8 @@ void run_tests(const struct test_case *test_cases,
void list_tests(const struct test_case *test_cases);
void skip_test(struct test_case *test_cases, size_t test_cases_len,
const char *test_id_str);
+void pick_test(struct test_case *test_cases, size_t test_cases_len,
+ const char *test_id_str);
unsigned long hash_djb2(const void *data, size_t len);
size_t iovec_bytes(const struct iovec *iov, size_t iovnum);
unsigned long iovec_hash_djb2(const struct iovec *iov, size_t iovnum);
diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c
index 48f17641ca50..1eebbc0d5f61 100644
--- a/tools/testing/vsock/vsock_test.c
+++ b/tools/testing/vsock/vsock_test.c
@@ -22,12 +22,17 @@
#include <signal.h>
#include <sys/ioctl.h>
#include <linux/sockios.h>
+#include <linux/time64.h>
#include "vsock_test_zerocopy.h"
#include "timeout.h"
#include "control.h"
#include "util.h"
+/* Basic messages for control_writeulong(), control_readulong() */
+#define CONTROL_CONTINUE 1
+#define CONTROL_DONE 0
+
static void test_stream_connection_reset(const struct test_opts *opts)
{
union {
@@ -559,7 +564,7 @@ static time_t current_nsec(void)
exit(EXIT_FAILURE);
}
- return (ts.tv_sec * 1000000000ULL) + ts.tv_nsec;
+ return (ts.tv_sec * NSEC_PER_SEC) + ts.tv_nsec;
}
#define RCVTIMEO_TIMEOUT_SEC 1
@@ -599,7 +604,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts)
}
read_overhead_ns = current_nsec() - read_enter_ns -
- 1000000000ULL * RCVTIMEO_TIMEOUT_SEC;
+ NSEC_PER_SEC * RCVTIMEO_TIMEOUT_SEC;
if (read_overhead_ns > READ_OVERHEAD_NSEC) {
fprintf(stderr,
@@ -1473,6 +1478,236 @@ static void test_stream_cred_upd_on_set_rcvlowat(const struct test_opts *opts)
test_stream_credit_update_test(opts, false);
}
+/* The goal of test leak_acceptq is to stress the race between connect() and
+ * close(listener). Implementation of client/server loops boils down to:
+ *
+ * client server
+ * ------ ------
+ * write(CONTINUE)
+ * expect(CONTINUE)
+ * listen()
+ * write(LISTENING)
+ * expect(LISTENING)
+ * connect() close()
+ */
+#define ACCEPTQ_LEAK_RACE_TIMEOUT 2 /* seconds */
+
+static void test_stream_leak_acceptq_client(const struct test_opts *opts)
+{
+ time_t tout;
+ int fd;
+
+ tout = current_nsec() + ACCEPTQ_LEAK_RACE_TIMEOUT * NSEC_PER_SEC;
+ do {
+ control_writeulong(CONTROL_CONTINUE);
+
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ if (fd >= 0)
+ close(fd);
+ } while (current_nsec() < tout);
+
+ control_writeulong(CONTROL_DONE);
+}
+
+/* Test for a memory leak. User is expected to run kmemleak scan, see README. */
+static void test_stream_leak_acceptq_server(const struct test_opts *opts)
+{
+ int fd;
+
+ while (control_readulong() == CONTROL_CONTINUE) {
+ fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port);
+ control_writeln("LISTENING");
+ close(fd);
+ }
+}
+
+/* Test for a memory leak. User is expected to run kmemleak scan, see README. */
+static void test_stream_msgzcopy_leak_errq_client(const struct test_opts *opts)
+{
+ struct pollfd fds = { 0 };
+ int fd;
+
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ enable_so_zerocopy_check(fd);
+ send_byte(fd, 1, MSG_ZEROCOPY);
+
+ fds.fd = fd;
+ fds.events = 0;
+ if (poll(&fds, 1, -1) < 0) {
+ perror("poll");
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
+static void test_stream_msgzcopy_leak_errq_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ recv_byte(fd, 1, 0);
+ vsock_wait_remote_close(fd);
+ close(fd);
+}
+
+/* Test msgzcopy_leak_zcskb is meant to exercise sendmsg() error handling path,
+ * that might leak an skb. The idea is to fail virtio_transport_init_zcopy_skb()
+ * by hitting net.core.optmem_max limit in sock_omalloc(), specifically
+ *
+ * vsock_connectible_sendmsg
+ * virtio_transport_stream_enqueue
+ * virtio_transport_send_pkt_info
+ * virtio_transport_init_zcopy_skb
+ * . msg_zerocopy_realloc
+ * . msg_zerocopy_alloc
+ * . sock_omalloc
+ * . sk_omem_alloc + size > sysctl_optmem_max
+ * return -ENOMEM
+ *
+ * We abuse the implementation detail of net/socket.c:____sys_sendmsg().
+ * sk_omem_alloc can be precisely bumped by sock_kmalloc(), as it is used to
+ * fetch user-provided control data.
+ *
+ * While this approach works for now, it relies on assumptions regarding the
+ * implementation and configuration (for example, order of net.core.optmem_max
+ * can not exceed MAX_PAGE_ORDER), which may not hold in the future. A more
+ * resilient testing could be implemented by leveraging the Fault injection
+ * framework (CONFIG_FAULT_INJECTION), e.g.
+ *
+ * client# echo N > /sys/kernel/debug/failslab/ignore-gfp-wait
+ * client# echo 0 > /sys/kernel/debug/failslab/verbose
+ *
+ * void client(const struct test_opts *opts)
+ * {
+ * char buf[16];
+ * int f, s, i;
+ *
+ * f = open("/proc/self/fail-nth", O_WRONLY);
+ *
+ * for (i = 1; i < 32; i++) {
+ * control_writeulong(CONTROL_CONTINUE);
+ *
+ * s = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ * enable_so_zerocopy_check(s);
+ *
+ * sprintf(buf, "%d", i);
+ * write(f, buf, strlen(buf));
+ *
+ * send(s, &(char){ 0 }, 1, MSG_ZEROCOPY);
+ *
+ * write(f, "0", 1);
+ * close(s);
+ * }
+ *
+ * control_writeulong(CONTROL_DONE);
+ * close(f);
+ * }
+ *
+ * void server(const struct test_opts *opts)
+ * {
+ * int fd;
+ *
+ * while (control_readulong() == CONTROL_CONTINUE) {
+ * fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ * vsock_wait_remote_close(fd);
+ * close(fd);
+ * }
+ * }
+ *
+ * Refer to Documentation/fault-injection/fault-injection.rst.
+ */
+#define MAX_PAGE_ORDER 10 /* usually */
+#define PAGE_SIZE 4096
+
+/* Test for a memory leak. User is expected to run kmemleak scan, see README. */
+static void test_stream_msgzcopy_leak_zcskb_client(const struct test_opts *opts)
+{
+ size_t optmem_max, ctl_len, chunk_size;
+ struct msghdr msg = { 0 };
+ struct iovec iov;
+ char *chunk;
+ int fd, res;
+ FILE *f;
+
+ f = fopen("/proc/sys/net/core/optmem_max", "r");
+ if (!f) {
+ perror("fopen(optmem_max)");
+ exit(EXIT_FAILURE);
+ }
+
+ if (fscanf(f, "%zu", &optmem_max) != 1) {
+ fprintf(stderr, "fscanf(optmem_max) failed\n");
+ exit(EXIT_FAILURE);
+ }
+
+ fclose(f);
+
+ fd = vsock_stream_connect(opts->peer_cid, opts->peer_port);
+ if (fd < 0) {
+ perror("connect");
+ exit(EXIT_FAILURE);
+ }
+
+ enable_so_zerocopy_check(fd);
+
+ ctl_len = optmem_max - 1;
+ if (ctl_len > PAGE_SIZE << MAX_PAGE_ORDER) {
+ fprintf(stderr, "Try with net.core.optmem_max = 100000\n");
+ exit(EXIT_FAILURE);
+ }
+
+ chunk_size = CMSG_SPACE(ctl_len);
+ chunk = malloc(chunk_size);
+ if (!chunk) {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
+ memset(chunk, 0, chunk_size);
+
+ iov.iov_base = &(char){ 0 };
+ iov.iov_len = 1;
+
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = chunk;
+ msg.msg_controllen = ctl_len;
+
+ errno = 0;
+ res = sendmsg(fd, &msg, MSG_ZEROCOPY);
+ if (res >= 0 || errno != ENOMEM) {
+ fprintf(stderr, "Expected ENOMEM, got errno=%d res=%d\n",
+ errno, res);
+ exit(EXIT_FAILURE);
+ }
+
+ close(fd);
+}
+
+static void test_stream_msgzcopy_leak_zcskb_server(const struct test_opts *opts)
+{
+ int fd;
+
+ fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL);
+ if (fd < 0) {
+ perror("accept");
+ exit(EXIT_FAILURE);
+ }
+
+ vsock_wait_remote_close(fd);
+ close(fd);
+}
+
static struct test_case test_cases[] = {
{
.name = "SOCK_STREAM connection reset",
@@ -1603,6 +1838,21 @@ static struct test_case test_cases[] = {
.run_client = test_seqpacket_unsent_bytes_client,
.run_server = test_seqpacket_unsent_bytes_server,
},
+ {
+ .name = "SOCK_STREAM leak accept queue",
+ .run_client = test_stream_leak_acceptq_client,
+ .run_server = test_stream_leak_acceptq_server,
+ },
+ {
+ .name = "SOCK_STREAM MSG_ZEROCOPY leak MSG_ERRQUEUE",
+ .run_client = test_stream_msgzcopy_leak_errq_client,
+ .run_server = test_stream_msgzcopy_leak_errq_server,
+ },
+ {
+ .name = "SOCK_STREAM MSG_ZEROCOPY leak completion skb",
+ .run_client = test_stream_msgzcopy_leak_zcskb_client,
+ .run_server = test_stream_msgzcopy_leak_zcskb_server,
+ },
{},
};
@@ -1644,6 +1894,11 @@ static const struct option longopts[] = {
.val = 's',
},
{
+ .name = "pick",
+ .has_arg = required_argument,
+ .val = 't',
+ },
+ {
.name = "help",
.has_arg = no_argument,
.val = '?',
@@ -1680,6 +1935,8 @@ static void usage(void)
" --peer-cid <cid> CID of the other side\n"
" --peer-port <port> AF_VSOCK port used for the test [default: %d]\n"
" --list List of tests that will be executed\n"
+ " --pick <test_id> Test ID to execute selectively;\n"
+ " use multiple --pick options to select more tests\n"
" --skip <test_id> Test ID to skip;\n"
" use multiple --skip options to skip more tests\n",
DEFAULT_PEER_PORT
@@ -1736,6 +1993,10 @@ int main(int argc, char **argv)
skip_test(test_cases, ARRAY_SIZE(test_cases) - 1,
optarg);
break;
+ case 't':
+ pick_test(test_cases, ARRAY_SIZE(test_cases) - 1,
+ optarg);
+ break;
case '?':
default:
usage();