39 files changed, 6296 insertions, 64 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index c612d6e38c62..128e548aa377 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -1,9 +1,15 @@
 msg_zerocopy
 socket
 psock_fanout
+psock_snd
 psock_tpacket
 reuseport_bpf
 reuseport_bpf_cpu
 reuseport_bpf_numa
 reuseport_dualstack
 reuseaddr_conflict
+tcp_inq
+tcp_mmap
+udpgso
+udpgso_bench_rx
+udpgso_bench_tx
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3ff81a478dbe..663e11e85727 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -5,13 +5,18 @@ CFLAGS =  -Wall -Wl,--no-as-needed -O2 -g
 CFLAGS += -I../../../../usr/include/
 
 TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh netdevice.sh rtnetlink.sh
-TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh
+TEST_PROGS += fib_tests.sh fib-onlink-tests.sh pmtu.sh udpgso.sh
+TEST_PROGS += udpgso_bench.sh fib_rule_tests.sh msg_zerocopy.sh psock_snd.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy
+TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd
+TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx
 TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
 TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict
 
 include ../lib.mk
 
 $(OUTPUT)/reuseport_bpf_numa: LDFLAGS += -lnuma
+$(OUTPUT)/tcp_mmap: LDFLAGS += -lpthread
+$(OUTPUT)/tcp_inq: LDFLAGS += -lpthread
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
new file mode 100755
index 000000000000..d4cfb6a7a086
--- /dev/null
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -0,0 +1,248 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test is for checking IPv4 and IPv6 FIB rules API
+
+ret=0
+
+PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+IP="ip -netns testns"
+
+RTABLE=100
+GW_IP4=198.51.100.2	# IPv4 test addresses use TEST-NET-2 (RFC 5737)
+SRC_IP=198.51.100.3
+GW_IP6=2001:db8:1::2
+SRC_IP6=2001:db8:1::3
+
+DEV_ADDR=198.51.100.1
+DEV=dummy0
+
+log_test()
+{
+	# $1: actual exit status, $2: expected exit status, $3: test description
+	local rc=$1 expected=$2 msg="$3"
+
+	if [ ${rc} -eq ${expected} ]; then
+		nsuccess=$((nsuccess+1))
+		printf "\n    TEST: %-50s  [ OK ]\n" "${msg}"
+	else
+		# remember the failure; the script finishes with "exit $ret"
+		ret=1
+		nfail=$((nfail+1))
+		printf "\n    TEST: %-50s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			printf "\nhit enter to continue, 'q' to quit\n"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	fi
+}
+
+log_section()
+{
+	echo
+	echo "######################################################################"
+	echo "TEST SECTION: $*"
+	echo "######################################################################"
+}
+
+setup()
+{
+	set -e	# abort the script if any setup step fails
+	ip netns add testns
+	$IP link set dev lo up
+
+	$IP link add $DEV type dummy
+	$IP link set dev $DEV up
+	$IP address add $DEV_ADDR/24 dev $DEV
+	$IP -6 address add 2001:db8:1::1/64 dev $DEV
+
+	set +e
+}
+
+cleanup()
+{
+	$IP link del dev dummy0 &> /dev/null
+	ip netns del testns
+}
+
+fib_check_iproute_support()
+{
+	ip rule help 2>&1 | grep -q $1
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 iprule too old, missing $1 match"
+		return 1
+	fi
+
+	ip route get help 2>&1 | grep -q $2
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 get route too old, missing $2 match"
+		return 1
+	fi
+
+	return 0
+}
+
+fib_rule6_del()
+{
+	$IP -6 rule del $1
+	log_test $? 0 "rule6 del $1"
+}
+
+fib_rule6_del_by_pref()
+{	# delete the IPv6 rule with selector $1 pointing at table $RTABLE, by pref
+	pref=$($IP -6 rule show | grep "$1 lookup $RTABLE" | cut -d ":" -f 1)
+	$IP -6 rule del pref $pref
+}
+
+fib_rule6_test_match_n_redirect()
+{
+	local match="$1"
+	local getmatch="$2"
+
+	$IP -6 rule add $match table $RTABLE
+	$IP -6 route get $GW_IP6 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule6 check: $1"
+
+	fib_rule6_del_by_pref "$match"
+	log_test $? 0 "rule6 del by pref: $match"
+}
+
+fib_rule6_test()
+{
+	# setup the fib rule redirect route
+	$IP -6 route add table $RTABLE default via $GW_IP6 dev $DEV onlink
+
+	match="oif $DEV"
+	fib_rule6_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+	match="from $SRC_IP6 iif $DEV"
+	fib_rule6_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+	match="tos 0x10"
+	fib_rule6_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+	match="fwmark 0x64"
+	getmatch="mark 0x64"
+	fib_rule6_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+	fib_check_iproute_support "uidrange" "uid"
+	if [ $? -eq 0 ]; then
+		match="uidrange 100-100"
+		getmatch="uid 100"
+		fib_rule6_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+	fi
+
+	fib_check_iproute_support "sport" "sport"
+	if [ $? -eq 0 ]; then
+		match="sport 666 dport 777"
+		fib_rule6_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto tcp"
+		fib_rule6_test_match_n_redirect "$match" "$match" "ipproto match"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto icmp"
+		fib_rule6_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+	fi
+}
+
+fib_rule4_del()
+{
+	$IP rule del $1
+	log_test $? 0 "del $1"
+}
+
+fib_rule4_del_by_pref()
+{	# delete the IPv4 rule with selector $1 pointing at table $RTABLE, by pref
+	pref=$($IP rule show | grep "$1 lookup $RTABLE" | cut -d ":" -f 1)
+	$IP rule del pref $pref
+}
+
+fib_rule4_test_match_n_redirect()
+{
+	local match="$1"
+	local getmatch="$2"
+
+	$IP rule add $match table $RTABLE
+	$IP route get $GW_IP4 $getmatch | grep -q "table $RTABLE"
+	log_test $? 0 "rule4 check: $1"
+
+	fib_rule4_del_by_pref "$match"
+	log_test $? 0 "rule4 del by pref: $match"
+}
+
+fib_rule4_test()
+{
+	# setup the fib rule redirect route
+	$IP route add table $RTABLE default via $GW_IP4 dev $DEV onlink
+
+	match="oif $DEV"
+	fib_rule4_test_match_n_redirect "$match" "$match" "oif redirect to table"
+
+	match="from $SRC_IP iif $DEV"
+	fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
+
+	match="tos 0x10"
+	fib_rule4_test_match_n_redirect "$match" "$match" "tos redirect to table"
+
+	match="fwmark 0x64"
+	getmatch="mark 0x64"
+	fib_rule4_test_match_n_redirect "$match" "$getmatch" "fwmark redirect to table"
+
+	fib_check_iproute_support "uidrange" "uid"
+	if [ $? -eq 0 ]; then
+		match="uidrange 100-100"
+		getmatch="uid 100"
+		fib_rule4_test_match_n_redirect "$match" "$getmatch" "uid redirect to table"
+	fi
+
+	fib_check_iproute_support "sport" "sport"
+	if [ $? -eq 0 ]; then
+		match="sport 666 dport 777"
+		fib_rule4_test_match_n_redirect "$match" "$match" "sport and dport redirect to table"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto tcp"
+		fib_rule4_test_match_n_redirect "$match" "$match" "ipproto tcp match"
+	fi
+
+	fib_check_iproute_support "ipproto" "ipproto"
+	if [ $? -eq 0 ]; then
+		match="ipproto icmp"
+		fib_rule4_test_match_n_redirect "$match" "$match" "ipproto icmp match"
+	fi
+}
+
+run_fibrule_tests()
+{
+	log_section "IPv4 fib rule"
+	fib_rule4_test
+	log_section "IPv6 fib rule"
+	fib_rule6_test
+}
+
+if [ "$(id -u)" -ne 0 ];then
+	echo "SKIP: Need root privileges"
+	exit 4	# kselftest framework SKIP code
+fi
+
+if [ ! -x "$(command -v ip)" ]; then
+	echo "SKIP: Could not run test without ip tool"
+	exit 4	# kselftest framework SKIP code
+fi
+
+# start clean
+cleanup &> /dev/null
+setup
+run_fibrule_tests
+cleanup
+
+exit $ret
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 5baac82b9287..78245d60d8bc 100755..100644
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -8,8 +8,11 @@ ret=0
 # Kselftest framework requirement - SKIP code is 4.
 ksft_skip=4
 
-VERBOSE=${VERBOSE:=0}
-PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
+# all tests in this script. Can be overridden with -t option
+TESTS="unregister down carrier nexthop ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric"
+VERBOSE=0
+PAUSE_ON_FAIL=no
+PAUSE=no
 IP="ip -netns testns"
 
 log_test()
@@ -20,8 +23,10 @@ log_test()
 
 	if [ ${rc} -eq ${expected} ]; then
 		printf "    TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
 	else
 		ret=1
+		nfail=$((nfail+1))
 		printf "    TEST: %-60s  [FAIL]\n" "${msg}"
 		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
 		echo
@@ -30,6 +35,13 @@ log_test()
 			[ "$a" = "q" ] && exit 1
 		fi
 	fi
+
+	if [ "${PAUSE}" = "yes" ]; then
+		echo
+		echo "hit enter to continue, 'q' to quit"
+		read a
+		[ "$a" = "q" ] && exit 1
+	fi
 }
 
 setup()
@@ -565,20 +577,825 @@ fib_nexthop_test()
 }
 
 ################################################################################
-#
+# Tests on route add and replace
+
+run_cmd()
+{
+	local cmd="$1"
+	local out
+	local stderr="2>/dev/null"
+	# $1 is eval'ed; its stderr is discarded unless VERBOSE is 1
+	if [ "$VERBOSE" = "1" ]; then
+		printf "    COMMAND: %s\n" "$cmd"
+		stderr=
+	fi
+
+	out=$(eval $cmd $stderr)
+	rc=$?
+	if [ "$VERBOSE" = "1" -a -n "$out" ]; then
+		echo "    $out"
+	fi
+
+	[ "$VERBOSE" = "1" ] && echo
+
+	return $rc
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route6()
+{
+	local pfx="$1"
+	local nh="$2"
+	local out
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "    ##################################################"
+		echo
+	fi
+
+	run_cmd "$IP -6 ro flush ${pfx}"
+	[ $? -ne 0 ] && exit 1
+
+	out=$($IP -6 ro ls match ${pfx})
+	if [ -n "$out" ]; then
+		echo "Failed to flush routes for prefix used for tests."
+		exit 1
+	fi
+
+	run_cmd "$IP -6 ro add ${pfx} ${nh}"
+	if [ $? -ne 0 ]; then
+		echo "Failed to add initial route for test."
+		exit 1
+	fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route6()
+{
+	add_route6 "2001:db8:104::/64" "$1"
+}
+
+check_route6()
+{
+	local pfx="2001:db8:104::/64"
+	local expected="$1"
+	local out
+	local rc=0
+
+	out=$($IP -6 ro ls match ${pfx} | sed -e 's/ pref medium//')
+	[ "${out}" = "${expected}" ] && return 0
+
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo route entry found\n"
+			printf "Expected:\n"
+			printf "    ${expected}\n"
+		fi
+		return 1
+	fi
+
+	# tricky way to convert output to 1-line without ip's
+	# messy '\'; this drops all extra white space
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf "    Unexpected route entry. Have:\n"
+			printf "        ${out}\n"
+			printf "    Expected:\n"
+			printf "        ${expected}\n\n"
+		fi
+	fi
+
+	return $rc
+}
+
+route_cleanup()
+{
+	$IP li del red 2>/dev/null
+	$IP li del dummy1 2>/dev/null
+	$IP li del veth1 2>/dev/null
+	$IP li del veth3 2>/dev/null
+
+	cleanup &> /dev/null
+}
+
+route_setup()
+{
+	route_cleanup
+	setup
+
+	[ "${VERBOSE}" = "1" ] && set -x
+	set -e
+
+	$IP li add red up type vrf table 101
+	$IP li add veth1 type veth peer name veth2
+	$IP li add veth3 type veth peer name veth4
+
+	$IP li set veth1 up
+	$IP li set veth3 up
+	$IP li set veth2 vrf red up
+	$IP li set veth4 vrf red up
+	$IP li add dummy1 type dummy
+	$IP li set dummy1 vrf red up
+
+	$IP -6 addr add 2001:db8:101::1/64 dev veth1
+	$IP -6 addr add 2001:db8:101::2/64 dev veth2
+	$IP -6 addr add 2001:db8:103::1/64 dev veth3
+	$IP -6 addr add 2001:db8:103::2/64 dev veth4
+	$IP -6 addr add 2001:db8:104::1/64 dev dummy1
+
+	$IP addr add 172.16.101.1/24 dev veth1
+	$IP addr add 172.16.101.2/24 dev veth2
+	$IP addr add 172.16.103.1/24 dev veth3
+	$IP addr add 172.16.103.2/24 dev veth4
+	$IP addr add 172.16.104.1/24 dev dummy1
+
+	set +ex
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv6_rt_add()
+{
+	local rc
+
+	echo
+	echo "IPv6 route add / append tests"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2"
+	log_test $? 2 "Attempt to add duplicate route - gw"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 dev veth3"
+	log_test $? 2 "Attempt to add duplicate route - dev only"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+	log_test $? 2 "Attempt to add duplicate route - reject route"
+
+	# iproute2 prepend only sets NLM_F_CREATE
+	# - adds a new route; does NOT convert existing route to ECMP
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro prepend 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024 2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "Add new route for existing prefix (w/o NLM_F_EXCL)"
+
+	# route append with same prefix adds a new route
+	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Append nexthop to existing route - gw"
+
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop dev veth3 weight 1"
+	log_test $? 0 "Append nexthop to existing route - dev only"
+
+	# multipath route can not have a nexthop that is a reject route
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro append unreachable 2001:db8:104::/64"
+	log_test $? 2 "Append nexthop to existing route - reject route"
+
+	# reject route can not be converted to multipath route
+	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
+	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 via 2001:db8:103::2"
+	log_test $? 2 "Append nexthop to existing reject route - gw"
+
+	run_cmd "$IP -6 ro flush 2001:db8:104::/64"
+	run_cmd "$IP -6 ro add unreachable 2001:db8:104::/64"
+	run_cmd "$IP -6 ro append 2001:db8:104::/64 dev veth3"
+	log_test $? 2 "Append nexthop to existing reject route - dev only"
+
+	# insert mpath directly
+	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Add multipath route"
+
+	add_route6 "2001:db8:104::/64" "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	log_test $? 2 "Attempt to add duplicate multipath route"
+
+	# insert of a second route without append but different metric
+	add_route6 "2001:db8:104::/64" "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::2 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		run_cmd "$IP -6 ro add 2001:db8:104::/64 via 2001:db8:103::3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "Route add with different metrics"
+
+	run_cmd "$IP -6 ro del 2001:db8:104::/64 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 via 2001:db8:103::3 dev veth3 metric 256 2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+		rc=$?
+	fi
+	log_test $rc 0 "Route delete with metric"
+}
 
-fib_test()
+ipv6_rt_replace_single()
 {
-	if [ -n "$TEST" ]; then
-		eval $TEST
+	# single path with single path
+	#
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "Single path with single path"
+
+	# single path with multipath
+	#
+	add_initial_route6 "nexthop via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Single path with multipath"
+
+	# single path with reject
+	#
+	add_initial_route6 "nexthop via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
+	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
+	log_test $? 0 "Single path with reject route"
+
+	# single path with single path using MULTIPATH attribute
+	#
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:103::2"
+	check_route6 "2001:db8:104::/64 via 2001:db8:103::2 dev veth3 metric 1024"
+	log_test $? 0 "Single path with single path via multipath attribute"
+
+	# route replace fails - invalid nexthop
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:104::2"
+	if [ $? -eq 0 ]; then
+		# previous command is expected to fail so if it returns 0
+		# that means the test failed.
+		log_test 0 1 "Invalid nexthop"
 	else
-		fib_unreg_test
-		fib_down_test
-		fib_carrier_test
-		fib_nexthop_test
+		check_route6 "2001:db8:104::/64 via 2001:db8:101::2 dev veth1 metric 1024"
+		log_test $? 0 "Invalid nexthop"
 	fi
+
+	# replace non-existent route
+	# - note use of change versus replace since ip adds NLM_F_CREATE
+	#   for replace
+	add_initial_route6 "via 2001:db8:101::2"
+	run_cmd "$IP -6 ro change 2001:db8:105::/64 via 2001:db8:101::2"
+	log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv6_rt_replace_mpath()
+{
+	# multipath with multipath
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::3 dev veth1 weight 1 nexthop via 2001:db8:103::3 dev veth3 weight 1"
+	log_test $? 0 "Multipath with multipath"
+
+	# multipath with single
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 via 2001:db8:101::3"
+	check_route6  "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+	log_test $? 0 "Multipath with single path"
+
+	# multipath with single path using MULTIPATH attribute
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3"
+	check_route6 "2001:db8:104::/64 via 2001:db8:101::3 dev veth1 metric 1024"
+	log_test $? 0 "Multipath with single path via multipath attribute"
+
+	# multipath with reject
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace unreachable 2001:db8:104::/64"
+	check_route6 "unreachable 2001:db8:104::/64 dev lo metric 1024"
+	log_test $? 0 "Multipath with reject route"
+
+	# route replace fails - invalid nexthop 1
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:111::3 nexthop via 2001:db8:103::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid first nexthop"
+
+	# route replace fails - invalid nexthop 2
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro replace 2001:db8:104::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:113::3"
+	check_route6  "2001:db8:104::/64 metric 1024 nexthop via 2001:db8:101::2 dev veth1 weight 1 nexthop via 2001:db8:103::2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid second nexthop"
+
+	# multipath non-existent route
+	add_initial_route6 "nexthop via 2001:db8:101::2 nexthop via 2001:db8:103::2"
+	run_cmd "$IP -6 ro change 2001:db8:105::/64 nexthop via 2001:db8:101::3 nexthop via 2001:db8:103::3"
+	log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv6_rt_replace()
+{
+	echo
+	echo "IPv6 route replace tests"
+
+	ipv6_rt_replace_single
+	ipv6_rt_replace_mpath
+}
+
+ipv6_route_test()
+{
+	route_setup
+
+	ipv6_rt_add
+	ipv6_rt_replace
+
+	route_cleanup
 }
 
+ip_addr_metric_check()
+{
+	ip addr help 2>&1 | grep -q metric
+	if [ $? -ne 0 ]; then
+		echo "iproute2 command does not support metric for addresses. Skipping test"
+		return 1
+	fi
+
+	return 0
+}
+
+ipv6_addr_metric_test()
+{
+	local rc
+
+	echo
+	echo "IPv6 prefix route tests"
+
+	ip_addr_metric_check || return 1
+
+	setup
+
+	set -e
+	$IP li add dummy1 type dummy
+	$IP li add dummy2 type dummy
+	$IP li set dummy1 up
+	$IP li set dummy2 up
+
+	# default entry is metric 256
+	run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 256 2001:db8:104::/64 dev dummy2 proto kernel metric 256"
+	log_test $? 0 "Default metric"
+
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy1"
+	run_cmd "$IP -6 addr add dev dummy1 2001:db8:104::1/64 metric 257"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 256 2001:db8:104::/64 dev dummy1 proto kernel metric 257"
+	log_test $? 0 "User specified metric on first device"
+
+	set -e
+	run_cmd "$IP -6 addr flush dev dummy2"
+	run_cmd "$IP -6 addr add dev dummy2 2001:db8:104::2/64 metric 258"
+	set +e
+
+	check_route6 "2001:db8:104::/64 dev dummy1 proto kernel metric 257 2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+	log_test $? 0 "User specified metric on second device"
+
+	run_cmd "$IP -6 addr del dev dummy1 2001:db8:104::1/64 metric 257"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 258"
+		rc=$?
+	fi
+	log_test $rc 0 "Delete of address on first device"
+
+	run_cmd "$IP -6 addr change dev dummy2 2001:db8:104::2/64 metric 259"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric of address"
+
+	# verify prefix route removed on down
+	run_cmd "ip netns exec testns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+	run_cmd "$IP li set dev dummy2 down"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 ""
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route removed on link down"
+
+	# verify prefix route re-inserted with assigned metric
+	run_cmd "$IP li set dev dummy2 up"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route6 "2001:db8:104::/64 dev dummy2 proto kernel metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route with metric on link up"
+
+	$IP li del dummy1
+	$IP li del dummy2
+	cleanup
+}
+
+# add route for a prefix, flushing any existing routes first
+# expected to be the first step of a test
+add_route()
+{
+	local pfx="$1"
+	local nh="$2"
+	local out
+
+	if [ "$VERBOSE" = "1" ]; then
+		echo
+		echo "    ##################################################"
+		echo
+	fi
+
+	run_cmd "$IP ro flush ${pfx}"
+	[ $? -ne 0 ] && exit 1
+
+	out=$($IP ro ls match ${pfx})
+	if [ -n "$out" ]; then
+		echo "Failed to flush routes for prefix used for tests."
+		exit 1
+	fi
+
+	run_cmd "$IP ro add ${pfx} ${nh}"
+	if [ $? -ne 0 ]; then
+		echo "Failed to add initial route for test."
+		exit 1
+	fi
+}
+
+# add initial route - used in replace route tests
+add_initial_route()
+{
+	add_route "172.16.104.0/24" "$1"
+}
+
+check_route()
+{
+	local pfx="172.16.104.0/24"
+	local expected="$1"
+	local out
+	local rc=0
+
+	out=$($IP ro ls match ${pfx})
+	[ "${out}" = "${expected}" ] && return 0
+
+	if [ -z "${out}" ]; then
+		if [ "$VERBOSE" = "1" ]; then
+			printf "\nNo route entry found\n"
+			printf "Expected:\n"
+			printf "    ${expected}\n"
+		fi
+		return 1
+	fi
+
+	# tricky way to convert output to 1-line without ip's
+	# messy '\'; this drops all extra white space
+	out=$(echo ${out})
+	if [ "${out}" != "${expected}" ]; then
+		rc=1
+		if [ "${VERBOSE}" = "1" ]; then
+			printf "    Unexpected route entry. Have:\n"
+			printf "        ${out}\n"
+			printf "    Expected:\n"
+			printf "        ${expected}\n\n"
+		fi
+	fi
+
+	return $rc
+}
+
+# assumption is that basic add of a single path route works
+# otherwise just adding an address on an interface is broken
+ipv4_rt_add()
+{
+	local rc
+
+	echo
+	echo "IPv4 route add / append tests"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2"
+	log_test $? 2 "Attempt to add duplicate route - gw"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 dev veth3"
+	log_test $? 2 "Attempt to add duplicate route - dev only"
+
+	# route add same prefix - fails with EEXISTS b/c ip adds NLM_F_EXCL
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	log_test $? 2 "Attempt to add duplicate route - reject route"
+
+	# iproute2 prepend only sets NLM_F_CREATE
+	# - adds a new route; does NOT convert existing route to ECMP
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro prepend 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3 172.16.104.0/24 via 172.16.101.2 dev veth1"
+	log_test $? 0 "Add new nexthop for existing prefix"
+
+	# route append with same prefix adds a new route
+	# - iproute2 sets NLM_F_CREATE | NLM_F_APPEND
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Append nexthop to existing route - gw"
+
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 dev veth3 scope link"
+	log_test $? 0 "Append nexthop to existing route - dev only"
+
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro append unreachable 172.16.104.0/24"
+	check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 unreachable 172.16.104.0/24"
+	log_test $? 0 "Append nexthop to existing route - reject route"
+
+	run_cmd "$IP ro flush 172.16.104.0/24"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	run_cmd "$IP ro append 172.16.104.0/24 via 172.16.103.2"
+	check_route "unreachable 172.16.104.0/24 172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Append nexthop to existing reject route - gw"
+
+	run_cmd "$IP ro flush 172.16.104.0/24"
+	run_cmd "$IP ro add unreachable 172.16.104.0/24"
+	run_cmd "$IP ro append 172.16.104.0/24 dev veth3"
+	check_route "unreachable 172.16.104.0/24 172.16.104.0/24 dev veth3 scope link"
+	log_test $? 0 "Append nexthop to existing reject route - dev only"
+
+	# insert mpath directly
+	add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "add multipath route"
+
+	add_route "172.16.104.0/24" "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro add 172.16.104.0/24 nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	log_test $? 2 "Attempt to add duplicate multipath route"
+
+	# insert of a second route without append but different metric
+	add_route "172.16.104.0/24" "via 172.16.101.2"
+	run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.2 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		run_cmd "$IP ro add 172.16.104.0/24 via 172.16.103.3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "Route add with different metrics"
+
+	run_cmd "$IP ro del 172.16.104.0/24 metric 512"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 via 172.16.101.2 dev veth1 172.16.104.0/24 via 172.16.103.3 dev veth3 metric 256"
+		rc=$?
+	fi
+	log_test $rc 0 "Route delete with metric"
+}
+
+ipv4_rt_replace_single()
+{
+	# single path with single path
+	#
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Single path with single path"
+
+	# single path with multipath
+	#
+	add_initial_route "nexthop via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.2"
+	check_route "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Single path with multipath"
+
+	# single path with reject
+	#
+	add_initial_route "nexthop via 172.16.101.2"
+	run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+	check_route "unreachable 172.16.104.0/24"
+	log_test $? 0 "Single path with reject route"
+
+	# single path with single path using MULTIPATH attribute
+	#
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.103.2"
+	check_route "172.16.104.0/24 via 172.16.103.2 dev veth3"
+	log_test $? 0 "Single path with single path via multipath attribute"
+
+	# route replace fails - invalid nexthop
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 2001:db8:104::2"
+	if [ $? -eq 0 ]; then
+		# previous command is expected to fail so if it returns 0
+		# that means the test failed.
+		log_test 0 1 "Invalid nexthop"
+	else
+		check_route "172.16.104.0/24 via 172.16.101.2 dev veth1"
+		log_test $? 0 "Invalid nexthop"
+	fi
+
+	# replace non-existent route
+	# - note use of change versus replace since ip adds NLM_F_CREATE
+	#   for replace
+	add_initial_route "via 172.16.101.2"
+	run_cmd "$IP ro change 172.16.105.0/24 via 172.16.101.2"
+	log_test $? 2 "Single path - replace of non-existent route"
+}
+
+ipv4_rt_replace_mpath()
+{
+	# multipath with multipath
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.3 dev veth1 weight 1 nexthop via 172.16.103.3 dev veth3 weight 1"
+	log_test $? 0 "Multipath with multipath"
+
+	# multipath with single
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 via 172.16.101.3"
+	check_route  "172.16.104.0/24 via 172.16.101.3 dev veth1"
+	log_test $? 0 "Multipath with single path"
+
+	# multipath with single path using MULTIPATH attribute
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3"
+	check_route "172.16.104.0/24 via 172.16.101.3 dev veth1"
+	log_test $? 0 "Multipath with single path via multipath attribute"
+
+	# multipath with reject
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace unreachable 172.16.104.0/24"
+	check_route "unreachable 172.16.104.0/24"
+	log_test $? 0 "Multipath with reject route"
+
+	# route replace fails - invalid nexthop 1
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.111.3 nexthop via 172.16.103.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid first nexthop"
+
+	# route replace fails - invalid nexthop 2
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro replace 172.16.104.0/24 nexthop via 172.16.101.3 nexthop via 172.16.113.3"
+	check_route  "172.16.104.0/24 nexthop via 172.16.101.2 dev veth1 weight 1 nexthop via 172.16.103.2 dev veth3 weight 1"
+	log_test $? 0 "Multipath - invalid second nexthop"
+
+	# multipath non-existent route
+	add_initial_route "nexthop via 172.16.101.2 nexthop via 172.16.103.2"
+	run_cmd "$IP ro change 172.16.105.0/24 nexthop via 172.16.101.3 nexthop via 172.16.103.3"
+	log_test $? 2 "Multipath - replace of non-existent route"
+}
+
+ipv4_rt_replace()
+{
+	echo
+	echo "IPv4 route replace tests"
+
+	ipv4_rt_replace_single
+	ipv4_rt_replace_mpath
+}
+
+ipv4_route_test()
+{
+	route_setup
+
+	ipv4_rt_add
+	ipv4_rt_replace
+
+	route_cleanup
+}
+
+ipv4_addr_metric_test()
+{
+	local rc
+
+	echo
+	echo "IPv4 prefix route tests"
+
+	ip_addr_metric_check || return 1
+
+	setup
+
+	set -e
+	$IP li add dummy1 type dummy
+	$IP li add dummy2 type dummy
+	$IP li set dummy1 up
+	$IP li set dummy2 up
+
+	# default IPv4 prefix route is listed without a metric
+	run_cmd "$IP addr add dev dummy1 172.16.104.1/24"
+	run_cmd "$IP addr add dev dummy2 172.16.104.2/24"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2"
+	log_test $? 0 "Default metric"
+
+	set -e
+	run_cmd "$IP addr flush dev dummy1"
+	run_cmd "$IP addr add dev dummy1 172.16.104.1/24 metric 257"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257"
+	log_test $? 0 "User specified metric on first device"
+
+	set -e
+	run_cmd "$IP addr flush dev dummy2"
+	run_cmd "$IP addr add dev dummy2 172.16.104.2/24 metric 258"
+	set +e
+
+	check_route "172.16.104.0/24 dev dummy1 proto kernel scope link src 172.16.104.1 metric 257 172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+	log_test $? 0 "User specified metric on second device"
+
+	run_cmd "$IP addr del dev dummy1 172.16.104.1/24 metric 257"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 258"
+		rc=$?
+	fi
+	log_test $rc 0 "Delete of address on first device"
+
+	run_cmd "$IP addr change dev dummy2 172.16.104.2/24 metric 259"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Modify metric of address"
+
+	# verify prefix route removed on down
+	run_cmd "$IP li set dev dummy2 down"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route ""
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route removed on link down"
+
+	# verify prefix route re-inserted with assigned metric
+	run_cmd "$IP li set dev dummy2 up"
+	rc=$?
+	if [ $rc -eq 0 ]; then
+		check_route "172.16.104.0/24 dev dummy2 proto kernel scope link src 172.16.104.2 metric 259"
+		rc=$?
+	fi
+	log_test $rc 0 "Prefix route with metric on link up"
+
+	$IP li del dummy1
+	$IP li del dummy2
+	cleanup
+}
+
+################################################################################
+# usage
+
+usage()
+{
+	cat <<EOF
+usage: ${0##*/} OPTS
+
+        -t <test>   Test(s) to run (default: all)
+                    (options: $TESTS)
+        -p          Pause on fail
+        -P          Pause after each test before cleanup
+        -v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :t:pPhv o
+do
+	case $o in
+		t) TESTS=$OPTARG;;
+		p) PAUSE_ON_FAIL=yes;;
+		P) PAUSE=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+PEER_CMD="ip netns exec ${PEER_NS}"
+
+# make sure we don't pause twice
+[ "${PAUSE}" = "yes" ] && PAUSE_ON_FAIL=no
+
 if [ "$(id -u)" -ne 0 ];then
 	echo "SKIP: Need root privileges"
 	exit $ksft_skip;
@@ -598,6 +1415,25 @@ fi
 # start clean
 cleanup &> /dev/null
 
-fib_test
+for t in $TESTS
+do
+	case $t in
+	fib_unreg_test|unregister)	fib_unreg_test;;
+	fib_down_test|down)		fib_down_test;;
+	fib_carrier_test|carrier)	fib_carrier_test;;
+	fib_nexthop_test|nexthop)	fib_nexthop_test;;
+	ipv6_route_test|ipv6_rt)	ipv6_route_test;;
+	ipv4_route_test|ipv4_rt)	ipv4_route_test;;
+	ipv6_addr_metric)		ipv6_addr_metric_test;;
+	ipv4_addr_metric)		ipv4_addr_metric_test;;
+
+	help) echo "Test names: $TESTS"; exit 0;;
+	esac
+done
+
+if [ "$TESTS" != "none" ]; then
+	printf "\nTests passed: %3d\n" ${nsuccess}
+	printf "Tests failed: %3d\n"   ${nfail}
+fi
 
 exit $ret
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
index 75d922438bc9..d8313d0438b7 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_aware.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
 NUM_NETIFS=4
 CHECK_TC="yes"
 source lib.sh
@@ -75,14 +76,31 @@ cleanup()
 	vrf_cleanup
 }
 
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+	learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+	flood_test $swp2 $h1 $h2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
index 1cddf06f691d..c15c6c85c984 100755
--- a/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_vlan_unaware.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6 learning flooding"
 NUM_NETIFS=4
 source lib.sh
 
@@ -73,14 +74,31 @@ cleanup()
 	vrf_cleanup
 }
 
+ping_ipv4()
+{
+	ping_test $h1 192.0.2.2
+}
+
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
+learning()
+{
+	learning_test "br0" $swp1 $h1 $h2
+}
+
+flooding()
+{
+	flood_test $swp2 $h1 $h2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 192.0.2.2
-ping6_test $h1 2001:db8:1::2
-learning_test "br0" $swp1 $h1 $h2
-flood_test $swp2 $h1 $h2
+tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index 1ac6c62271f3..7b18a53aa556 100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -321,6 +321,50 @@ simple_if_fini()
 	vrf_destroy $vrf_name
 }
 
+tunnel_create()
+{
+	local name=$1; shift
+	local type=$1; shift
+	local local=$1; shift	# tunnel's local endpoint address
+	local remote=$1; shift	# tunnel's remote endpoint address
+	# Any remaining arguments are passed through to "ip link add" as-is.
+	ip link add name $name type $type \
+	   local $local remote $remote "$@"
+	ip link set dev $name up
+}
+
+tunnel_destroy()
+{
+	local name=$1; shift
+
+	ip link del dev $name
+}
+
+vlan_create()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local vrf=$1; shift	# may be empty: device is then not enslaved
+	local ips=("${@}")	# remaining arguments, handed to __addr_add_del
+	local name=$if_name.$vid	# VLAN device is named <if_name>.<vid>
+
+	ip link add name $name link $if_name type vlan id $vid
+	if [ "$vrf" != "" ]; then
+		ip link set dev $name master $vrf
+	fi
+	ip link set dev $name up
+	__addr_add_del $name add "${ips[@]}"
+}
+
+vlan_destroy()
+{
+	local if_name=$1; shift
+	local vid=$1; shift
+	local name=$if_name.$vid
+
+	ip link del dev $name
+}
+
 master_name_get()
 {
 	local if_name=$1
@@ -335,6 +379,15 @@ link_stats_tx_packets_get()
        ip -j -s link show dev $if_name | jq '.[]["stats64"]["tx"]["packets"]'
 }
 
+tc_rule_stats_get()
+{
+	local dev=$1; shift
+	local pref=$1; shift
+	# Print the per-action packet counters of $dev's ingress filter at $pref.
+	tc -j -s filter show dev $dev ingress pref $pref |
+	jq '.[1].options.actions[].stats.packets'	# NOTE(review): assumes entry 1 of tc's JSON holds the actions - confirm
+}
+
 mac_get()
 {
 	local if_name=$1
@@ -353,19 +406,33 @@ bridge_ageing_time_get()
 	echo $((ageing_time / 100))
 }
 
-forwarding_enable()
+declare -A SYSCTL_ORIG
+sysctl_set()
+{
+	local key=$1; shift
+	local value=$1; shift
+
+	SYSCTL_ORIG[$key]=$(sysctl -n $key)
+	sysctl -qw $key=$value
+}
+
+sysctl_restore()
 {
-       ipv4_fwd=$(sysctl -n net.ipv4.conf.all.forwarding)
-       ipv6_fwd=$(sysctl -n net.ipv6.conf.all.forwarding)
+	local key=$1; shift
 
-       sysctl -q -w net.ipv4.conf.all.forwarding=1
-       sysctl -q -w net.ipv6.conf.all.forwarding=1
+	sysctl -qw $key=${SYSCTL_ORIG["$key"]}
+}
+
+forwarding_enable()
+{
+	sysctl_set net.ipv4.conf.all.forwarding 1
+	sysctl_set net.ipv6.conf.all.forwarding 1
 }
 
 forwarding_restore()
 {
-       sysctl -q -w net.ipv6.conf.all.forwarding=$ipv6_fwd
-       sysctl -q -w net.ipv4.conf.all.forwarding=$ipv4_fwd
+	sysctl_restore net.ipv6.conf.all.forwarding
+	sysctl_restore net.ipv4.conf.all.forwarding
 }
 
 tc_offload_check()
@@ -381,6 +448,115 @@ tc_offload_check()
 	return 0
 }
 
+trap_install()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+
+	# For slow-path testing, install a trap so that packets which would
+	# otherwise be switched in HW are sent to the slow path instead.
+	tc filter add dev $dev $direction pref 1 flower skip_sw action trap
+}
+
+trap_uninstall()
+{
+	local dev=$1; shift
+	local direction=$1; shift
+
+	tc filter del dev $dev $direction pref 1 flower skip_sw
+}
+
+slow_path_trap_install()
+{
+	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then	# true when tcflags contains "skip_hw"
+		trap_install "$@"
+	fi
+}
+
+slow_path_trap_uninstall()
+{
+	if [ "${tcflags/skip_hw}" != "$tcflags" ]; then	# same condition as slow_path_trap_install
+		trap_uninstall "$@"
+	fi
+}
+
+__icmp_capture_add_del()
+{
+	local add_del=$1; shift
+	local pref=$1; shift
+	local vsuf=$1; shift
+	local tundev=$1; shift
+	local filter=$1; shift
+
+	tc filter $add_del dev "$tundev" ingress \
+	   proto ip$vsuf pref $pref \
+	   flower ip_proto icmp$vsuf $filter \
+	   action pass
+}
+
+icmp_capture_install()
+{
+	__icmp_capture_add_del add 100 "" "$@"
+}
+
+icmp_capture_uninstall()
+{
+	__icmp_capture_add_del del 100 "" "$@"
+}
+
+icmp6_capture_install()
+{
+	__icmp_capture_add_del add 100 v6 "$@"
+}
+
+icmp6_capture_uninstall()
+{
+	__icmp_capture_add_del del 100 v6 "$@"
+}
+
+__vlan_capture_add_del()
+{
+	local add_del=$1; shift
+	local pref=$1; shift
+	local dev=$1; shift
+	local filter=$1; shift
+
+	tc filter $add_del dev "$dev" ingress \
+	   proto 802.1q pref $pref \
+	   flower $filter \
+	   action pass
+}
+
+vlan_capture_install()
+{
+	__vlan_capture_add_del add 100 "$@"
+}
+
+vlan_capture_uninstall()
+{
+	__vlan_capture_add_del del 100 "$@"
+}
+
+matchall_sink_create()
+{
+	local dev=$1; shift
+
+	tc qdisc add dev $dev clsact
+	tc filter add dev $dev ingress \
+	   pref 10000 \
+	   matchall \
+	   action drop
+}
+
+tests_run()
+{
+	local current_test
+	# Run every function named in TESTS, or in ALL_TESTS if TESTS is unset/empty.
+	for current_test in ${TESTS:-$ALL_TESTS}; do
+		$current_test
+	done
+}
+
 ##############################################################################
 # Tests
 
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre.sh b/tools/testing/selftests/net/forwarding/mirror_gre.sh
new file mode 100755
index 000000000000..e6fd7a18c655
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre.sh
@@ -0,0 +1,159 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the device to mirror to is a
+# gretap or ip6gretap netdevice. Expect that the packets come out encapsulated,
+# and another gretap / ip6gretap netdevice is then capable of decapsulating the
+# traffic. Test that the payload is what is expected (ICMP ping request or
+# reply, depending on test).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_mac
+	test_ip6gretap_mac
+	test_two_spans
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_mac()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local prot=$1; shift
+	local what=$1; shift
+
+	local swp3mac=$(mac_get $swp3)
+	local h3mac=$(mac_get $h3)
+
+	RET=0
+	# The pref-77 rule on $h3 matches GRE (ip_proto 0x2f) from $swp3's MAC to $h3's MAC.
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+	tc filter add dev $h3 ingress pref 77 prot $prot \
+		flower ip_proto 0x2f src_mac $swp3mac dst_mac $h3mac \
+		action pass
+	# NOTE(review): trailing "77 10" looks like <filter pref> <expected hits> - confirm in mirror_lib.sh
+	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+	tc filter del dev $h3 ingress pref 77
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what: envelope MAC ($tcflags)"
+}
+
+test_two_spans()
+{
+	RET=0
+
+	mirror_install $swp1 ingress gt4 "matchall $tcflags"
+	mirror_install $swp1 egress gt6 "matchall $tcflags"
+	quick_test_span_gre_dir gt4 ingress
+	quick_test_span_gre_dir gt6 egress
+
+	mirror_uninstall $swp1 ingress
+	fail_test_span_gre_dir gt4 ingress
+	quick_test_span_gre_dir gt6 egress
+
+	mirror_install $swp1 ingress gt4 "matchall $tcflags"
+	mirror_uninstall $swp1 egress
+	quick_test_span_gre_dir gt4 ingress
+	fail_test_span_gre_dir gt6 egress
+
+	mirror_uninstall $swp1 ingress
+	log_test "two simultaneously configured mirrors ($tcflags)"
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap"
+	full_test_span_gre_dir gt6 egress 0 8 "mirror to ip6gretap"
+}
+
+test_gretap_mac()
+{
+	test_span_gre_mac gt4 ingress ip "mirror to gretap"
+	test_span_gre_mac gt4 egress ip "mirror to gretap"
+}
+
+test_ip6gretap_mac()
+{
+	test_span_gre_mac gt6 ingress ipv6 "mirror to ip6gretap"
+	test_span_gre_mac gt6 egress ipv6 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
new file mode 100755
index 000000000000..360ca133bead
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bound.sh
@@ -0,0 +1,226 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   | +---------------------------------------------------------------------+ |
+#   | | OL                      + gt6 (ip6gretap)      + gt4 (gretap)       | |
+#   | |                         : loc=2001:db8:2::1    : loc=192.0.2.129    | |
+#   | |                         : rem=2001:db8:2::2    : rem=192.0.2.130    | |
+#   | |                         : ttl=100              : ttl=100            | |
+#   | |                         : tos=inherit          : tos=inherit        | |
+#   | +-------------------------:--|-------------------:--|-----------------+ |
+#   |                           :  |                   :  |                   |
+#   | +-------------------------:--|-------------------:--|-----------------+ |
+#   | | UL                      :  |,---------------------'                 | |
+#   | |   + $swp3               :  ||                  :                    | |
+#   | |   | 192.0.2.129/28      :  vv                  :                    | |
+#   | |   | 2001:db8:2::1/64    :  + ul (dummy)        :                    | |
+#   | +---|---------------------:----------------------:--------------------+ |
+#   +-----|---------------------:----------------------:----------------------+
+#         |                     :                      :
+#   +-----|---------------------:----------------------:----------------------+
+#   | H3  + $h3                 + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)      |
+#   |       192.0.2.130/28        loc=2001:db8:2::2      loc=192.0.2.130      |
+#   |       2001:db8:2::2/64      rem=2001:db8:2::1      rem=192.0.2.129      |
+#   |                             ttl=100                ttl=100              |
+#   |                             tos=inherit            tos=inherit          |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+#
+# This tests mirroring to gretap and ip6gretap configured in an overlay /
+# underlay manner, i.e. with a bound dummy device that marks underlay VRF where
+# the encapsulated packet should be routed.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+h1_destroy()
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+h2_create()
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+h2_destroy()
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+h3_create()
+{
+	simple_if_init $h3 192.0.2.130/28 2001:db8:2::2/64
+
+	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129
+	ip link set h3-gt4 vrf v$h3
+	matchall_sink_create h3-gt4
+
+	tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1
+	ip link set h3-gt6 vrf v$h3
+	matchall_sink_create h3-gt6
+}
+
+h3_destroy()
+{
+	tunnel_destroy h3-gt6
+	tunnel_destroy h3-gt4
+
+	simple_if_fini $h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+switch_create()
+{
+	# Bridge between H1 and H2.
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact
+
+	# Underlay.
+
+	simple_if_init $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+	ip link add name ul type dummy
+	ip link set dev ul master v$swp3
+	ip link set dev ul up
+
+	# Overlay.
+
+	vrf_create vrf-ol
+	ip link set dev vrf-ol up
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit dev ul
+	ip link set dev gt4 master vrf-ol
+	ip link set dev gt4 up
+
+	tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+		      ttl 100 tos inherit dev ul allow-localremote
+	ip link set dev gt6 master vrf-ol
+	ip link set dev gt6 up
+}
+
+switch_destroy()
+{
+	vrf_destroy vrf-ol
+
+	tunnel_destroy gt6
+	tunnel_destroy gt4
+
+	simple_if_fini $swp3 192.0.2.129/28 2001:db8:2::1/64
+
+	ip link del dev ul
+
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link del dev br1
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+
+	h1_create
+	h2_create
+	h3_create
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	h3_destroy
+	h2_destroy
+	h1_destroy
+
+	vrf_cleanup
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap w/ UL"
+	full_test_span_gre_dir gt4 egress  0 8 "mirror to gretap w/ UL"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir gt6 ingress 8 0 "mirror to ip6gretap w/ UL"
+	full_test_span_gre_dir gt6 egress  0 8 "mirror to ip6gretap w/ UL"
+}
+
+test_all()
+{
+	RET=0
+
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
new file mode 100755
index 000000000000..3bb4c2ba7b14
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_bridge_1d_vlan.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# bridge device without vlan filtering (802.1d). The device attached to that
+# bridge is a VLAN.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_stp
+	test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name br2 type bridge vlan_filtering 0
+	ip link set dev br2 up
+
+	vlan_create $swp3 555
+
+	ip link set dev $swp3.555 master br2
+	ip route add 192.0.2.130/32 dev br2
+	ip -6 route add 2001:db8:2::2/128 dev br2
+
+	ip address add dev br2 192.0.2.129/32
+	ip address add dev br2 2001:db8:2::1/128
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	vlan_destroy $h3 555
+	ip link del dev br2
+	vlan_destroy $swp3 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_match()
+{
+	local tundev=$1; shift
+	local vlan_match=$1; shift
+	local what=$1; shift
+
+	full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+	full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()
+{
+	test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+}
+
+test_gretap_stp()
+{
+	full_test_span_gre_stp gt4 $swp3.555 "mirror to gretap"
+}
+
+test_ip6gretap_stp()
+{
+	full_test_span_gre_stp gt6 $swp3.555 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
new file mode 100755
index 000000000000..aa29d46186a8
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_changes.sh
@@ -0,0 +1,278 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test how mirrors to gretap and ip6gretap react to changes to relevant
+# configuration.
+
+ALL_TESTS="
+	test_ttl
+	test_tun_up
+	test_egress_up
+	test_remote_ip
+	test_tun_del
+	test_route_del
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	# This test downs $swp3, which deletes the configured IPv6 address
+	# unless this sysctl is set.
+	sysctl_set net.ipv6.conf.$swp3.keep_addr_on_down 1
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	sysctl_restore net.ipv6.conf.$swp3.keep_addr_on_down
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_ttl()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local prot=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	tc filter add dev $h3 ingress pref 77 prot $prot \
+		flower ip_ttl 50 action pass
+	# Before the change: last argument 0, presumably "expect no TTL-50 hits".
+	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 0
+	# After setting the tunnel TTL to 50 the same rule is expected to match.
+	ip link set dev $tundev type $type ttl 50
+	mirror_test v$h1 192.0.2.1 192.0.2.2 $h3 77 10
+
+	ip link set dev $tundev type $type ttl 100	# back to the TTL used when tunnels are (re)created in this file
+	tc filter del dev $h3 ingress pref 77
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: TTL change ($tcflags)"
+}
+
+test_span_gre_tun_up()
+{
+	local tundev=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev down
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev up
+
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: tunnel down/up ($tcflags)"
+}
+
+test_span_gre_egress_up()
+{
+	local tundev=$1; shift
+	local remote_ip=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $swp3 down
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	# After setting the device up, wait for neighbor to get resolved so that
+	# we can expect mirroring to work.
+	ip link set dev $swp3 up
+	while true; do
+		ip neigh sh dev $swp3 $remote_ip nud reachable |
+		    grep -q ^
+		if [[ $? -ne 0 ]]; then
+			sleep 1
+		else
+			break
+		fi
+	done
+
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: egress down/up ($tcflags)"
+}
+
+test_span_gre_remote_ip()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local correct_ip=$1; shift
+	local wrong_ip=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	ip link set dev $tundev type $type remote $wrong_ip
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev ingress
+
+	ip link set dev $tundev type $type remote $correct_ip
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: remote address change ($tcflags)"
+}
+
+test_span_gre_tun_del()
+{
+	local tundev=$1; shift
+	local type=$1; shift
+	local flags=$1; shift
+	local local_ip=$1; shift
+	local remote_ip=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+	ip link del dev $tundev
+	fail_test_span_gre_dir $tundev ingress
+
+	tunnel_create $tundev $type $local_ip $remote_ip \
+		      ttl 100 tos inherit $flags
+
+	# Recreating the tunnel doesn't reestablish mirroring, so reinstall it
+	# and verify it works for the follow-up tests.
+	mirror_uninstall $swp1 ingress
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: tunnel deleted ($tcflags)"
+}
+
+test_span_gre_route_del()
+{
+	local tundev=$1; shift
+	local edev=$1; shift
+	local route=$1; shift
+	local what=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	ip route del $route dev $edev
+	fail_test_span_gre_dir $tundev ingress
+
+	ip route add $route dev $edev
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: underlay route removal ($tcflags)"
+}
+
+test_ttl()
+{
+	test_span_gre_ttl gt4 gretap ip "mirror to gretap"
+	test_span_gre_ttl gt6 ip6gretap ipv6 "mirror to ip6gretap"
+}
+
+test_tun_up()
+{
+	test_span_gre_tun_up gt4 "mirror to gretap"
+	test_span_gre_tun_up gt6 "mirror to ip6gretap"
+}
+
+test_egress_up()
+{
+	test_span_gre_egress_up gt4 192.0.2.130 "mirror to gretap"
+	test_span_gre_egress_up gt6 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_remote_ip()
+{
+	test_span_gre_remote_ip gt4 gretap 192.0.2.130 192.0.2.132 "mirror to gretap"
+	test_span_gre_remote_ip gt6 ip6gretap 2001:db8:2::2 2001:db8:2::4 "mirror to ip6gretap"
+}
+
+test_tun_del()
+{
+	test_span_gre_tun_del gt4 gretap "" \
+			      192.0.2.129 192.0.2.130 "mirror to gretap"
+	test_span_gre_tun_del gt6 ip6gretap allow-localremote \
+			      2001:db8:2::1 2001:db8:2::2 "mirror to ip6gretap"
+}
+
+test_route_del()
+{
+	test_span_gre_route_del gt4 $swp3 192.0.2.128/28 "mirror to gretap"
+	test_span_gre_route_del gt6 $swp3 2001:db8:2::/64 "mirror to ip6gretap"
+}
+
+test_all()
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
new file mode 100755
index 000000000000..12914f40612d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_flower.sh
@@ -0,0 +1,137 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# This tests flower-triggered mirroring to gretap and ip6gretap netdevices. The
+# interfaces on H1 and H2 have two addresses each. Flower match on one of the
+# addresses is configured with mirror action. It is expected that when pinging
+# this address, mirroring takes place, whereas when pinging the other one,
+# there's no mirroring.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.129/28
+	ip address add dev $h3 192.0.2.130/28
+
+	ip address add dev $swp3 2001:db8:2::1/64
+	ip address add dev $h3 2001:db8:2::2/64
+
+	ip address add dev $h1 192.0.2.3/28
+	ip address add dev $h2 192.0.2.4/28
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	ip address del dev $h2 192.0.2.4/28
+	ip address del dev $h1 192.0.2.3/28
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_dir_acl()
+{
+	test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+fail_test_span_gre_dir_acl()
+{
+	fail_test_span_gre_dir_ips "$@" 192.0.2.3 192.0.2.4
+}
+
+full_test_span_gre_dir_acl()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local match_dip=$1; shift
+	local what=$1; shift
+
+	mirror_install $swp1 $direction $tundev \
+		       "protocol ip flower $tcflags dst_ip $match_dip"
+	fail_test_span_gre_dir $tundev $direction
+	test_span_gre_dir_acl "$tundev" "$direction" \
+			  "$forward_type" "$backward_type"
+	mirror_uninstall $swp1 $direction
+
+	# Test lack of mirroring after ACL mirror is uninstalled.
+	fail_test_span_gre_dir_acl "$tundev" "$direction"
+
+	log_test "$direction $what ($tcflags)"
+}
+
+test_gretap()
+{
+	full_test_span_gre_dir_acl gt4 ingress 8 0 192.0.2.4 "ACL mirror to gretap"
+	full_test_span_gre_dir_acl gt4 egress 0 8 192.0.2.3 "ACL mirror to gretap"
+}
+
+test_ip6gretap()
+{
+	full_test_span_gre_dir_acl gt6 ingress 8 0 192.0.2.4 "ACL mirror to ip6gretap"
+	full_test_span_gre_dir_acl gt6 egress 0 8 192.0.2.3 "ACL mirror to ip6gretap"
+}
+
+test_all()
+{
+	RET=0
+
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run
+
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"
+test_all
+
+if ! tc_offload_check; then
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
new file mode 100644
index 000000000000..619b469365be
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -0,0 +1,130 @@
+# SPDX-License-Identifier: GPL-2.0
+
+source mirror_lib.sh
+
+quick_test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+
+	do_test_span_dir_ips 10 h3-$tundev "$@"
+}
+
+fail_test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+
+	do_test_span_dir_ips 0 h3-$tundev "$@"
+}
+
+test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+
+	test_span_dir_ips h3-$tundev "$@"
+}
+
+full_test_span_gre_dir_ips()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+			  "$backward_type" "$ip1" "$ip2"
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what ($tcflags)"
+}
+
+full_test_span_gre_dir_vlan_ips()
+{
+	local tundev=$1; shift
+	local direction=$1; shift
+	local vlan_match=$1; shift
+	local forward_type=$1; shift
+	local backward_type=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	RET=0
+
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+
+	test_span_dir_ips "h3-$tundev" "$direction" "$forward_type" \
+			  "$backward_type" "$ip1" "$ip2"
+
+	tc filter add dev $h3 ingress pref 77 prot 802.1q \
+		flower $vlan_match ip_proto 0x2f \
+		action pass
+	mirror_test v$h1 $ip1 $ip2 $h3 77 10
+	tc filter del dev $h3 ingress pref 77
+
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what ($tcflags)"
+}
+
+quick_test_span_gre_dir()
+{
+	quick_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+fail_test_span_gre_dir()
+{
+	fail_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+test_span_gre_dir()
+{
+	test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir()
+{
+	full_test_span_gre_dir_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_dir_vlan()
+{
+	full_test_span_gre_dir_vlan_ips "$@" 192.0.2.1 192.0.2.2
+}
+
+full_test_span_gre_stp_ips()
+{
+	local tundev=$1; shift
+	local nbpdev=$1; shift
+	local what=$1; shift
+	local ip1=$1; shift
+	local ip2=$1; shift
+	# nbpdev is the bridge port whose STP state is toggled below.
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+	bridge link set dev $nbpdev state disabled
+	sleep 1		# presumably lets the state change take effect
+	fail_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+	bridge link set dev $nbpdev state forwarding
+	sleep 1
+	quick_test_span_gre_dir_ips $tundev ingress $ip1 $ip2
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: STP state ($tcflags)"
+}
+
+full_test_span_gre_stp()
+{
+	full_test_span_gre_stp_ips "$@" 192.0.2.1 192.0.2.2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
new file mode 100755
index 000000000000..fc0508e40fca
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_neigh.sh
@@ -0,0 +1,115 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for mirroring to gretap and ip6gretap, such that the neighbor entry for
+# the tunnel remote address has an invalid address at the time the mirroring
+# is set up. Later on, the neighbor is deleted and it is expected to be
+# reinitialized using the usual ARP/ND process, and the mirroring offload updated.
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()  # bind the six test ports to roles and build the gretap topology
+{
+	h1=${NETIFS[p1]}  # p1/p2: H1 and the switch port facing it
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}  # p3/p4: switch port and H2
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}  # p5/p6: switch port and H3
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.129/28  # gt4 local address
+	ip address add dev $h3 192.0.2.130/28  # gt4 remote address
+
+	ip address add dev $swp3 2001:db8:2::1/64  # gt6 local address
+	ip address add dev $h3 2001:db8:2::2/64  # gt6 remote address
+}
+
+cleanup()  # EXIT handler: undo setup_prepare in reverse order
+{
+	pre_cleanup
+
+	ip address del dev $h3 2001:db8:2::2/64
+	ip address del dev $swp3 2001:db8:2::1/64
+
+	ip address del dev $h3 192.0.2.130/28
+	ip address del dev $swp3 192.0.2.129/28
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_span_gre_neigh()
+{
+	local addr=$1; shift  # tunnel remote address whose neighbor entry is faked
+	local tundev=$1; shift  # tunnel netdevice to mirror to
+	local direction=$1; shift  # side of $swp1 to mirror: ingress or egress
+	local what=$1; shift  # description used in the logged test name
+
+	RET=0
+
+	ip neigh replace dev $swp3 $addr lladdr 00:11:22:33:44:55  # bogus MAC
+	mirror_install $swp1 $direction $tundev "matchall $tcflags"
+	fail_test_span_gre_dir $tundev $direction  # nothing may arrive at H3 yet
+	ip neigh del dev $swp3 $addr  # drop the bogus entry so it gets re-resolved
+	quick_test_span_gre_dir $tundev $direction
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction $what: neighbor change ($tcflags)"
+}
+
+test_gretap()  # neighbor-change test for gt4, once per mirror direction
+{
+	test_span_gre_neigh 192.0.2.130 gt4 ingress "mirror to gretap"
+	test_span_gre_neigh 192.0.2.130 gt4 egress "mirror to gretap"
+}
+
+test_ip6gretap()  # neighbor-change test for gt6, once per mirror direction
+{
+	test_span_gre_neigh 2001:db8:2::2 gt6 ingress "mirror to ip6gretap"
+	test_span_gre_neigh 2001:db8:2::2 gt6 egress "mirror to ip6gretap"
+}
+
+test_all()  # run the test list with slow-path traps installed on $swp1
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run  # presumably runs the functions listed in ALL_TESTS
+
+	slow_path_trap_uninstall $swp1 egress  # removed in reverse order
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT  # tear the topology down however the script exits
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"  # first pass: filters installed with skip_hw
+test_all
+
+if ! tc_offload_check; then  # check failed: skip the second pass
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"  # second pass: filters installed with skip_sw
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
new file mode 100755
index 000000000000..8fa681eb90e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_nh.sh
@@ -0,0 +1,127 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test that gretap and ip6gretap mirroring works when the other tunnel endpoint
+# is reachable through a next-hop route (as opposed to directly-attached route).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()  # bind ports to roles, disable rp_filter, build the next-hop topology
+{
+	h1=${NETIFS[p1]}  # p1/p2: H1 and the switch port facing it
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}  # p3/p4: switch port and H2
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}  # p5/p6: switch port and H3
+	h3=${NETIFS[p6]}
+
+	sysctl_set net.ipv4.conf.all.rp_filter 0  # restored in cleanup
+	sysctl_set net.ipv4.conf.$h3.rp_filter 0
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip address add dev $swp3 192.0.2.161/28  # underlay link $swp3 <-> $h3
+	ip address add dev $h3 192.0.2.162/28
+	ip address add dev gt4 192.0.2.129/32  # tunnel endpoint addresses sit on the tunnels
+	ip address add dev h3-gt4 192.0.2.130/32
+
+	# IPv6 route can't be added after address. Such routes are rejected due
+	# to the gateway address having been configured on the local system. It
+	# works the other way around though.
+	ip address add dev $swp3 2001:db8:4::1/64
+	ip -6 route add 2001:db8:2::2/128 via 2001:db8:4::2  # before the gateway address exists
+	ip address add dev $h3 2001:db8:4::2/64
+	ip address add dev gt6 2001:db8:2::1
+	ip address add dev h3-gt6 2001:db8:2::2
+}
+
+cleanup()  # EXIT handler: undo setup_prepare
+{
+	pre_cleanup
+
+	ip -6 route del 2001:db8:2::2/128 via 2001:db8:4::2
+	ip address del dev $h3 2001:db8:4::2/64
+	ip address del dev $swp3 2001:db8:4::1/64
+
+	ip address del dev $h3 192.0.2.162/28
+	ip address del dev $swp3 192.0.2.161/28
+
+	mirror_gre_topo_destroy  # NOTE(review): gt4/gt6/h3-gt* addresses presumably vanish with the tunnels
+	vrf_cleanup
+
+	sysctl_restore net.ipv4.conf.$h3.rp_filter  # reverse of the sysctl_set order
+	sysctl_restore net.ipv4.conf.all.rp_filter
+}
+
+test_gretap()  # gt4 mirroring must only work once a next-hop route to the remote exists
+{
+	RET=0
+	mirror_install $swp1 ingress gt4 "matchall $tcflags"
+
+	# For IPv4, test that there's no mirroring without the route directing
+	# the traffic to tunnel remote address. Then add it and test that
+	# mirroring starts. For IPv6 we can't test this due to the limitation
+	# that routes for locally-specified IPv6 addresses can't be added.
+	fail_test_span_gre_dir gt4 ingress  # no route to 192.0.2.130 yet
+
+	ip route add 192.0.2.130/32 via 192.0.2.162  # next hop is $h3's underlay address
+	quick_test_span_gre_dir gt4 ingress
+	ip route del 192.0.2.130/32 via 192.0.2.162
+
+	mirror_uninstall $swp1 ingress
+	log_test "mirror to gre with next-hop remote ($tcflags)"
+}
+
+test_ip6gretap()  # gt6 mirroring via the IPv6 next-hop route added in setup_prepare
+{
+	RET=0
+
+	mirror_install $swp1 ingress gt6 "matchall $tcflags"
+	quick_test_span_gre_dir gt6 ingress  # positive check only; see test_gretap comment
+	mirror_uninstall $swp1 ingress
+
+	log_test "mirror to ip6gre with next-hop remote ($tcflags)"
+}
+
+test_all()  # run the test list with slow-path traps installed on $swp1
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run  # presumably runs the functions listed in ALL_TESTS
+
+	slow_path_trap_uninstall $swp1 egress  # removed in reverse order
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT  # tear the topology down however the script exits
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"  # first pass: filters installed with skip_hw
+test_all
+
+if ! tc_offload_check; then  # check failed: skip the second pass
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"  # second pass: filters installed with skip_sw
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
new file mode 100644
index 000000000000..253419564708
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -0,0 +1,94 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring to gretap and ip6gretap
+# netdevices. The tests that use it tweak it in one way or another--importantly,
+# $swp3 and $h3 need to have addresses set up.
+#
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3               + gt6 (ip6gretap)      + gt4 (gretap)         |
+#   |     |                     : loc=2001:db8:2::1    : loc=192.0.2.129      |
+#   |     |                     : rem=2001:db8:2::2    : rem=192.0.2.130      |
+#   |     |                     : ttl=100              : ttl=100              |
+#   |     |                     : tos=inherit          : tos=inherit          |
+#   |     |                     :                      :                      |
+#   +-----|---------------------:----------------------:----------------------+
+#         |                     :                      :
+#   +-----|---------------------:----------------------:----------------------+
+#   | H3  + $h3                 + h3-gt6 (ip6gretap)   + h3-gt4 (gretap)      |
+#   |                             loc=2001:db8:2::2      loc=192.0.2.130      |
+#   |                             rem=2001:db8:2::1      rem=192.0.2.129      |
+#   |                             ttl=100                ttl=100              |
+#   |                             tos=inherit            tos=inherit          |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+source mirror_topo_lib.sh
+
+mirror_gre_topo_h3_create()  # H3 plus its gretap and ip6gretap tunnel endpoints
+{
+	mirror_topo_h3_create
+
+	tunnel_create h3-gt4 gretap 192.0.2.130 192.0.2.129  # local, then remote address
+	ip link set h3-gt4 vrf v$h3
+	matchall_sink_create h3-gt4
+
+	tunnel_create h3-gt6 ip6gretap 2001:db8:2::2 2001:db8:2::1  # local, then remote address
+	ip link set h3-gt6 vrf v$h3
+	matchall_sink_create h3-gt6
+}
+
+mirror_gre_topo_h3_destroy()  # reverse of mirror_gre_topo_h3_create
+{
+	tunnel_destroy h3-gt6
+	tunnel_destroy h3-gt4
+
+	mirror_topo_h3_destroy
+}
+
+mirror_gre_topo_switch_create()  # base switch plus the gt4/gt6 mirror-target tunnels
+{
+	mirror_topo_switch_create
+
+	tunnel_create gt4 gretap 192.0.2.129 192.0.2.130 \
+		      ttl 100 tos inherit  # parameters as in the topology diagram
+
+	tunnel_create gt6 ip6gretap 2001:db8:2::1 2001:db8:2::2 \
+		      ttl 100 tos inherit allow-localremote
+}
+
+mirror_gre_topo_switch_destroy()  # reverse of mirror_gre_topo_switch_create
+{
+	tunnel_destroy gt6
+	tunnel_destroy gt4
+
+	mirror_topo_switch_destroy
+}
+
+mirror_gre_topo_create()  # build the whole topology: hosts first, then the switch
+{
+	mirror_topo_h1_create
+	mirror_topo_h2_create
+	mirror_gre_topo_h3_create  # gre flavor: H3 also gets tunnel endpoints
+
+	mirror_gre_topo_switch_create
+}
+
+mirror_gre_topo_destroy()  # tear down in the reverse order of mirror_gre_topo_create
+{
+	mirror_gre_topo_switch_destroy
+
+	mirror_gre_topo_h3_destroy
+	mirror_topo_h2_destroy
+	mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
new file mode 100755
index 000000000000..88cecdb9a861
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan.sh
@@ -0,0 +1,92 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a gretap netdevice
+# whose underlay route points at a vlan device.
+
+ALL_TESTS="
+	test_gretap
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()  # bind ports to roles and put the tunnel underlay on vlan 555
+{
+	h1=${NETIFS[p1]}  # p1/p2: H1 and the switch port facing it
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}  # p3/p4: switch port and H2
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}  # p5/p6: switch port and H3
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	ip link add name $swp3.555 link $swp3 type vlan id 555
+	ip address add dev $swp3.555 192.0.2.129/32  # gt4 local address
+	ip address add dev $swp3.555 2001:db8:2::1/128  # gt6 local address
+	ip link set dev $swp3.555 up
+
+	ip route add 192.0.2.130/32 dev $swp3.555  # tunnel remotes are reached over the vlan
+	ip -6 route add 2001:db8:2::2/128 dev $swp3.555
+
+	ip link add name $h3.555 link $h3 type vlan id 555
+	ip link set dev $h3.555 master v$h3
+	ip address add dev $h3.555 192.0.2.130/28  # gt4 remote address
+	ip address add dev $h3.555 2001:db8:2::2/64  # gt6 remote address
+	ip link set dev $h3.555 up
+}
+
+cleanup()  # EXIT handler: remove the vlan devices, then the topology
+{
+	pre_cleanup
+
+	ip link del dev $h3.555  # addresses and routes on the vlans are not removed separately
+	ip link del dev $swp3.555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_gretap()  # full gt4 mirroring test for each direction
+{
+	full_test_span_gre_dir gt4 ingress 8 0 "mirror to gretap"  # 8/0: presumably ICMP echo request/reply types
+	full_test_span_gre_dir gt4 egress 0 8 "mirror to gretap"  # types swapped for egress
+}
+
+test_all()  # run the test list with slow-path traps installed on $swp1
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run  # presumably runs the functions listed in ALL_TESTS
+
+	slow_path_trap_uninstall $swp1 egress  # removed in reverse order
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT  # tear the topology down however the script exits
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"  # first pass: filters installed with skip_hw
+test_all
+
+if ! tc_offload_check; then  # check failed: skip the second pass
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"  # second pass: filters installed with skip_sw
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
new file mode 100755
index 000000000000..5dbc7a08f4bd
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_vlan_bridge_1q.sh
@@ -0,0 +1,270 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing gretap. See
+# mirror_gre_topo_lib.sh for more details.
+#
+# Test for "tc action mirred egress mirror" when the underlay route points at a
+# vlan device on top of a bridge device with vlan filtering (802.1q).
+
+ALL_TESTS="
+	test_gretap
+	test_ip6gretap
+	test_gretap_forbidden_cpu
+	test_ip6gretap_forbidden_cpu
+	test_gretap_forbidden_egress
+	test_ip6gretap_forbidden_egress
+	test_gretap_untagged_egress
+	test_ip6gretap_untagged_egress
+	test_gretap_fdb_roaming
+	test_ip6gretap_fdb_roaming
+	test_gretap_stp
+	test_ip6gretap_stp
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_gre_lib.sh
+source mirror_gre_topo_lib.sh
+
+setup_prepare()  # bind ports to roles and route the tunnel underlay via br1.555
+{
+	h1=${NETIFS[p1]}  # p1/p2: H1 and the switch port facing it
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}  # p3/p4: switch port and H2
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}  # p5/p6: switch port and H3
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_gre_topo_create
+
+	vlan_create br1 555 "" 192.0.2.129/32 2001:db8:2::1/128  # tunnel local addresses
+	bridge vlan add dev br1 vid 555 self  # the *_forbidden_cpu tests remove and re-add this
+	ip route rep 192.0.2.130/32 dev br1.555  # tunnel remotes are reached over br1.555
+	ip -6 route rep 2001:db8:2::2/128 dev br1.555
+
+	vlan_create $h3 555 v$h3 192.0.2.130/28 2001:db8:2::2/64  # tunnel remote addresses
+
+	ip link set dev $swp3 master br1
+	bridge vlan add dev $swp3 vid 555
+	bridge vlan add dev $swp2 vid 555
+}
+
+cleanup()  # EXIT handler: detach the ports, drop the vlans, destroy the topology
+{
+	pre_cleanup
+
+	ip link set dev $swp2 nomaster
+	ip link set dev $swp3 nomaster  # enslaved to br1 in setup_prepare
+	vlan_destroy $h3 555
+	vlan_destroy br1 555
+
+	mirror_gre_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_match()  # full vlan-aware mirroring test for both directions
+{
+	local tundev=$1; shift  # tunnel netdevice to mirror to
+	local vlan_match=$1; shift  # flower vlan match keys, passed on unchanged
+	local what=$1; shift  # description used in the logged test name
+
+	full_test_span_gre_dir_vlan $tundev ingress "$vlan_match" 8 0 "$what"
+	full_test_span_gre_dir_vlan $tundev egress "$vlan_match" 0 8 "$what"
+}
+
+test_gretap()  # gt4: encapsulated traffic must carry vlan 555 with an IPv4 payload
+{
+	test_vlan_match gt4 'vlan_id 555 vlan_ethtype ip' "mirror to gretap"
+}
+
+test_ip6gretap()  # gt6: encapsulated traffic must carry vlan 555 with an IPv6 payload
+{
+	test_vlan_match gt6 'vlan_id 555 vlan_ethtype ipv6' "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_cpu()  # mirroring must stop while br1 itself lacks vlan 555
+{
+	local tundev=$1; shift  # tunnel netdevice to mirror to
+	local what=$1; shift  # description used in the logged test name
+
+	RET=0
+
+	# Run the pass-test first, to prime neighbor table.
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress
+
+	# Now forbid the VLAN at the bridge and see it fail.
+	bridge vlan del dev br1 vid 555 self
+	sleep 1  # presumably lets the change take effect before testing
+	fail_test_span_gre_dir $tundev ingress
+
+	bridge vlan add dev br1 vid 555 self  # restore and expect mirroring to resume
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: vlan forbidden at a bridge ($tcflags)"
+}
+
+test_gretap_forbidden_cpu()  # gt4 variant of test_span_gre_forbidden_cpu
+{
+	test_span_gre_forbidden_cpu gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_cpu()  # gt6 variant of test_span_gre_forbidden_cpu
+{
+	test_span_gre_forbidden_cpu gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_forbidden_egress()  # mirroring must stop while $swp3 lacks vlan 555
+{
+	local tundev=$1; shift  # tunnel netdevice to mirror to
+	local what=$1; shift  # description used in the logged test name
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress  # baseline: mirroring works
+
+	bridge vlan del dev $swp3 vid 555
+	sleep 1  # presumably lets the change take effect before testing
+	fail_test_span_gre_dir $tundev ingress
+
+	bridge vlan add dev $swp3 vid 555
+	# Re-prime FDB
+	arping -I br1.555 192.0.2.130 -fqc 1  # IPv4 arping is used for both tunnel types
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: vlan forbidden at a bridge egress ($tcflags)"
+}
+
+test_gretap_forbidden_egress()  # gt4 variant of test_span_gre_forbidden_egress
+{
+	test_span_gre_forbidden_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_forbidden_egress()  # gt6 variant of test_span_gre_forbidden_egress
+{
+	test_span_gre_forbidden_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_untagged_egress()  # mirrored traffic must follow $swp3's tagged/untagged setting
+{
+	local tundev=$1; shift  # tunnel netdevice to mirror to
+	local what=$1; shift  # description used in the logged test name
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+
+	quick_test_span_gre_dir $tundev ingress  # baseline: arrives at the tunnel...
+	quick_test_span_vlan_dir $h3 555 ingress  # ...and is seen with vid 555 on $h3
+
+	bridge vlan add dev $swp3 vid 555 pvid untagged
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress  # still arrives at the tunnel
+	fail_test_span_vlan_dir $h3 555 ingress  # but no longer matches vid 555 on $h3
+
+	bridge vlan add dev $swp3 vid 555  # re-add without the pvid/untagged flags
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+	quick_test_span_vlan_dir $h3 555 ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: vlan untagged at a bridge egress ($tcflags)"
+}
+
+test_gretap_untagged_egress()  # gt4 variant of test_span_gre_untagged_egress
+{
+	test_span_gre_untagged_egress gt4 "mirror to gretap"
+}
+
+test_ip6gretap_untagged_egress()  # gt6 variant of test_span_gre_untagged_egress
+{
+	test_span_gre_untagged_egress gt6 "mirror to ip6gretap"
+}
+
+test_span_gre_fdb_roaming()  # mirroring must fail while H3's MAC is pinned to $swp2
+{
+	local tundev=$1; shift  # tunnel netdevice to mirror to
+	local what=$1; shift  # description used in the logged test name
+	local h3mac=$(mac_get $h3)  # MAC whose FDB entry is moved between ports
+
+	RET=0
+
+	mirror_install $swp1 ingress $tundev "matchall $tcflags"
+	quick_test_span_gre_dir $tundev ingress  # baseline: mirroring works
+
+	bridge fdb del dev $swp3 $h3mac vlan 555 master
+	bridge fdb add dev $swp2 $h3mac vlan 555 master  # point the entry at the wrong port
+	sleep 1
+	fail_test_span_gre_dir $tundev ingress
+
+	bridge fdb del dev $swp2 $h3mac vlan 555 master
+	# Re-prime FDB
+	arping -I br1.555 192.0.2.130 -fqc 1  # IPv4 arping is used for both tunnel types
+	sleep 1
+	quick_test_span_gre_dir $tundev ingress
+
+	mirror_uninstall $swp1 ingress
+
+	log_test "$what: MAC roaming ($tcflags)"
+}
+
+test_gretap_fdb_roaming()  # gt4 variant of test_span_gre_fdb_roaming
+{
+	test_span_gre_fdb_roaming gt4 "mirror to gretap"
+}
+
+test_ip6gretap_fdb_roaming()  # gt6 variant of test_span_gre_fdb_roaming
+{
+	test_span_gre_fdb_roaming gt6 "mirror to ip6gretap"
+}
+
+test_gretap_stp()  # STP state test for gt4, toggling bridge port $swp3
+{
+	full_test_span_gre_stp gt4 $swp3 "mirror to gretap"
+}
+
+test_ip6gretap_stp()  # STP state test for gt6, toggling bridge port $swp3
+{
+	full_test_span_gre_stp gt6 $swp3 "mirror to ip6gretap"
+}
+
+test_all()  # run the test list with slow-path traps installed on $swp1
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+
+	tests_run  # presumably runs the functions listed in ALL_TESTS
+
+	slow_path_trap_uninstall $swp1 egress  # removed in reverse order
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT  # tear the topology down however the script exits
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"  # first pass: filters installed with skip_hw
+test_all
+
+if ! tc_offload_check; then  # check failed: skip the second pass
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"  # second pass: filters installed with skip_sw
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
new file mode 100644
index 000000000000..d36dc26c6c51
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -0,0 +1,132 @@
+# SPDX-License-Identifier: GPL-2.0
+
+mirror_install()  # mirror packets matching $filter on $from_dev to $to_dev
+{
+	local from_dev=$1; shift  # device the filter is attached to
+	local direction=$1; shift  # ingress or egress
+	local to_dev=$1; shift  # mirror target device
+	local filter=$1; shift  # classifier plus flags; left unquoted below so it word-splits
+
+	tc filter add dev $from_dev $direction \
+	   pref 1000 $filter \
+	   action mirred egress mirror dev $to_dev
+}
+
+mirror_uninstall()  # remove the pref 1000 filter that mirror_install added
+{
+	local from_dev=$1; shift  # device the filter was attached to
+	local direction=$1; shift  # ingress or egress
+
+	tc filter del dev $from_dev $direction pref 1000  # honor the argument, not the global $swp1
+}
+
+mirror_test()  # ping $dip from $vrf_name and check the growth of rule $pref's stats on $dev
+{
+	local vrf_name=$1; shift  # VRF the ping is executed in
+	local sip=$1; shift  # source address; when empty, ping's -I option is omitted
+	local dip=$1; shift  # ping destination
+	local dev=$1; shift  # device carrying the counting rule
+	local pref=$1; shift  # preference of the counting rule
+	local expect=$1; shift  # expected increase of the rule's stats
+
+	local t0=$(tc_rule_stats_get $dev $pref)  # reading before the pings
+	ip vrf exec $vrf_name \
+	   ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.1 -w 2 &> /dev/null
+	local t1=$(tc_rule_stats_get $dev $pref)  # reading after the pings
+	local delta=$((t1 - t0))
+	# Tolerate a couple stray extra packets.
+	((expect <= delta && delta <= expect + 2))  # exit status is consumed by check_err
+	check_err $? "Expected to capture $expect packets, got $delta."
+}
+
+do_test_span_dir_ips()  # expect $expect captured packets at $dev for pings each way
+{
+	local expect=$1; shift  # packet count handed to mirror_test
+	local dev=$1; shift  # device where the capture rule is installed
+	local direction=$1; shift  # consumed from the arguments but not used below
+	local ip1=$1; shift  # address pinged from v$h1's side
+	local ip2=$1; shift  # address pinged from v$h2's side
+
+	icmp_capture_install $dev
+	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect  # ping from ip1 to ip2 in H1's VRF
+	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect  # ping from ip2 to ip1 in H2's VRF
+	icmp_capture_uninstall $dev
+}
+
+quick_test_span_dir_ips()  # positive check: expect 10 packets per ping run
+{
+	do_test_span_dir_ips 10 "$@"
+}
+
+fail_test_span_dir_ips()  # negative check: expect 0 packets per ping run
+{
+	do_test_span_dir_ips 0 "$@"
+}
+
+test_span_dir_ips()  # quick check, then per-ICMP-type checks for each ping direction
+{
+	local dev=$1; shift  # device where the capture rules are installed
+	local direction=$1; shift  # passed on to quick_test_span_dir_ips
+	local forward_type=$1; shift  # ICMP type expected for pings from v$h1
+	local backward_type=$1; shift  # ICMP type expected for pings from v$h2
+	local ip1=$1; shift
+	local ip2=$1; shift
+
+	quick_test_span_dir_ips "$dev" "$direction" "$ip1" "$ip2"
+
+	icmp_capture_install $dev "type $forward_type"  # count only the forward type
+	mirror_test v$h1 $ip1 $ip2 $dev 100 10
+	icmp_capture_uninstall $dev
+
+	icmp_capture_install $dev "type $backward_type"  # count only the backward type
+	mirror_test v$h2 $ip2 $ip1 $dev 100 10
+	icmp_capture_uninstall $dev
+}
+
+fail_test_span_dir()  # fail_test_span_dir_ips with the H1/H2 addresses
+{
+	fail_test_span_dir_ips "$@" 192.0.2.1 192.0.2.2  # caller args, then ip1 ip2
+}
+
+test_span_dir()  # test_span_dir_ips with the H1/H2 addresses
+{
+	test_span_dir_ips "$@" 192.0.2.1 192.0.2.2  # caller args, then ip1 ip2
+}
+
+do_test_span_vlan_dir_ips()  # expect $expect packets with vlan id $vid at $dev, pinging each way
+{
+	local expect=$1; shift  # packet count handed to mirror_test
+	local dev=$1; shift  # device where the capture rule is installed
+	local vid=$1; shift  # vlan id the capture rule matches on
+	local direction=$1; shift  # consumed from the arguments but not used below
+	local ip1=$1; shift  # address pinged from v$h1's side
+	local ip2=$1; shift  # address pinged from v$h2's side
+
+	# Install the capture as skip_hw to avoid double-counting of packets.
+	# The traffic is meant for local box anyway, so will be trapped to
+	# kernel.
+	vlan_capture_install $dev "skip_hw vlan_id $vid"
+	mirror_test v$h1 $ip1 $ip2 $dev 100 $expect  # ping from ip1 to ip2 in H1's VRF
+	mirror_test v$h2 $ip2 $ip1 $dev 100 $expect  # ping from ip2 to ip1 in H2's VRF
+	vlan_capture_uninstall $dev
+}
+
+quick_test_span_vlan_dir_ips()  # positive vlan check: expect 10 packets per ping run
+{
+	do_test_span_vlan_dir_ips 10 "$@"
+}
+
+fail_test_span_vlan_dir_ips()  # negative vlan check: expect 0 packets per ping run
+{
+	do_test_span_vlan_dir_ips 0 "$@"
+}
+
+quick_test_span_vlan_dir()  # quick_test_span_vlan_dir_ips with the H1/H2 addresses
+{
+	quick_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2  # caller args, then ip1 ip2
+}
+
+fail_test_span_vlan_dir()  # fail_test_span_vlan_dir_ips with the H1/H2 addresses
+{
+	fail_test_span_vlan_dir_ips "$@" 192.0.2.1 192.0.2.2  # caller args, then ip1 ip2
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
new file mode 100644
index 000000000000..04979e5962e7
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_topo_lib.sh
@@ -0,0 +1,101 @@
+# SPDX-License-Identifier: GPL-2.0
+
+# This is the standard topology for testing mirroring. The tests that use it
+# tweak it in one way or another--typically add more devices to the topology.
+#
+#   +---------------------+                             +---------------------+
+#   | H1                  |                             |                  H2 |
+#   |     + $h1           |                             |           $h2 +     |
+#   |     | 192.0.2.1/28  |                             |  192.0.2.2/28 |     |
+#   +-----|---------------+                             +---------------|-----+
+#         |                                                             |
+#   +-----|-------------------------------------------------------------|-----+
+#   | SW  o--> mirror                                                   |     |
+#   | +---|-------------------------------------------------------------|---+ |
+#   | |   + $swp1                    BR                           $swp2 +   | |
+#   | +---------------------------------------------------------------------+ |
+#   |                                                                         |
+#   |     + $swp3                                                             |
+#   +-----|-------------------------------------------------------------------+
+#         |
+#   +-----|-------------------------------------------------------------------+
+#   | H3  + $h3                                                               |
+#   |                                                                         |
+#   +-------------------------------------------------------------------------+
+
+mirror_topo_h1_create()  # set up H1's interface with 192.0.2.1/28
+{
+	simple_if_init $h1 192.0.2.1/28
+}
+
+mirror_topo_h1_destroy()  # reverse of mirror_topo_h1_create
+{
+	simple_if_fini $h1 192.0.2.1/28
+}
+
+mirror_topo_h2_create()  # set up H2's interface with 192.0.2.2/28
+{
+	simple_if_init $h2 192.0.2.2/28
+}
+
+mirror_topo_h2_destroy()  # reverse of mirror_topo_h2_create
+{
+	simple_if_fini $h2 192.0.2.2/28
+}
+
+mirror_topo_h3_create()  # set up H3's interface without addresses, plus a clsact qdisc
+{
+	simple_if_init $h3  # no address here; the individual tests add what they need
+	tc qdisc add dev $h3 clsact
+}
+
+mirror_topo_h3_destroy()  # reverse of mirror_topo_h3_create
+{
+	tc qdisc del dev $h3 clsact
+	simple_if_fini $h3
+}
+
+mirror_topo_switch_create()  # bridge $swp1 and $swp2 in br1; $swp3 is only brought up
+{
+	ip link set dev $swp3 up  # not enslaved here; tests decide how $swp3 is used
+
+	ip link add name br1 type bridge vlan_filtering 1
+	ip link set dev br1 up
+
+	ip link set dev $swp1 master br1
+	ip link set dev $swp1 up
+
+	ip link set dev $swp2 master br1
+	ip link set dev $swp2 up
+
+	tc qdisc add dev $swp1 clsact  # the mirror filters are attached to $swp1
+}
+
+mirror_topo_switch_destroy()  # reverse of mirror_topo_switch_create
+{
+	tc qdisc del dev $swp1 clsact
+
+	ip link set dev $swp1 down
+	ip link set dev $swp2 down
+	ip link del dev br1  # ports are not detached with nomaster first
+
+	ip link set dev $swp3 down
+}
+
+mirror_topo_create()  # build the whole topology: hosts first, then the switch
+{
+	mirror_topo_h1_create
+	mirror_topo_h2_create
+	mirror_topo_h3_create
+
+	mirror_topo_switch_create
+}
+
+mirror_topo_destroy()  # tear down in the reverse order of mirror_topo_create
+{
+	mirror_topo_switch_destroy
+
+	mirror_topo_h3_destroy
+	mirror_topo_h2_destroy
+	mirror_topo_h1_destroy
+}
diff --git a/tools/testing/selftests/net/forwarding/mirror_vlan.sh b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
new file mode 100755
index 000000000000..9ab2ce77b332
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/mirror_vlan.sh
@@ -0,0 +1,131 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This test uses standard topology for testing mirroring. See mirror_topo_lib.sh
+# for more details.
+#
+# Test for "tc action mirred egress mirror" that mirrors to a vlan device.
+
+ALL_TESTS="
+	test_vlan
+	test_tagged_vlan
+"
+
+NUM_NETIFS=6
+source lib.sh
+source mirror_lib.sh
+source mirror_topo_lib.sh
+
+setup_prepare()  # bind ports to roles and add the vlan devices used by the tests
+{
+	h1=${NETIFS[p1]}  # p1/p2: H1 and the switch port facing it
+	swp1=${NETIFS[p2]}
+
+	swp2=${NETIFS[p3]}  # p3/p4: switch port and H2
+	h2=${NETIFS[p4]}
+
+	swp3=${NETIFS[p5]}  # p5/p6: switch port and H3
+	h3=${NETIFS[p6]}
+
+	vrf_prepare
+	mirror_topo_create
+
+	vlan_create $swp3 555  # $swp3.555 is the mirror target
+
+	vlan_create $h3 555 v$h3  # $h3.555 is where mirrored traffic is counted
+	matchall_sink_create $h3.555
+
+	vlan_create $h1 111 v$h1 192.0.2.17/28  # vlan 111 carries the tagged H1<->H2 pings
+	bridge vlan add dev $swp1 vid 111
+
+	vlan_create $h2 111 v$h2 192.0.2.18/28
+	bridge vlan add dev $swp2 vid 111
+}
+
+cleanup()  # EXIT handler: drop the vlan devices, then the topology
+{
+	pre_cleanup
+
+	vlan_destroy $h2 111
+	vlan_destroy $h1 111
+	vlan_destroy $h3 555
+	vlan_destroy $swp3 555
+
+	mirror_topo_destroy
+	vrf_cleanup
+}
+
+test_vlan_dir()  # mirror $swp1 to $swp3.555 and check the traffic at $h3.555
+{
+	local direction=$1; shift  # side of $swp1 to mirror: ingress or egress
+	local forward_type=$1; shift  # ICMP type expected for pings from H1
+	local backward_type=$1; shift  # ICMP type expected for pings from H2
+
+	RET=0
+
+	mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+	test_span_dir "$h3.555" "$direction" "$forward_type" "$backward_type"
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction mirror to vlan ($tcflags)"
+}
+
+test_vlan()  # mirror-to-vlan test for each direction
+{
+	test_vlan_dir ingress 8 0  # 8/0: presumably ICMP echo request/reply types
+	test_vlan_dir egress 0 8  # types swapped for egress
+}
+
+test_tagged_vlan_dir()  # mirror vlan-111-tagged traffic to $swp3.555 and check its tag at $h3.555
+{
+	local direction=$1; shift  # side of $swp1 to mirror: ingress or egress
+	local forward_type=$1; shift  # consumed from the arguments but not used below
+	local backward_type=$1; shift  # consumed from the arguments but not used below
+
+	RET=0
+
+	mirror_install $swp1 $direction $swp3.555 "matchall $tcflags"
+	do_test_span_vlan_dir_ips 10 "$h3.555" 111 "$direction" \
+				  192.0.2.17 192.0.2.18  # expect 10 packets matching vlan_id 111
+	do_test_span_vlan_dir_ips  0 "$h3.555" 555 "$direction" \
+				  192.0.2.17 192.0.2.18  # expect none matching vlan_id 555
+	mirror_uninstall $swp1 $direction
+
+	log_test "$direction mirror tagged to vlan ($tcflags)"
+}
+
+test_tagged_vlan()  # tagged mirror-to-vlan test for each direction
+{
+	test_tagged_vlan_dir ingress 8 0  # the two type arguments are ignored by the callee
+	test_tagged_vlan_dir egress 0 8
+}
+
+test_all()  # run the test list with traps on $swp1 and on $h3 ingress
+{
+	slow_path_trap_install $swp1 ingress
+	slow_path_trap_install $swp1 egress
+	trap_install $h3 ingress
+
+	tests_run  # presumably runs the functions listed in ALL_TESTS
+
+	trap_uninstall $h3 ingress  # removed in reverse order
+	slow_path_trap_uninstall $swp1 egress
+	slow_path_trap_uninstall $swp1 ingress
+}
+
+trap cleanup EXIT  # tear the topology down however the script exits
+
+setup_prepare
+setup_wait
+
+tcflags="skip_hw"  # first pass: filters installed with skip_hw
+test_all
+
+if ! tc_offload_check; then  # check failed: skip the second pass
+	echo "WARN: Could not test offloaded functionality"
+else
+	tcflags="skip_sw"  # second pass: filters installed with skip_sw
+	test_all
+fi
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router.sh b/tools/testing/selftests/net/forwarding/router.sh
index cc6a14abfa87..a75cb51cc5bd 100755
--- a/tools/testing/selftests/net/forwarding/router.sh
+++ b/tools/testing/selftests/net/forwarding/router.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6"
 NUM_NETIFS=4
 source lib.sh
 
@@ -114,12 +115,21 @@ cleanup()
 	vrf_cleanup
 }
 
+ping_ipv4()  # IPv4 ping check from $h1; listed in ALL_TESTS
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()  # IPv6 ping check from $h1; listed in ALL_TESTS
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
+tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 3bc351008db6..8b6d0fb6d604 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="ping_ipv4 ping_ipv6 multipath_test"
 NUM_NETIFS=8
 source lib.sh
 
@@ -191,7 +192,7 @@ multipath_eval()
        diff=$(echo $weights_ratio - $packets_ratio | bc -l)
        diff=${diff#-}
 
-       test "$(echo "$diff / $weights_ratio > 0.1" | bc -l)" -eq 0
+       test "$(echo "$diff / $weights_ratio > 0.15" | bc -l)" -eq 0
        check_err $? "Too large discrepancy between expected and measured ratios"
        log_test "$desc"
        log_info "Expected ratio $weights_ratio Measured ratio $packets_ratio"
@@ -204,13 +205,11 @@ multipath4_test()
        local weight_rp13=$3
        local t0_rp12 t0_rp13 t1_rp12 t1_rp13
        local packets_rp12 packets_rp13
-       local hash_policy
 
        # Transmit multiple flows from h1 to h2 and make sure they are
        # distributed between both multipath links (rp12 and rp13)
        # according to the configured weights.
-       hash_policy=$(sysctl -n net.ipv4.fib_multipath_hash_policy)
-       sysctl -q -w net.ipv4.fib_multipath_hash_policy=1
+       sysctl_set net.ipv4.fib_multipath_hash_policy 1
        ip route replace 198.51.100.0/24 vrf vrf-r1 \
                nexthop via 169.254.2.22 dev $rp12 weight $weight_rp12 \
                nexthop via 169.254.3.23 dev $rp13 weight $weight_rp13
@@ -232,7 +231,7 @@ multipath4_test()
        ip route replace 198.51.100.0/24 vrf vrf-r1 \
                nexthop via 169.254.2.22 dev $rp12 \
                nexthop via 169.254.3.23 dev $rp13
-       sysctl -q -w net.ipv4.fib_multipath_hash_policy=$hash_policy
+       sysctl_restore net.ipv4.fib_multipath_hash_policy
 }
 
 multipath6_l4_test()
@@ -242,13 +241,11 @@ multipath6_l4_test()
        local weight_rp13=$3
        local t0_rp12 t0_rp13 t1_rp12 t1_rp13
        local packets_rp12 packets_rp13
-       local hash_policy
 
        # Transmit multiple flows from h1 to h2 and make sure they are
        # distributed between both multipath links (rp12 and rp13)
        # according to the configured weights.
-       hash_policy=$(sysctl -n net.ipv6.fib_multipath_hash_policy)
-       sysctl -q -w net.ipv6.fib_multipath_hash_policy=1
+       sysctl_set net.ipv6.fib_multipath_hash_policy 1
 
        ip route replace 2001:db8:2::/64 vrf vrf-r1 \
 	       nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
@@ -271,7 +268,7 @@ multipath6_l4_test()
 	       nexthop via fe80:2::22 dev $rp12 \
 	       nexthop via fe80:3::23 dev $rp13
 
-       sysctl -q -w net.ipv6.fib_multipath_hash_policy=$hash_policy
+       sysctl_restore net.ipv6.fib_multipath_hash_policy
 }
 
 multipath6_test()
@@ -364,13 +361,21 @@ cleanup()
 	vrf_cleanup
 }
 
+ping_ipv4()  # IPv4 ping check from $h1; listed in ALL_TESTS
+{
+	ping_test $h1 198.51.100.2
+}
+
+ping_ipv6()  # IPv6 ping check from $h1; listed in ALL_TESTS
+{
+	ping6_test $h1 2001:db8:2::2
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-ping_test $h1 198.51.100.2
-ping6_test $h1 2001:db8:2::2
-multipath_test
+tests_run
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 3a6385ebd5d0..813d02d1939d 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
+	mirred_egress_mirror_test gact_trap_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -111,6 +113,10 @@ gact_trap_test()
 {
 	RET=0
 
+	if [[ "$tcflags" != "skip_sw" ]]; then
+		return 0;
+	fi
+
 	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
 		skip_hw dst_ip 192.0.2.2 action drop
 	tc filter add dev $swp1 ingress protocol ip pref 3 handle 103 flower \
@@ -179,24 +185,29 @@ cleanup()
 	ip link set $swp1 address $swp1origmac
 }
 
+mirred_egress_redirect_test()  # argument-free wrapper so the test can be listed in ALL_TESTS
+{
+	mirred_egress_test "redirect"
+}
+
+mirred_egress_mirror_test()  # argument-free wrapper so the test can be listed in ALL_TESTS
+{
+	mirred_egress_test "mirror"
+}
+
 trap cleanup EXIT
 
 setup_prepare
 setup_wait
 
-gact_drop_and_ok_test
-mirred_egress_test "redirect"
-mirred_egress_test "mirror"
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
 	log_info "Could not test offloaded functionality"
 else
 	tcflags="skip_sw"
-	gact_drop_and_ok_test
-	mirred_egress_test "redirect"
-	mirred_egress_test "mirror"
-	gact_trap_test
+	tests_run
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_chains.sh b/tools/testing/selftests/net/forwarding/tc_chains.sh
index 2fd15226974b..d2c783e94df3 100755
--- a/tools/testing/selftests/net/forwarding/tc_chains.sh
+++ b/tools/testing/selftests/net/forwarding/tc_chains.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="unreachable_chain_test gact_goto_chain_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -107,16 +108,14 @@ trap cleanup EXIT
 setup_prepare
 setup_wait
 
-unreachable_chain_test
-gact_goto_chain_test
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
 	log_info "Could not test offloaded functionality"
 else
 	tcflags="skip_sw"
-	unreachable_chain_test
-	gact_goto_chain_test
+	tests_run
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_flower.sh b/tools/testing/selftests/net/forwarding/tc_flower.sh
index 032b882adfc0..20d1077e5a3d 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower.sh
@@ -1,6 +1,8 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="match_dst_mac_test match_src_mac_test match_dst_ip_test \
+	match_src_ip_test match_ip_flags_test"
 NUM_NETIFS=2
 source tc_common.sh
 source lib.sh
@@ -149,6 +151,74 @@ match_src_ip_test()
 	log_test "src_ip match ($tcflags)"
 }
 
+match_ip_flags_test()
+{
+	RET=0
+	# Exercise the flower ip_flags keys: frag, firstfrag, nofirstfrag, nofrag.
+	tc filter add dev $h2 ingress protocol ip pref 1 handle 101 flower \
+		$tcflags ip_flags frag action continue
+	tc filter add dev $h2 ingress protocol ip pref 2 handle 102 flower \
+		$tcflags ip_flags firstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 3 handle 103 flower \
+		$tcflags ip_flags nofirstfrag action continue
+	tc filter add dev $h2 ingress protocol ip pref 4 handle 104 flower \
+		$tcflags ip_flags nofrag action drop
+	# Unfragmented packet: only handles 103 and 104 are expected to count it.
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_fail $? "Matched on wrong frag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_fail $? "Matched on wrong firstfrag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Did not match on nofirstfrag filter (nofrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Did not match on nofrag filter (nofrag)"
+	# First fragment ("frag=0,mf"): 101 and 102 reach 1; 103/104 stay at 1.
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=0,mf" -q
+
+	tc_check_packets "dev $h2 ingress" 101 1
+	check_err $? "Did not match on frag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Did not match on firstfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 1
+	check_err $? "Matched on wrong nofirstfrag filter (1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on wrong nofrag filter (1stfrag)"
+	# Two later fragments: 101 and 103 advance to 3; 102 and 104 stay at 1.
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256,mf" -q
+	$MZ $h1 -c 1 -p 1000 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
+		-t ip "frag=256" -q
+
+	tc_check_packets "dev $h2 ingress" 101 3
+	check_err $? "Did not match on frag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 102 1
+	check_err $? "Matched on wrong firstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 103 3
+	check_err $? "Did not match on nofirstfrag filter (no1stfrag)"
+
+	tc_check_packets "dev $h2 ingress" 104 1
+	check_err $? "Matched on nofrag filter (no1stfrag)"
+
+	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $h2 ingress protocol ip pref 2 handle 102 flower
+	tc filter del dev $h2 ingress protocol ip pref 3 handle 103 flower
+	tc filter del dev $h2 ingress protocol ip pref 4 handle 104 flower
+
+	log_test "ip_flags match ($tcflags)"
+}
+
 setup_prepare()
 {
 	h1=${NETIFS[p1]}
@@ -177,20 +247,14 @@ trap cleanup EXIT
 setup_prepare
 setup_wait
 
-match_dst_mac_test
-match_src_mac_test
-match_dst_ip_test
-match_src_ip_test
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
 	log_info "Could not test offloaded functionality"
 else
 	tcflags="skip_sw"
-	match_dst_mac_test
-	match_src_mac_test
-	match_dst_ip_test
-	match_src_ip_test
+	tests_run
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/tc_shblocks.sh b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
index 077b98048ef4..b5b917203815 100755
--- a/tools/testing/selftests/net/forwarding/tc_shblocks.sh
+++ b/tools/testing/selftests/net/forwarding/tc_shblocks.sh
@@ -1,6 +1,7 @@
 #!/bin/bash
 # SPDX-License-Identifier: GPL-2.0
 
+ALL_TESTS="shared_block_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -109,14 +110,14 @@ trap cleanup EXIT
 setup_prepare
 setup_wait
 
-shared_block_test
+tests_run
 
 tc_offload_check
 if [[ $? -ne 0 ]]; then
 	log_info "Could not test offloaded functionality"
 else
 	tcflags="skip_sw"
-	shared_block_test
+	tests_run
 fi
 
 exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index d571d213418d..c43c6debda06 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -21,6 +21,14 @@ readonly DADDR6='fd::2'
 
 readonly path_sysctl_mem="net.core.optmem_max"
 
+# No arguments: automated test
+if [[ "$#" -eq "0" ]]; then
+	$0 4 tcp -t 1
+	$0 6 tcp -t 1
+	echo "OK. All tests passed"
+	exit 0
+fi
+
 # Argument parsing
 if [[ "$#" -lt "2" ]]; then
 	echo "Usage: $0 [4|6] [tcp|udp|raw|raw_hdrincl|packet|packet_dgram] <args>"
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 7514f93e1624..f8cc38afffa2 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -371,7 +371,7 @@ test_pmtu_vti6_link_add_mtu() {
 
 	fail=0
 
-	min=1280
+	min=68			# vti6 can carry IPv4 packets too
 	max=$((65535 - 40))
 	# Check invalid values first
 	for v in $((min - 1)) $((max + 1)); do
@@ -387,7 +387,7 @@ test_pmtu_vti6_link_add_mtu() {
 	done
 
 	# Now check valid values
-	for v in 1280 1300 $((65535 - 40)); do
+	for v in 68 1280 1300 $((65535 - 40)); do
 		${ns_a} ip link add vti6_a mtu ${v} type vti6 local ${veth6_a_addr} remote ${veth6_b_addr} key 10
 		mtu="$(link_get_mtu "${ns_a}" vti6_a)"
 		${ns_a} ip link del vti6_a
diff --git a/tools/testing/selftests/net/psock_snd.c b/tools/testing/selftests/net/psock_snd.c
new file mode 100644
index 000000000000..7d15e10a9fb6
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <fcntl.h>
+#include <limits.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+#include <linux/if_packet.h>
+#include <linux/if_vlan.h>
+#include <linux/virtio_net.h>
+#include <net/if.h>
+#include <net/ethernet.h>
+#include <netinet/ip.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/mman.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#include "psock_lib.h"
+
+static bool	cfg_use_bind;
+static bool	cfg_use_csum_off;
+static bool	cfg_use_csum_off_bad;
+static bool	cfg_use_dgram;
+static bool	cfg_use_gso;
+static bool	cfg_use_qdisc_bypass;
+static bool	cfg_use_vlan;
+static bool	cfg_use_vnet;
+
+static char	*cfg_ifname = "lo";
+static int	cfg_mtu	= 1500;
+static int	cfg_payload_len = DATA_LEN;
+static int	cfg_truncate_len = INT_MAX;
+static uint16_t	cfg_port = 8000;
+
+/* test sending up to max mtu + 1 */
+#define TEST_SZ	(sizeof(struct virtio_net_hdr) + ETH_HLEN + ETH_MAX_MTU + 1)
+
+static char tbuf[TEST_SZ], rbuf[TEST_SZ];
+
+static unsigned long add_csum_hword(const uint16_t *start, int num_u16)
+{
+	unsigned long sum = 0;
+	int i;
+
+	for (i = 0; i < num_u16; i++)
+		sum += start[i];
+
+	return sum;
+}
+
+static uint16_t build_ip_csum(const uint16_t *start, int num_u16,
+			      unsigned long sum)
+{
+	sum += add_csum_hword(start, num_u16);
+
+	while (sum >> 16)
+		sum = (sum & 0xffff) + (sum >> 16);
+
+	return ~sum;
+}
+
+static int build_vnet_header(void *header)
+{
+	struct virtio_net_hdr *vh = header;
+
+	vh->hdr_len = ETH_HLEN + sizeof(struct iphdr) + sizeof(struct udphdr);
+
+	if (cfg_use_csum_off) {
+		vh->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
+		vh->csum_start = ETH_HLEN + sizeof(struct iphdr);
+		vh->csum_offset = __builtin_offsetof(struct udphdr, check);
+
+		/* position check field exactly one byte beyond end of packet */
+		if (cfg_use_csum_off_bad)
+			vh->csum_start += sizeof(struct udphdr) + cfg_payload_len -
+					  vh->csum_offset - 1;
+	}
+
+	if (cfg_use_gso) {
+		vh->gso_type = VIRTIO_NET_HDR_GSO_UDP;
+		vh->gso_size = cfg_mtu - sizeof(struct iphdr);
+	}
+
+	return sizeof(*vh);
+}
+
+static int build_eth_header(void *header)
+{
+	struct ethhdr *eth = header;
+
+	if (cfg_use_vlan) {
+		uint16_t *tag = header + ETH_HLEN;
+
+		eth->h_proto = htons(ETH_P_8021Q);
+		tag[1] = htons(ETH_P_IP);
+		return ETH_HLEN + 4;
+	}
+
+	eth->h_proto = htons(ETH_P_IP);
+	return ETH_HLEN;
+}
+
+static int build_ipv4_header(void *header, int payload_len)
+{
+	struct iphdr *iph = header;
+
+	iph->ihl = 5;
+	iph->version = 4;
+	iph->ttl = 8;
+	iph->tot_len = htons(sizeof(*iph) + sizeof(struct udphdr) + payload_len);
+	iph->id = htons(1337);
+	iph->protocol = IPPROTO_UDP;
+	iph->saddr = htonl((172 << 24) | (17 << 16) | 2);
+	iph->daddr = htonl((172 << 24) | (17 << 16) | 1);
+	iph->check = build_ip_csum((void *) iph, iph->ihl << 1, 0);
+
+	return iph->ihl << 2;
+}
+
+static int build_udp_header(void *header, int payload_len)
+{
+	const int alen = sizeof(uint32_t);
+	struct udphdr *udph = header;
+	int len = sizeof(*udph) + payload_len;
+
+	udph->source = htons(9);
+	udph->dest = htons(cfg_port);
+	udph->len = htons(len);
+	/* csum_off: sum saddr/daddr (8 bytes before udph), proto and len */
+	if (cfg_use_csum_off)
+		udph->check = build_ip_csum(header - (2 * alen), alen,
+					    htons(IPPROTO_UDP) + udph->len);
+	else
+		udph->check = 0;
+
+	return sizeof(*udph);
+}
+
+static int build_packet(int payload_len)
+{
+	int off = 0;
+
+	off += build_vnet_header(tbuf);
+	off += build_eth_header(tbuf + off);
+	off += build_ipv4_header(tbuf + off, payload_len);
+	off += build_udp_header(tbuf + off, payload_len);
+
+	if (off + payload_len > sizeof(tbuf))
+		error(1, 0, "payload length exceeds max");
+
+	memset(tbuf + off, DATA_CHAR, payload_len);
+
+	return off + payload_len;
+}
+
+static void do_bind(int fd)
+{
+	struct sockaddr_ll laddr = {0};
+
+	laddr.sll_family = AF_PACKET;
+	laddr.sll_protocol = htons(ETH_P_IP);
+	laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+	if (!laddr.sll_ifindex)
+		error(1, errno, "if_nametoindex");
+
+	if (bind(fd, (void *)&laddr, sizeof(laddr)))
+		error(1, errno, "bind");
+}
+
+static void do_send(int fd, char *buf, int len)
+{
+	int ret;
+
+	if (!cfg_use_vnet) {		/* socket takes no virtio_net_hdr */
+		buf += sizeof(struct virtio_net_hdr);
+		len -= sizeof(struct virtio_net_hdr);
+	}
+	if (cfg_use_dgram) {		/* SOCK_DGRAM: skip the Ethernet header */
+		buf += ETH_HLEN;
+		len -= ETH_HLEN;
+	}
+	/* bound sockets can write(); otherwise name the device in sendto() */
+	if (cfg_use_bind) {
+		ret = write(fd, buf, len);
+	} else {
+		struct sockaddr_ll laddr = {0};
+
+		laddr.sll_protocol = htons(ETH_P_IP);
+		laddr.sll_ifindex = if_nametoindex(cfg_ifname);
+		if (!laddr.sll_ifindex)
+			error(1, errno, "if_nametoindex");
+
+		ret = sendto(fd, buf, len, 0, (void *)&laddr, sizeof(laddr));
+	}
+
+	if (ret == -1)
+		error(1, errno, "write");
+	if (ret != len)
+		error(1, 0, "write: %d %d", ret, len);
+
+	fprintf(stderr, "tx: %d\n", ret);
+}
+
+static int do_tx(void)
+{
+	const int one = 1;
+	int fd, len;
+
+	fd = socket(PF_PACKET, cfg_use_dgram ? SOCK_DGRAM : SOCK_RAW, 0);
+	if (fd == -1)
+		error(1, errno, "socket t");
+
+	if (cfg_use_bind)
+		do_bind(fd);
+
+	if (cfg_use_qdisc_bypass &&
+	    setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one)))
+		error(1, errno, "setsockopt qdisc bypass");
+
+	if (cfg_use_vnet &&
+	    setsockopt(fd, SOL_PACKET, PACKET_VNET_HDR, &one, sizeof(one)))
+		error(1, errno, "setsockopt vnet");
+
+	len = build_packet(cfg_payload_len);
+
+	if (cfg_truncate_len < len)
+		len = cfg_truncate_len;
+
+	do_send(fd, tbuf, len);
+
+	if (close(fd))
+		error(1, errno, "close t");
+
+	return len;
+}
+
+static int setup_rx(void)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	struct sockaddr_in raddr = {0};
+	int fd;
+
+	fd = socket(PF_INET, SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket r");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	raddr.sin_family = AF_INET;
+	raddr.sin_port = htons(cfg_port);
+	raddr.sin_addr.s_addr = htonl(INADDR_ANY);
+
+	if (bind(fd, (void *)&raddr, sizeof(raddr)))
+		error(1, errno, "bind r");
+
+	return fd;
+}
+
+static void do_rx(int fd, int expected_len, char *expected)
+{
+	int ret;
+
+	ret = recv(fd, rbuf, sizeof(rbuf), 0);
+	if (ret == -1)
+		error(1, errno, "recv");
+	if (ret != expected_len)
+		error(1, 0, "recv: %d != %d", ret, expected_len);
+	/* length matched; now require identical contents */
+	if (memcmp(rbuf, expected, ret))
+		error(1, 0, "recv: data mismatch");
+
+	fprintf(stderr, "rx: %d\n", ret);
+}
+
+static int setup_sniffer(void)
+{
+	struct timeval tv = { .tv_usec = 100 * 1000 };
+	int fd;
+
+	fd = socket(PF_PACKET, SOCK_RAW, 0);
+	if (fd == -1)
+		error(1, errno, "socket p");
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	pair_udp_setfilter(fd);
+	do_bind(fd);
+
+	return fd;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	while ((c = getopt(argc, argv, "bcCdgl:qt:vV")) != -1) {
+		switch (c) {
+		case 'b':
+			cfg_use_bind = true;
+			break;
+		case 'c':
+			cfg_use_csum_off = true;
+			break;
+		case 'C':
+			cfg_use_csum_off_bad = true;
+			break;
+		case 'd':
+			cfg_use_dgram = true;
+			break;
+		case 'g':
+			cfg_use_gso = true;
+			break;
+		case 'l':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'q':
+			cfg_use_qdisc_bypass = true;
+			break;
+		case 't':
+			cfg_truncate_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'v':
+			cfg_use_vnet = true;
+			break;
+		case 'V':
+			cfg_use_vlan = true;
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+
+	if (cfg_use_vlan && cfg_use_dgram)
+		error(1, 0, "option vlan (-V) conflicts with dgram (-d)");
+
+	if (cfg_use_csum_off && !cfg_use_vnet)
+		error(1, 0, "option csum offload (-c) requires vnet (-v)");
+
+	if (cfg_use_csum_off_bad && !cfg_use_csum_off)
+		error(1, 0, "option csum bad (-C) requires csum offload (-c)");
+
+	if (cfg_use_gso && !cfg_use_csum_off)
+		error(1, 0, "option gso (-g) requires csum offload (-c)");
+}
+
+static void run_test(void)
+{
+	int fdr, fds, total_len;
+
+	fdr = setup_rx();
+	fds = setup_sniffer();
+	/* send one packet; total_len includes the virtio_net_hdr */
+	total_len = do_tx();
+
+	/* BPF filter accepts only this length, vlan changes MAC */
+	if (cfg_payload_len == DATA_LEN && !cfg_use_vlan)
+		do_rx(fds, total_len - sizeof(struct virtio_net_hdr),
+		      tbuf + sizeof(struct virtio_net_hdr));
+	/* the UDP socket must deliver exactly the payload at the tail of tbuf */
+	do_rx(fdr, cfg_payload_len, tbuf + total_len - cfg_payload_len);
+
+	if (close(fds))
+		error(1, errno, "close s");
+	if (close(fdr))
+		error(1, errno, "close r");
+}
+
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (system("ip link set dev lo mtu 1500"))
+		error(1, errno, "ip link set mtu");
+	if (system("ip addr add dev lo 172.17.0.1/24"))
+		error(1, errno, "ip addr add");
+
+	run_test();
+
+	fprintf(stderr, "OK\n\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/psock_snd.sh b/tools/testing/selftests/net/psock_snd.sh
new file mode 100755
index 000000000000..6331d91b86a6
--- /dev/null
+++ b/tools/testing/selftests/net/psock_snd.sh
@@ -0,0 +1,98 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of packet socket send regression tests
+
+set -e
+
+readonly mtu=1500
+readonly iphlen=20
+readonly udphlen=8
+
+readonly vnet_hlen=10
+readonly eth_hlen=14
+
+readonly mss="$((${mtu} - ${iphlen} - ${udphlen}))"
+readonly mss_exceeds="$((${mss} + 1))"
+
+readonly max_mtu=65535
+readonly max_mss="$((${max_mtu} - ${iphlen} - ${udphlen}))"
+readonly max_mss_exceeds="$((${max_mss} + 1))"
+
+# functional checks (not a full cross-product)
+
+echo "dgram"
+./in_netns.sh ./psock_snd -d
+
+echo "dgram bind"
+./in_netns.sh ./psock_snd -d -b
+
+echo "raw"
+./in_netns.sh ./psock_snd
+
+echo "raw bind"
+./in_netns.sh ./psock_snd -b
+
+echo "raw qdisc bypass"
+./in_netns.sh ./psock_snd -q
+
+echo "raw vlan"
+./in_netns.sh ./psock_snd -V
+
+echo "raw vnet hdr"
+./in_netns.sh ./psock_snd -v
+
+echo "raw csum_off"
+./in_netns.sh ./psock_snd -v -c
+
+echo "raw csum_off with bad offset (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -C)
+
+
+# bounds check: send {max, max + 1, min, min - 1} lengths
+
+echo "raw min size"
+./in_netns.sh ./psock_snd -l 0
+
+echo "raw mtu size"
+./in_netns.sh ./psock_snd -l "${mss}"
+
+echo "raw mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -l "${mss_exceeds}")
+
+# fails due to ARPHRD_ETHER check in packet_extra_vlan_len_allowed
+#
+# echo "raw vlan mtu size"
+# ./in_netns.sh ./psock_snd -V -l "${mss}"
+
+echo "raw vlan mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -V -l "${mss_exceeds}")
+
+echo "dgram mtu size"
+./in_netns.sh ./psock_snd -d -l "${mss}"
+
+echo "dgram mtu size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -d -l "${mss_exceeds}")
+
+echo "raw truncate hlen (fails: does not arrive)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen}))")
+
+echo "raw truncate hlen - 1 (fails: EINVAL)"
+(! ./in_netns.sh ./psock_snd -t "$((${vnet_hlen} + ${eth_hlen} - 1))")
+
+
+# gso checks: implies -l, because with gso len must exceed gso_size
+
+echo "raw gso min size"
+./in_netns.sh ./psock_snd -v -c -g -l "${mss_exceeds}"
+
+echo "raw gso min size - 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${mss}")
+
+echo "raw gso max size"
+./in_netns.sh ./psock_snd -v -c -g -l "${max_mss}"
+
+echo "raw gso max size + 1 (fails)"
+(! ./in_netns.sh ./psock_snd -v -c -g -l "${max_mss_exceeds}")
+
+echo "OK. All tests passed"
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index fb3767844e42..0d7a44fa30af 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -505,6 +505,108 @@ kci_test_macsec()
 	echo "PASS: macsec"
 }
 
+#-------------------------------------------------------------------
+# Example commands
+#   ip x s add proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07 replay-window 32 \
+#            aead 'rfc4106(gcm(aes))' 1234567890123456dcba 128 \
+#            sel src 14.0.0.52/24 dst 14.0.0.70/24
+#   ip x p add dir out src 14.0.0.52/24 dst 14.0.0.70/24 \
+#            tmpl proto esp src 14.0.0.52 dst 14.0.0.70 \
+#            spi 0x07 mode transport reqid 0x07
+#
+# Subcommands not tested
+#    ip x s update
+#    ip x s allocspi
+#    ip x s deleteall
+#    ip x p update
+#    ip x p deleteall
+#    ip x p set
+#-------------------------------------------------------------------
+kci_test_ipsec()
+{
+	srcip="14.0.0.52"
+	dstip="14.0.0.70"
+	algo="aead rfc4106(gcm(aes)) 0x3132333435363738393031323334353664636261 128"
+
+	# flush states and policies; a failure of either flush is recorded
+	ip x s flush && ip x p flush
+	check_err $?
+
+	# start the monitor in the background, logging to a file in the temp dir
+	tmpfile=`mktemp -t ipsectestXXX`
+	ip x m > $tmpfile &
+	mpid=$!
+	sleep 0.2
+	# xfrm state: add, list, count, get, delete
+	ipsecid="proto esp src $srcip dst $dstip spi 0x07"
+	ip x s add $ipsecid \
+            mode transport reqid 0x07 replay-window 32 \
+            $algo sel src $srcip/24 dst $dstip/24
+	check_err $?
+
+	lines=`ip x s list | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x s count | grep -q "SAD count 1"
+	check_err $?
+
+	lines=`ip x s get $ipsecid | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x s delete $ipsecid
+	check_err $?
+
+	lines=`ip x s list | wc -l`
+	test $lines -eq 0
+	check_err $?
+	# xfrm policy: add, list, count, get, delete
+	ipsecsel="dir out src $srcip/24 dst $dstip/24"
+	ip x p add $ipsecsel \
+		    tmpl proto esp src $srcip dst $dstip \
+		    spi 0x07 mode transport reqid 0x07
+	check_err $?
+
+	lines=`ip x p list | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x p count | grep -q "SPD IN  0 OUT 1 FWD 0"
+	check_err $?
+
+	lines=`ip x p get $ipsecsel | grep $srcip | grep $dstip | wc -l`
+	test $lines -eq 2
+	check_err $?
+
+	ip x p delete $ipsecsel
+	check_err $?
+
+	lines=`ip x p list | wc -l`
+	test $lines -eq 0
+	check_err $?
+
+	# check the monitor results
+	kill $mpid
+	lines=`wc -l $tmpfile | cut "-d " -f1`
+	test $lines -eq 20
+	check_err $?
+	rm -rf $tmpfile
+
+	# clean up any leftovers
+	ip x s flush
+	check_err $?
+	ip x p flush
+	check_err $?
+
+	if [ $ret -ne 0 ]; then
+		echo "FAIL: ipsec"
+		return 1
+	fi
+	echo "PASS: ipsec"
+}
+
 kci_test_gretap()
 {
 	testns="testns"
@@ -758,6 +860,7 @@ kci_test_rtnl()
 	kci_test_vrf
 	kci_test_encap
 	kci_test_macsec
+	kci_test_ipsec
 
 	kci_del_dummy
 }
diff --git a/tools/testing/selftests/net/tcp_inq.c b/tools/testing/selftests/net/tcp_inq.c
new file mode 100644
index 000000000000..d044b29ddabc
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_inq.c
@@ -0,0 +1,189 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Soheil Hassas Yeganeh (soheil@google.com)
+ *
+ * Simple example on how to use TCP_INQ and TCP_CM_INQ.
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ */
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <netinet/in.h>
+#include <netinet/tcp.h>
+#include <pthread.h>
+#include <stdio.h>
+#include <errno.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+
+#ifndef TCP_INQ
+#define TCP_INQ 36
+#endif
+
+#ifndef TCP_CM_INQ
+#define TCP_CM_INQ TCP_INQ
+#endif
+
+#define BUF_SIZE 8192
+#define CMSG_SIZE 32
+
+static int family = AF_INET6;
+static socklen_t addr_len = sizeof(struct sockaddr_in6);
+static int port = 4974;
+
+static void setup_loopback_addr(int family, struct sockaddr_storage *sockaddr)
+{
+	struct sockaddr_in6 *addr6 = (void *) sockaddr;
+	struct sockaddr_in *addr4 = (void *) sockaddr;
+
+	switch (family) {
+	case PF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = AF_INET;
+		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		addr4->sin_port = htons(port);
+		break;
+	case PF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = AF_INET6;
+		addr6->sin6_addr = in6addr_loopback;
+		addr6->sin6_port = htons(port);
+		break;
+	default:
+		error(1, 0, "illegal family");
+	}
+}
+
+void *start_server(void *arg)
+{
+	/* Server thread: arg carries the listening fd. */
+	int server_fd = (int)(unsigned long)arg;
+	char *buf;
+	int fd, r;
+
+	buf = calloc(1, BUF_SIZE);	/* payload content is irrelevant */
+	if (!buf)
+		error(1, errno, "calloc");
+
+	for (;;) {
+		fd = accept(server_fd, NULL, NULL);	/* peer address unused */
+		if (fd == -1) {
+			perror("accept");
+			break;
+		}
+		do {
+			r = send(fd, buf, BUF_SIZE, 0);
+		} while (r < 0 && errno == EINTR);
+		if (r < 0)
+			perror("send");
+		if (r != BUF_SIZE)
+			fprintf(stderr, "can only send %d bytes\n", r);
+		/* TCP_INQ can overestimate in-queue by one byte if we send
+		 * the FIN packet. Sleep for 1 second before closing, so that
+		 * the client has likely invoked recvmsg() by then.
+		 */
+		sleep(1);
+		close(fd);
+	}
+
+	free(buf);
+	close(server_fd);
+	pthread_exit(0);
+}
+
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage listen_addr, addr;
+	int c, one = 1, inq = -1;
+	pthread_t server_thread;
+	char cmsgbuf[CMSG_SIZE];
+	struct iovec iov[1];
+	struct cmsghdr *cm;
+	struct msghdr msg;
+	int server_fd, fd;
+	char *buf;
+
+	while ((c = getopt(argc, argv, "46p:")) != -1) {
+		switch (c) {
+		case '4':
+			family = PF_INET;
+			addr_len = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			family = PF_INET6;
+			addr_len = sizeof(struct sockaddr_in6);
+			break;
+		case 'p':
+			port = atoi(optarg);
+			break;
+		}
+	}
+	/* Listener and server thread; pthread_create() returns its error. */
+	server_fd = socket(family, SOCK_STREAM, 0);
+	if (server_fd < 0)
+		error(1, errno, "server socket");
+	setup_loopback_addr(family, &listen_addr);
+	if (setsockopt(server_fd, SOL_SOCKET, SO_REUSEADDR,
+		       &one, sizeof(one)) != 0)
+		error(1, errno, "setsockopt(SO_REUSEADDR)");
+	if (bind(server_fd, (const struct sockaddr *)&listen_addr,
+		 addr_len) == -1)
+		error(1, errno, "bind");
+	if (listen(server_fd, 128) == -1)
+		error(1, errno, "listen");
+	if ((errno = pthread_create(&server_thread, NULL, start_server,
+				    (void *)(unsigned long)server_fd)) != 0)
+		error(1, errno, "pthread_create");
+	/* Client side: connect and enable TCP_INQ on the socket. */
+	fd = socket(family, SOCK_STREAM, 0);
+	if (fd < 0)
+		error(1, errno, "client socket");
+	setup_loopback_addr(family, &addr);
+	if (connect(fd, (const struct sockaddr *)&addr, addr_len) == -1)
+		error(1, errno, "connect");
+	if (setsockopt(fd, SOL_TCP, TCP_INQ, &one, sizeof(one)) != 0)
+		error(1, errno, "setsockopt(TCP_INQ)");
+
+	msg.msg_name = NULL;
+	msg.msg_namelen = 0;
+	msg.msg_iov = iov;
+	msg.msg_iovlen = 1;
+	msg.msg_control = cmsgbuf;
+	msg.msg_controllen = sizeof(cmsgbuf);
+	msg.msg_flags = 0;
+	/* Read half of BUF_SIZE; the server sends BUF_SIZE in total. */
+	buf = malloc(BUF_SIZE);
+	iov[0].iov_base = buf;
+	iov[0].iov_len = BUF_SIZE / 2;
+
+	if (recvmsg(fd, &msg, 0) != iov[0].iov_len)
+		error(1, errno, "recvmsg");
+	if (msg.msg_flags & MSG_CTRUNC)
+		error(1, 0, "control message is truncated");
+
+	for (cm = CMSG_FIRSTHDR(&msg); cm; cm = CMSG_NXTHDR(&msg, cm))
+		if (cm->cmsg_level == SOL_TCP && cm->cmsg_type == TCP_CM_INQ)
+			inq = *((int *) CMSG_DATA(cm));
+
+	if (inq != BUF_SIZE - iov[0].iov_len) {
+		fprintf(stderr, "unexpected inq: %d\n", inq);
+		exit(1);
+	}
+
+	printf("PASSED\n");
+	free(buf);
+	close(fd);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_mmap.c b/tools/testing/selftests/net/tcp_mmap.c
new file mode 100644
index 000000000000..77f762780199
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_mmap.c
@@ -0,0 +1,447 @@
+/*
+ * Copyright 2018 Google Inc.
+ * Author: Eric Dumazet (edumazet@google.com)
+ *
+ * Reference program demonstrating tcp mmap() usage,
+ * and SO_RCVLOWAT hints for receiver.
+ *
+ * Note : NIC with header split is needed to use mmap() on TCP :
+ * Each incoming frame must be a multiple of PAGE_SIZE bytes of TCP payload.
+ *
+ * How to use on loopback interface :
+ *
+ *  ifconfig lo mtu 61512  # 15*4096 + 40 (ipv6 header) + 32 (TCP with TS option header)
+ *  tcp_mmap -s -z &
+ *  tcp_mmap -H ::1 -z
+ *
+ *  Or leave default lo mtu, but use -M option to set TCP_MAXSEG option to (4096 + 12)
+ *      (4096 : page size on x86, 12: TCP TS option length)
+ *  tcp_mmap -s -z -M $((4096+12)) &
+ *  tcp_mmap -H ::1 -z -M $((4096+12))
+ *
+ * Note: -z option on sender uses MSG_ZEROCOPY, which forces a copy when packets go through loopback interface.
+ *       We might use sendfile() instead, but really this test program is about mmap(), for receivers ;)
+ *
+ * $ ./tcp_mmap -s &                                 # Without mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (0 % mmap'ed) in 14.1157 s, 19.4732 Gbit
+ *   cpu usage user:0.057 sys:7.815, 240.234 usec per MB, 65531 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.6833 s, 18.7204 Gbit
+ *  cpu usage user:0.043 sys:8.103, 248.596 usec per MB, 65524 c-switches
+ * received 32768 MB (0 % mmap'ed) in 11.143 s, 24.6682 Gbit
+ *   cpu usage user:0.044 sys:6.576, 202.026 usec per MB, 65519 c-switches
+ * received 32768 MB (0 % mmap'ed) in 14.9056 s, 18.4413 Gbit
+ *   cpu usage user:0.036 sys:8.193, 251.129 usec per MB, 65530 c-switches
+ * $ kill %1   # kill tcp_mmap server
+ *
+ * $ ./tcp_mmap -s -z &                              # With mmap()
+ * $ for i in {1..4}; do ./tcp_mmap -H ::1 -z ; done
+ * received 32768 MB (99.9939 % mmap'ed) in 6.73792 s, 40.7956 Gbit
+ *   cpu usage user:0.045 sys:2.827, 87.6465 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.26732 s, 37.8238 Gbit
+ *   cpu usage user:0.037 sys:3.087, 95.3369 usec per MB, 65532 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.61661 s, 36.0893 Gbit
+ *   cpu usage user:0.046 sys:3.559, 110.016 usec per MB, 65529 c-switches
+ * received 32768 MB (99.9939 % mmap'ed) in 7.43764 s, 36.9577 Gbit
+ *   cpu usage user:0.035 sys:3.467, 106.873 usec per MB, 65530 c-switches
+ *
+ * License (GPLv2):
+ *
+ * This program is free software; you can redistribute it and/or modify it
+ * under the terms and conditions of the GNU General Public License,
+ * version 2, as published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope it will be useful, but WITHOUT
+ * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+ * FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+ * more details.
+ *
+ * You should have received a copy of the GNU General Public License along with
+ * this program; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin St - Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+#define _GNU_SOURCE
+#include <pthread.h>
+#include <sys/types.h>
+#include <fcntl.h>
+#include <error.h>
+#include <sys/socket.h>
+#include <sys/mman.h>
+#include <sys/resource.h>
+#include <unistd.h>
+#include <string.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <time.h>
+#include <sys/time.h>
+#include <netinet/in.h>
+#include <arpa/inet.h>
+#include <poll.h>
+#include <linux/tcp.h>
+#include <assert.h>
+
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY    0x4000000
+#endif
+
+#define FILE_SZ (1UL << 35)
+static int cfg_family = AF_INET6;
+static socklen_t cfg_alen = sizeof(struct sockaddr_in6);
+static int cfg_port = 8787;
+
+static int rcvbuf; /* Default: autotuning.  Can be set with -r <integer> option */
+static int sndbuf; /* Default: autotuning.  Can be set with -w <integer> option */
+static int zflg; /* zero copy option (MSG_ZEROCOPY for sender, mmap() for receiver) */
+static int xflg; /* hash received data (simple xor) (-x option) */
+static int keepflag; /* -k option: receiver shall keep all received file in memory (no munmap() calls) */
+
+static int chunk_size  = 512*1024;
+
+unsigned long htotal;
+
+/*
+ * Cache prefetch hint for the byte at @x. Issues the x86-64 "prefetcht0"
+ * instruction; compiles to an empty function on every other architecture.
+ */
+static inline void prefetch(const void *x)
+{
+#if defined(__x86_64__)
+	asm volatile("prefetcht0 %P0" : : "m" (*(const char *)x));
+#endif
+}
+
+/*
+ * Fold @length bytes starting at @zone into the global XOR checksum htotal.
+ * The bulk is consumed eight longs at a time (with a prefetch 384 bytes
+ * ahead of the current position); leftover bytes are XORed in one by one.
+ */
+void hash_zone(void *zone, unsigned int length)
+{
+	const size_t word = sizeof(long);
+	const size_t stride = 8 * word;
+	unsigned long acc = htotal;
+	char *cur = zone;
+	int i;
+
+	for (; length >= stride; length -= stride, cur += stride) {
+		prefetch(cur + 384);
+		for (i = 0; i < 8; i++)
+			acc ^= *(unsigned long *)(cur + i * word);
+	}
+
+	/* tail: fewer than eight longs remain */
+	for (; length >= 1; length--, cur++)
+		acc ^= *(unsigned char *)cur;
+
+	htotal = acc;
+}
+
+/*
+ * Receiver thread, one per accepted connection.
+ *
+ * @arg carries the connected socket descriptor, cast to a pointer.
+ *
+ * Data is consumed either with plain read() calls or, when zflg is set and
+ * the socket could be mmap()ed, with the TCP_ZEROCOPY_RECEIVE getsockopt().
+ * The loop ends when read() reports end-of-file (copy mode) or when the
+ * getsockopt() fails (zero-copy mode). If more than 1 MB was received, a
+ * throughput / cpu usage summary is printed. The thread always frees its
+ * buffer and closes the socket before exiting.
+ */
+void *child_thread(void *arg)
+{
+	unsigned long total_mmap = 0, total = 0;
+	struct tcp_zerocopy_receive zc;
+	unsigned long delta_usec;
+	int flags = MAP_SHARED;
+	struct timeval t0, t1;
+	char *buffer = NULL;
+	void *addr = NULL;
+	double throughput;
+	struct rusage ru;
+	int lu, fd;
+
+	fd = (int)(unsigned long)arg;
+
+	gettimeofday(&t0, NULL);
+
+	/* non-blocking socket: read() returns -1 instead of sleeping */
+	fcntl(fd, F_SETFL, O_NDELAY);
+	buffer = malloc(chunk_size);
+	if (!buffer) {
+		perror("malloc");
+		goto error;
+	}
+	if (zflg) {
+		addr = mmap(NULL, chunk_size, PROT_READ, flags, fd, 0);
+		/* mmap() on the socket failed: clear the (global) flag and
+		 * fall back to the read() path below
+		 */
+		if (addr == (void *)-1)
+			zflg = 0;
+	}
+	while (1) {
+		struct pollfd pfd = { .fd = fd, .events = POLLIN, };
+		int sub;
+
+		/* wait up to 10 seconds for data; the result is not checked */
+		poll(&pfd, 1, 10000);
+		if (zflg) {
+			socklen_t zc_len = sizeof(zc);
+			int res;
+
+			zc.address = (__u64)addr;
+			zc.length = chunk_size;
+			zc.recv_skip_hint = 0;
+			res = getsockopt(fd, IPPROTO_TCP, TCP_ZEROCOPY_RECEIVE,
+					 &zc, &zc_len);
+			if (res == -1)
+				break;
+
+			/* zc.length is accounted as bytes obtained via mmap */
+			if (zc.length) {
+				assert(zc.length <= chunk_size);
+				total_mmap += zc.length;
+				if (xflg)
+					hash_zone(addr, zc.length);
+				total += zc.length;
+			}
+			/* recv_skip_hint bytes are fetched with a regular read() */
+			if (zc.recv_skip_hint) {
+				assert(zc.recv_skip_hint <= chunk_size);
+				lu = read(fd, buffer, zc.recv_skip_hint);
+				if (lu > 0) {
+					if (xflg)
+						hash_zone(buffer, lu);
+					total += lu;
+				}
+			}
+			continue;
+		}
+		/* copy mode: try to fill one chunk_size buffer */
+		sub = 0;
+		while (sub < chunk_size) {
+			lu = read(fd, buffer + sub, chunk_size - sub);
+			if (lu == 0)
+				goto end;	/* end of file */
+			if (lu < 0)
+				break;		/* back to poll() */
+			if (xflg)
+				hash_zone(buffer + sub, lu);
+			total += lu;
+			sub += lu;
+		}
+	}
+end:
+	gettimeofday(&t1, NULL);
+	delta_usec = (t1.tv_sec - t0.tv_sec) * 1000000 + t1.tv_usec - t0.tv_usec;
+
+	/* bits per microsecond / 1000 == Gbit per second */
+	throughput = 0;
+	if (delta_usec)
+		throughput = total * 8.0 / (double)delta_usec / 1000.0;
+	getrusage(RUSAGE_THREAD, &ru);
+	/* only report transfers larger than 1 MB */
+	if (total > 1024*1024) {
+		unsigned long total_usec;
+		unsigned long mb = total >> 20;
+		total_usec = 1000000*ru.ru_utime.tv_sec + ru.ru_utime.tv_usec +
+			     1000000*ru.ru_stime.tv_sec + ru.ru_stime.tv_usec;
+		printf("received %lg MB (%lg %% mmap'ed) in %lg s, %lg Gbit\n"
+		       "  cpu usage user:%lg sys:%lg, %lg usec per MB, %lu c-switches\n",
+				total / (1024.0 * 1024.0),
+				100.0*total_mmap/total,
+				(double)delta_usec / 1000000.0,
+				throughput,
+				(double)ru.ru_utime.tv_sec + (double)ru.ru_utime.tv_usec / 1000000.0,
+				(double)ru.ru_stime.tv_sec + (double)ru.ru_stime.tv_usec / 1000000.0,
+				(double)total_usec/mb,
+				ru.ru_nvcsw);
+	}
+error:
+	free(buffer);
+	close(fd);
+	if (zflg)
+		munmap(addr, chunk_size);
+	pthread_exit(0);
+}
+
+/*
+ * Apply the optional -r / -w buffer sizes to @fd. A zero value leaves the
+ * corresponding buffer alone; a setsockopt() failure is reported, not fatal.
+ */
+static void apply_rcvsnd_buf(int fd)
+{
+	if (rcvbuf) {
+		int rc = setsockopt(fd, SOL_SOCKET, SO_RCVBUF,
+				    &rcvbuf, sizeof(rcvbuf));
+
+		if (rc == -1)
+			perror("setsockopt SO_RCVBUF");
+	}
+
+	if (sndbuf) {
+		int rc = setsockopt(fd, SOL_SOCKET, SO_SNDBUF,
+				    &sndbuf, sizeof(sndbuf));
+
+		if (rc == -1)
+			perror("setsockopt SO_SNDBUF");
+	}
+}
+
+
+/*
+ * Fill @sockaddr with an address of family @domain using port cfg_port.
+ * @str_addr, when not NULL, is parsed as the textual address; when NULL the
+ * address part is left zeroed. Exits on a parse error or an unknown domain.
+ */
+static void setup_sockaddr(int domain, const char *str_addr,
+			   struct sockaddr_storage *sockaddr)
+{
+	if (domain == PF_INET) {
+		struct sockaddr_in *sin = (void *) sockaddr;
+
+		memset(sin, 0, sizeof(*sin));
+		sin->sin_family = AF_INET;
+		sin->sin_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET, str_addr, &sin->sin_addr) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		return;
+	}
+
+	if (domain == PF_INET6) {
+		struct sockaddr_in6 *sin6 = (void *) sockaddr;
+
+		memset(sin6, 0, sizeof(*sin6));
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = htons(cfg_port);
+		if (str_addr &&
+		    inet_pton(AF_INET6, str_addr, &sin6->sin6_addr) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		return;
+	}
+
+	error(1, 0, "illegal domain");
+}
+
+/*
+ * Server accept loop; never returns.
+ *
+ * Sets SO_RCVLOWAT to chunk_size and the optional -r / -w buffer sizes on
+ * the listening socket @fdlisten, then starts one child_thread() per
+ * accepted connection. Failures of accept() or pthread_create() are
+ * reported and the loop carries on.
+ */
+static void do_accept(int fdlisten)
+{
+	if (setsockopt(fdlisten, SOL_SOCKET, SO_RCVLOWAT,
+		       &chunk_size, sizeof(chunk_size)) == -1) {
+		perror("setsockopt SO_RCVLOWAT");
+	}
+
+	apply_rcvsnd_buf(fdlisten);
+
+	while (1) {
+		pthread_t th;
+		int fd, res;
+
+		/* The peer address is never used, so do not ask for it. This
+		 * also avoids sizing a buffer for one specific address family.
+		 */
+		fd = accept(fdlisten, NULL, NULL);
+		if (fd == -1) {
+			perror("accept");
+			continue;
+		}
+		res = pthread_create(&th, NULL, child_thread,
+				     (void *)(unsigned long)fd);
+		if (res) {
+			/* pthread_create() returns the error, errno is not set */
+			errno = res;
+			perror("pthread_create");
+			close(fd);
+			continue;
+		}
+		/* Nobody joins the workers: detach them so that their
+		 * resources are released as soon as they exit.
+		 */
+		pthread_detach(th);
+	}
+}
+
+/*
+ * Entry point.
+ *
+ * With -s, run as the receiver: listen on cfg_port and hand every
+ * connection to child_thread() through do_accept(), which never returns.
+ * Otherwise run as the sender: connect to the -H host and push FILE_SZ
+ * bytes from a zero-filled buffer, chunk_size bytes per send().
+ *
+ * Options: -4 / -6 address family, -p port, -H host, -s server mode,
+ * -r / -w socket buffer sizes, -z zero copy, -M TCP_MAXSEG value,
+ * -x hash received data, -k keep flag, -P SO_MAX_PACING_RATE value.
+ */
+int main(int argc, char *argv[])
+{
+	struct sockaddr_storage listenaddr, addr;
+	unsigned int max_pacing_rate = 0;
+	unsigned long total = 0;
+	char *host = NULL;
+	int fd, c, on = 1;
+	char *buffer;
+	int sflg = 0;
+	int mss = 0;
+
+	while ((c = getopt(argc, argv, "46p:svr:w:H:zxkP:M:")) != -1) {
+		switch (c) {
+		case '4':
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'p':
+			cfg_port = atoi(optarg);
+			break;
+		case 'H':
+			host = optarg;
+			break;
+		case 's': /* server : listen for incoming connections */
+			sflg++;
+			break;
+		case 'r':
+			rcvbuf = atoi(optarg);
+			break;
+		case 'w':
+			sndbuf = atoi(optarg);
+			break;
+		case 'z':
+			zflg = 1;
+			break;
+		case 'M':
+			mss = atoi(optarg);
+			break;
+		case 'x':
+			xflg = 1;
+			break;
+		case 'k':
+			keepflag = 1;
+			break;
+		case 'P':
+			max_pacing_rate = atoi(optarg);
+			break;
+		default:
+			exit(1);
+		}
+	}
+	if (sflg) {
+		int fdlisten = socket(cfg_family, SOCK_STREAM, 0);
+
+		if (fdlisten == -1) {
+			perror("socket");
+			exit(1);
+		}
+		apply_rcvsnd_buf(fdlisten);
+		setsockopt(fdlisten, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on));
+
+		setup_sockaddr(cfg_family, host, &listenaddr);
+
+		if (mss &&
+		    setsockopt(fdlisten, IPPROTO_TCP, TCP_MAXSEG,
+			       &mss, sizeof(mss)) == -1) {
+			perror("setsockopt TCP_MAXSEG");
+			exit(1);
+		}
+		if (bind(fdlisten, (const struct sockaddr *)&listenaddr, cfg_alen) == -1) {
+			perror("bind");
+			exit(1);
+		}
+		if (listen(fdlisten, 128) == -1) {
+			perror("listen");
+			exit(1);
+		}
+		/* loops forever: the sender code below is client-only */
+		do_accept(fdlisten);
+	}
+
+	/* anonymous mapping: the payload is a zero-filled buffer */
+	buffer = mmap(NULL, chunk_size, PROT_READ | PROT_WRITE,
+			      MAP_PRIVATE | MAP_ANONYMOUS, -1, 0);
+	if (buffer == MAP_FAILED) {
+		perror("mmap");
+		exit(1);
+	}
+
+	/* The socket family must match the address built by setup_sockaddr()
+	 * and the length in cfg_alen, otherwise -4 cannot connect.
+	 */
+	fd = socket(cfg_family, SOCK_STREAM, 0);
+	if (fd == -1) {
+		perror("socket");
+		exit(1);
+	}
+	apply_rcvsnd_buf(fd);
+
+	setup_sockaddr(cfg_family, host, &addr);
+
+	if (mss &&
+	    setsockopt(fd, IPPROTO_TCP, TCP_MAXSEG, &mss, sizeof(mss)) == -1) {
+		perror("setsockopt TCP_MAXSEG");
+		exit(1);
+	}
+	if (connect(fd, (const struct sockaddr *)&addr, cfg_alen) == -1) {
+		perror("connect");
+		exit(1);
+	}
+	if (max_pacing_rate &&
+	    setsockopt(fd, SOL_SOCKET, SO_MAX_PACING_RATE,
+		       &max_pacing_rate, sizeof(max_pacing_rate)) == -1)
+		perror("setsockopt SO_MAX_PACING_RATE");
+
+	/* zero copy is best effort: fall back to plain send() if refused */
+	if (zflg && setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY,
+			       &on, sizeof(on)) == -1) {
+		perror("setsockopt SO_ZEROCOPY, (-z option disabled)");
+		zflg = 0;
+	}
+	while (total < FILE_SZ) {
+		long wr = FILE_SZ - total;
+
+		if (wr > chunk_size)
+			wr = chunk_size;
+		/* Note : we just want to fill the pipe with 0 bytes */
+		wr = send(fd, buffer, wr, zflg ? MSG_ZEROCOPY : 0);
+		if (wr <= 0)
+			break;
+		total += wr;
+	}
+	close(fd);
+	munmap(buffer, chunk_size);
+	return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
new file mode 100644
index 000000000000..e279051bc631
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.c
@@ -0,0 +1,693 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <stddef.h>
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <net/if.h>
+#include <linux/in.h>
+#include <linux/netlink.h>
+#include <linux/rtnetlink.h>
+#include <netinet/if_ether.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU	0xFFFFU
+#endif
+
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT		103
+#endif
+
+#ifndef UDP_MAX_SEGMENTS
+#define UDP_MAX_SEGMENTS	(1 << 6UL)
+#endif
+
+#define CONST_MTU_TEST	1500
+
+#define CONST_HDRLEN_V4		(sizeof(struct iphdr) + sizeof(struct udphdr))
+#define CONST_HDRLEN_V6		(sizeof(struct ip6_hdr) + sizeof(struct udphdr))
+
+#define CONST_MSS_V4		(CONST_MTU_TEST - CONST_HDRLEN_V4)
+#define CONST_MSS_V6		(CONST_MTU_TEST - CONST_HDRLEN_V6)
+
+#define CONST_MAX_SEGS_V4	(ETH_MAX_MTU / CONST_MSS_V4)
+#define CONST_MAX_SEGS_V6	(ETH_MAX_MTU / CONST_MSS_V6)
+
+static bool		cfg_do_ipv4;
+static bool		cfg_do_ipv6;
+static bool		cfg_do_connected;
+static bool		cfg_do_connectionless;
+static bool		cfg_do_msgmore;
+static bool		cfg_do_setsockopt;
+static int		cfg_specific_test_id = -1;
+
+static const char	cfg_ifname[] = "lo";
+static unsigned short	cfg_port = 9000;
+
+static char buf[ETH_MAX_MTU];
+
+/*
+ * One send/receive scenario. The test tables are terminated by an entry
+ * whose tlen is zero.
+ */
+struct testcase {
+	int tlen;		/* send() buffer size, may exceed mss */
+	bool tfail;		/* send() call is expected to fail */
+	int gso_len;		/* mss after applying gso */
+	int r_num_mss;		/* recv(): number of calls of full mss */
+	int r_len_last;		/* recv(): size of last non-mss dgram, if any */
+};
+
+const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
+const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+
+struct testcase testcases_v4[] = {
+	{
+		/* no GSO: send a single byte */
+		.tlen = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* no GSO: send a single MSS */
+		.tlen = CONST_MSS_V4,
+		.r_num_mss = 1,
+	},
+	{
+		/* no GSO: send a single MSS + 1B: fail */
+		.tlen = CONST_MSS_V4 + 1,
+		.tfail = true,
+	},
+	{
+		/* send a single MSS: will fail with GSO, because the segment
+		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+		 */
+		.tlen = CONST_MSS_V4,
+		.gso_len = CONST_MSS_V4,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send a single MSS + 1B */
+		.tlen = CONST_MSS_V4 + 1,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* send exactly 2 MSS */
+		.tlen = CONST_MSS_V4 * 2,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2 MSS + 1B */
+		.tlen = (CONST_MSS_V4 * 2) + 1,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send MAX segs */
+		.tlen = (ETH_MAX_MTU / CONST_MSS_V4) * CONST_MSS_V4,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = (ETH_MAX_MTU / CONST_MSS_V4),
+	},
+
+	{
+		/* send MAX bytes */
+		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4,
+		.gso_len = CONST_MSS_V4,
+		.r_num_mss = CONST_MAX_SEGS_V4,
+		.r_len_last = ETH_MAX_MTU - CONST_HDRLEN_V4 -
+			      (CONST_MAX_SEGS_V4 * CONST_MSS_V4),
+	},
+	{
+		/* send MAX + 1: fail */
+		.tlen = ETH_MAX_MTU - CONST_HDRLEN_V4 + 1,
+		.gso_len = CONST_MSS_V4,
+		.tfail = true,
+	},
+	{
+		/* send a single 1B MSS: will fail, see single MSS above */
+		.tlen = 1,
+		.gso_len = 1,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send 2 1B segments */
+		.tlen = 2,
+		.gso_len = 1,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2B + 2B + 1B segments */
+		.tlen = 5,
+		.gso_len = 2,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send max number of min sized segments */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+		.gso_len = 1,
+		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4,
+	},
+	{
+		/* send max number + 1 of min sized segments: fail */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V4 + 1,
+		.gso_len = 1,
+		.tfail = true,
+	},
+	{
+		/* EOL */
+	}
+};
+
+#ifndef IP6_MAX_MTU
+#define IP6_MAX_MTU	(ETH_MAX_MTU + sizeof(struct ip6_hdr))
+#endif
+
+struct testcase testcases_v6[] = {
+	{
+		/* no GSO: send a single byte */
+		.tlen = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* no GSO: send a single MSS */
+		.tlen = CONST_MSS_V6,
+		.r_num_mss = 1,
+	},
+	{
+		/* no GSO: send a single MSS + 1B: fail */
+		.tlen = CONST_MSS_V6 + 1,
+		.tfail = true,
+	},
+	{
+		/* send a single MSS: will fail with GSO, because the segment
+		 * logic in udp4_ufo_fragment demands a gso skb to be > MTU
+		 */
+		.tlen = CONST_MSS_V6,
+		.gso_len = CONST_MSS_V6,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send a single MSS + 1B */
+		.tlen = CONST_MSS_V6 + 1,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 1,
+		.r_len_last = 1,
+	},
+	{
+		/* send exactly 2 MSS */
+		.tlen = CONST_MSS_V6 * 2,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2 MSS + 1B */
+		.tlen = (CONST_MSS_V6 * 2) + 1,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send MAX segs */
+		.tlen = (IP6_MAX_MTU / CONST_MSS_V6) * CONST_MSS_V6,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = (IP6_MAX_MTU / CONST_MSS_V6),
+	},
+
+	{
+		/* send MAX bytes */
+		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6,
+		.gso_len = CONST_MSS_V6,
+		.r_num_mss = CONST_MAX_SEGS_V6,
+		.r_len_last = IP6_MAX_MTU - CONST_HDRLEN_V6 -
+			      (CONST_MAX_SEGS_V6 * CONST_MSS_V6),
+	},
+	{
+		/* send MAX + 1: fail */
+		.tlen = IP6_MAX_MTU - CONST_HDRLEN_V6 + 1,
+		.gso_len = CONST_MSS_V6,
+		.tfail = true,
+	},
+	{
+		/* send a single 1B MSS: will fail, see single MSS above */
+		.tlen = 1,
+		.gso_len = 1,
+		.tfail = true,
+		.r_num_mss = 1,
+	},
+	{
+		/* send 2 1B segments */
+		.tlen = 2,
+		.gso_len = 1,
+		.r_num_mss = 2,
+	},
+	{
+		/* send 2B + 2B + 1B segments */
+		.tlen = 5,
+		.gso_len = 2,
+		.r_num_mss = 2,
+		.r_len_last = 1,
+	},
+	{
+		/* send max number of min sized segments */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+		.gso_len = 1,
+		.r_num_mss = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6,
+	},
+	{
+		/* send max number + 1 of min sized segments: fail */
+		.tlen = UDP_MAX_SEGMENTS - CONST_HDRLEN_V6 + 1,
+		.gso_len = 1,
+		.tfail = true,
+	},
+	{
+		/* EOL */
+	}
+};
+
+/*
+ * Return the MTU of network interface @ifname, queried with the SIOCGIFMTU
+ * ioctl on socket @fd. Exits if the name does not fit in an ifreq or if the
+ * ioctl fails.
+ */
+static unsigned int get_device_mtu(int fd, const char *ifname)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	/* ifr_name is a fixed-size array: refuse names that would overflow */
+	if (strlen(ifname) >= sizeof(ifr.ifr_name))
+		error(1, 0, "ifname too long: %s", ifname);
+	strcpy(ifr.ifr_name, ifname);
+
+	if (ioctl(fd, SIOCGIFMTU, &ifr))
+		error(1, errno, "ioctl get mtu");
+
+	return ifr.ifr_mtu;
+}
+
+/*
+ * Set the MTU of network interface @ifname to @mtu with the SIOCSIFMTU
+ * ioctl on socket @fd. Exits if the name does not fit in an ifreq or if the
+ * ioctl fails.
+ */
+static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
+{
+	struct ifreq ifr;
+
+	memset(&ifr, 0, sizeof(ifr));
+
+	ifr.ifr_mtu = mtu;
+	/* ifr_name is a fixed-size array: refuse names that would overflow */
+	if (strlen(ifname) >= sizeof(ifr.ifr_name))
+		error(1, 0, "ifname too long: %s", ifname);
+	strcpy(ifr.ifr_name, ifname);
+
+	if (ioctl(fd, SIOCSIFMTU, &ifr))
+		error(1, errno, "ioctl set mtu");
+}
+
+/*
+ * Change the MTU of the test interface (cfg_ifname) to @mtu and verify the
+ * change by reading it back. The MTU before and after is logged to stderr.
+ * @fd is any socket usable for the interface ioctls.
+ */
+static void set_device_mtu(int fd, int mtu)
+{
+	unsigned int val;
+
+	val = get_device_mtu(fd, cfg_ifname);
+	fprintf(stderr, "device mtu (orig): %u\n", val);
+
+	__set_device_mtu(fd, cfg_ifname, mtu);
+	val = get_device_mtu(fd, cfg_ifname);
+	/* report the value asked for, plus what the device now claims */
+	if (val != (unsigned int)mtu)
+		error(1, 0, "unable to set device mtu to %d (device reports %u)",
+		      mtu, val);
+
+	fprintf(stderr, "device mtu (test): %u\n", val);
+}
+
+/*
+ * Select the "DO" path MTU discovery mode on socket @fd, using the IPv4 or
+ * the IPv6 socket option depending on @is_ipv4. Exits on failure.
+ */
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
+	const int name = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
+	int val = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;
+
+	if (setsockopt(fd, level, name, &val, sizeof(val)))
+		error(1, errno, "setsockopt path mtu");
+}
+
+/*
+ * Read the path MTU of socket @fd through the IP_MTU / IPV6_MTU socket
+ * option (chosen by @is_ipv4), log it to stderr and return it.
+ * Exits if getsockopt() fails.
+ */
+static unsigned int get_path_mtu(int fd, bool is_ipv4)
+{
+	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
+	const int name = is_ipv4 ? IP_MTU : IPV6_MTU;
+	unsigned int mtu;
+	socklen_t vallen = sizeof(mtu);
+
+	if (getsockopt(fd, level, name, &mtu, &vallen))
+		error(1, errno, "getsockopt mtu");
+
+	fprintf(stderr, "path mtu (read):  %u\n", mtu);
+	return mtu;
+}
+
+/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32")
+ *
+ * Builds one RTM_NEWROUTE netlink request by hand and sends it on a
+ * NETLINK_ROUTE socket. The route targets the test address (addr4 or addr6,
+ * selected by @is_ipv4) as a host route, uses interface index 1 and carries
+ * an RTAX_MTU metric of @mtu. Exits if the socket, the send or the close
+ * fails; no netlink reply is read back.
+ */
+static void set_route_mtu(int mtu, bool is_ipv4)
+{
+	struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
+	struct nlmsghdr *nh;
+	struct rtattr *rta;
+	struct rtmsg *rt;
+	/* sized for the larger (IPv6) address: header, rtmsg, then the
+	 * RTA_DST, RTA_OIF and nested RTA_METRICS attributes
+	 */
+	char data[NLMSG_ALIGN(sizeof(*nh)) +
+		  NLMSG_ALIGN(sizeof(*rt)) +
+		  NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
+		  NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
+		  NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
+	int fd, ret, alen, off = 0;
+
+	alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
+
+	fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
+	if (fd == -1)
+		error(1, errno, "socket netlink");
+
+	memset(data, 0, sizeof(data));
+
+	/* netlink header; nlmsg_len is filled in once the size is known */
+	nh = (void *)data;
+	nh->nlmsg_type = RTM_NEWROUTE;
+	nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
+	off += NLMSG_ALIGN(sizeof(*nh));
+
+	/* route message: prefix length is the full address, in bits */
+	rt = (void *)(data + off);
+	rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
+	rt->rtm_table = RT_TABLE_MAIN;
+	rt->rtm_dst_len = alen << 3;
+	rt->rtm_protocol = RTPROT_BOOT;
+	rt->rtm_scope = RT_SCOPE_UNIVERSE;
+	rt->rtm_type = RTN_UNICAST;
+	off += NLMSG_ALIGN(sizeof(*rt));
+
+	/* destination address attribute */
+	rta = (void *)(data + off);
+	rta->rta_type = RTA_DST;
+	rta->rta_len = RTA_LENGTH(alen);
+	if (is_ipv4)
+		memcpy(RTA_DATA(rta), &addr4, alen);
+	else
+		memcpy(RTA_DATA(rta), &addr6, alen);
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* output interface attribute: index hard-coded to 1 */
+	rta = (void *)(data + off);
+	rta->rta_type = RTA_OIF;
+	rta->rta_len = RTA_LENGTH(sizeof(int));
+	*((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* MTU is a subtype in a metrics type */
+	rta = (void *)(data + off);
+	rta->rta_type = RTA_METRICS;
+	rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
+	off += NLMSG_ALIGN(rta->rta_len);
+
+	/* now fill MTU subtype. Note that it fits within above rta_len */
+	rta = (void *)(((char *) rta) + RTA_LENGTH(0));
+	rta->rta_type = RTAX_MTU;
+	rta->rta_len = RTA_LENGTH(sizeof(int));
+	*((int *)(RTA_DATA(rta))) = mtu;
+
+	nh->nlmsg_len = off;
+
+	ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
+	if (ret != off)
+		error(1, errno, "send netlink: %uB != %uB\n", ret, off);
+
+	if (close(fd))
+		error(1, errno, "close netlink");
+
+	fprintf(stderr, "route mtu (test): %u\n", mtu);
+}
+
+/*
+ * Send @msg on @fd with sendmsg() flags @flags.
+ *
+ * Returns true when the whole first iovec was sent. Returns false when
+ * sendmsg() failed with EMSGSIZE, ENOMEM or EINVAL, which the test cases
+ * treat as an expected rejection. Any other failure, a short send or a
+ * non-zero msg_flags value terminates the program.
+ */
+static bool __send_one(int fd, struct msghdr *msg, int flags)
+{
+	int ret;
+
+	ret = sendmsg(fd, msg, flags);
+	if (ret == -1) {
+		if (errno == EMSGSIZE || errno == ENOMEM || errno == EINVAL)
+			return false;
+		error(1, errno, "sendmsg");
+	}
+	/* iov_len is a size_t: compare and print it as such */
+	if ((size_t)ret != msg->msg_iov->iov_len)
+		error(1, 0, "sendmsg: %d != %zu", ret, msg->msg_iov->iov_len);
+	if (msg->msg_flags)
+		error(1, 0, "sendmsg: return flags 0x%x\n", msg->msg_flags);
+
+	return true;
+}
+
+/*
+ * Send @len bytes of the global buf on @fd, to @addr / @alen.
+ *
+ * A non-zero @gso_len is attached as a UDP_SEGMENT control message, unless
+ * the setsockopt mode (cfg_do_setsockopt) is active. In MSG_MORE mode the
+ * payload is split into a first byte sent with MSG_MORE and the remainder.
+ * Returns the result of the final __send_one() call.
+ */
+static bool send_one(int fd, int len, int gso_len,
+		     struct sockaddr *addr, socklen_t alen)
+{
+	char control[CMSG_SPACE(sizeof(uint16_t))] = {0};
+	struct iovec iov = {
+		.iov_base = buf,
+		.iov_len = len,
+	};
+	struct msghdr msg = {
+		.msg_name = addr,
+		.msg_namelen = alen,
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+	};
+
+	if (gso_len && !cfg_do_setsockopt) {
+		struct cmsghdr *cm;
+
+		msg.msg_control = control;
+		msg.msg_controllen = sizeof(control);
+
+		cm = CMSG_FIRSTHDR(&msg);
+		cm->cmsg_level = SOL_UDP;
+		cm->cmsg_type = UDP_SEGMENT;
+		cm->cmsg_len = CMSG_LEN(sizeof(uint16_t));
+		*((uint16_t *) CMSG_DATA(cm)) = gso_len;
+	}
+
+	/* If MSG_MORE, send 1 byte followed by remainder */
+	if (cfg_do_msgmore && len > 1) {
+		iov.iov_len = 1;
+		if (!__send_one(fd, &msg, MSG_MORE))
+			error(1, 0, "send 1B failed");
+
+		iov.iov_base = buf + 1;
+		iov.iov_len = len - 1;
+	}
+
+	return __send_one(fd, &msg, 0);
+}
+
+/*
+ * Receive one datagram from @fd into the global buf and return its length.
+ * With MSG_DONTWAIT in @flags, an empty queue (EAGAIN) is reported as 0.
+ * Every other recv() failure terminates the program.
+ */
+static int recv_one(int fd, int flags)
+{
+	int len = recv(fd, buf, sizeof(buf), flags);
+
+	if (len != -1)
+		return len;
+
+	if (errno == EAGAIN && (flags & MSG_DONTWAIT))
+		return 0;
+
+	error(1, errno, "recv");
+	return len;
+}
+
+/*
+ * Run a single test case: send test->tlen bytes from @fdt to @addr and check
+ * what arrives on @fdr.
+ *
+ * In setsockopt mode the segment size is first installed on @fdt. The send
+ * outcome must match test->tfail. After a successful send, test->r_num_mss
+ * datagrams of exactly one mss are expected, then an optional shorter one of
+ * test->r_len_last bytes, and nothing after that.
+ */
+static void run_one(struct testcase *test, int fdt, int fdr,
+		    struct sockaddr *addr, socklen_t alen)
+{
+	const bool is_ipv4 = addr->sa_family == AF_INET;
+	int gso = test->gso_len;
+	int i, len, mss;
+
+	fprintf(stderr, "ipv%d tx:%d gso:%d %s\n",
+			is_ipv4 ? 4 : 6,
+			test->tlen, test->gso_len,
+			test->tfail ? "(fail)" : "");
+
+	if (cfg_do_setsockopt &&
+	    setsockopt(fdt, SOL_UDP, UDP_SEGMENT, &gso, sizeof(gso)))
+		error(1, errno, "setsockopt udp segment");
+
+	if (send_one(fdt, test->tlen, test->gso_len, addr, alen)) {
+		if (test->tfail)
+			error(1, 0, "send succeeded while expecting failure");
+	} else {
+		if (!test->tfail)
+			error(1, 0, "send failed while expecting success");
+		/* expected failure: nothing was sent, nothing to receive */
+		return;
+	}
+
+	/* without GSO a full datagram is one constant-MTU mss */
+	if (test->gso_len)
+		mss = test->gso_len;
+	else if (is_ipv4)
+		mss = CONST_MSS_V4;
+	else
+		mss = CONST_MSS_V6;
+
+	/* Recv all full MSS datagrams */
+	for (i = 0; i < test->r_num_mss; i++) {
+		len = recv_one(fdr, 0);
+		if (len != mss)
+			error(1, 0, "recv.%d: %d != %d", i, len, mss);
+	}
+
+	/* Recv the non-full last datagram, if tlen was not a multiple of mss */
+	if (test->r_len_last) {
+		len = recv_one(fdr, 0);
+		if (len != test->r_len_last)
+			error(1, 0, "recv.%d: %d != %d (last)",
+			      i, len, test->r_len_last);
+	}
+
+	/* Verify received all data */
+	if (recv_one(fdr, MSG_DONTWAIT))
+		error(1, 0, "recv: unexpected datagram");
+}
+
+/*
+ * Run every entry of the test table matching the address family of @addr,
+ * up to the terminating entry with tlen == 0. When a specific test id was
+ * requested with -t, only the entry at that index is run.
+ */
+static void run_all(int fdt, int fdr, struct sockaddr *addr, socklen_t alen)
+{
+	struct testcase *table;
+	int idx;
+
+	if (addr->sa_family == AF_INET)
+		table = testcases_v4;
+	else
+		table = testcases_v6;
+
+	for (idx = 0; table[idx].tlen; idx++) {
+		/* if a specific test is given, then skip all others */
+		if (cfg_specific_test_id != -1 &&
+		    cfg_specific_test_id != idx)
+			continue;
+
+		run_one(&table[idx], fdt, fdr, addr, alen);
+	}
+}
+
+/*
+ * Create a receive socket bound to @addr and a transmit socket of the same
+ * family, then run the test table in the requested modes:
+ *  - connectionless: device MTU set to CONST_MTU_TEST, explicit destination
+ *  - connected: device MTU raised by 100, a route MTU of CONST_MTU_TEST
+ *    installed, socket connected and its path MTU verified
+ * Both sockets are closed afterwards. Any failure terminates the program.
+ */
+static void run_test(struct sockaddr *addr, socklen_t alen)
+{
+	const bool is_ipv4 = addr->sa_family == AF_INET;
+	struct timeval rcv_timeout = { .tv_usec = 100 * 1000 };
+	int fdr, fdt;
+
+	fdr = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fdr == -1)
+		error(1, errno, "socket r");
+
+	if (bind(fdr, addr, alen))
+		error(1, errno, "bind");
+
+	/* Have tests fail quickly instead of hang */
+	if (setsockopt(fdr, SOL_SOCKET, SO_RCVTIMEO,
+		       &rcv_timeout, sizeof(rcv_timeout)))
+		error(1, errno, "setsockopt rcv timeout");
+
+	fdt = socket(addr->sa_family, SOCK_DGRAM, 0);
+	if (fdt == -1)
+		error(1, errno, "socket t");
+
+	/* Do not fragment these datagrams: only succeed if GSO works */
+	set_pmtu_discover(fdt, is_ipv4);
+
+	if (cfg_do_connectionless) {
+		set_device_mtu(fdt, CONST_MTU_TEST);
+		run_all(fdt, fdr, addr, alen);
+	}
+
+	if (cfg_do_connected) {
+		int pmtu;
+
+		set_device_mtu(fdt, CONST_MTU_TEST + 100);
+		set_route_mtu(CONST_MTU_TEST, is_ipv4);
+
+		if (connect(fdt, addr, alen))
+			error(1, errno, "connect");
+
+		pmtu = get_path_mtu(fdt, is_ipv4);
+		if (pmtu != CONST_MTU_TEST)
+			error(1, 0, "bad path mtu %u\n", pmtu);
+
+		run_all(fdt, fdr, addr, 0 /* use connected addr */);
+	}
+
+	if (close(fdt))
+		error(1, errno, "close t");
+	if (close(fdr))
+		error(1, errno, "close r");
+}
+
+/* Run the test suite over IPv4, against address addr4 and port cfg_port. */
+static void run_test_v4(void)
+{
+	struct sockaddr_in addr = {
+		.sin_family = AF_INET,
+		.sin_port = htons(cfg_port),
+		.sin_addr = addr4,
+	};
+
+	run_test((struct sockaddr *)&addr, sizeof(addr));
+}
+
+/* Run the test suite over IPv6, against address addr6 and port cfg_port. */
+static void run_test_v6(void)
+{
+	struct sockaddr_in6 addr = {
+		.sin6_family = AF_INET6,
+		.sin6_port = htons(cfg_port),
+		.sin6_addr = addr6,
+	};
+
+	run_test((struct sockaddr *)&addr, sizeof(addr));
+}
+
+/*
+ * Parse the command line into the cfg_* globals:
+ *   -4 / -6  run the IPv4 / IPv6 tests
+ *   -c / -C  run in connected / connectionless mode
+ *   -m       split each send with MSG_MORE
+ *   -s       set the segment size with setsockopt instead of a cmsg
+ *   -t <id>  run only the test case with this table index
+ * Any other option terminates the program.
+ */
+static void parse_opts(int argc, char **argv)
+{
+	for (;;) {
+		int opt = getopt(argc, argv, "46cCmst:");
+
+		if (opt == -1)
+			break;
+
+		switch (opt) {
+		case '4':
+			cfg_do_ipv4 = true;
+			break;
+		case '6':
+			cfg_do_ipv6 = true;
+			break;
+		case 'c':
+			cfg_do_connected = true;
+			break;
+		case 'C':
+			cfg_do_connectionless = true;
+			break;
+		case 'm':
+			cfg_do_msgmore = true;
+			break;
+		case 's':
+			cfg_do_setsockopt = true;
+			break;
+		case 't':
+			cfg_specific_test_id = strtoul(optarg, NULL, 0);
+			break;
+		default:
+			error(1, 0, "%s: parse error", argv[0]);
+		}
+	}
+}
+
+/*
+ * Parse the options, run the IPv4 and/or IPv6 suites that were selected and
+ * print "OK" to stderr. If neither -4 nor -6 was given, no test runs and
+ * "OK" is still printed.
+ */
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+
+	if (cfg_do_ipv4)
+		run_test_v4();
+	if (cfg_do_ipv6)
+		run_test_v6();
+
+	fprintf(stderr, "OK\n");
+	return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
new file mode 100755
index 000000000000..fec24f584fe9
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso regression tests
+#
+# Each variant runs in its own network namespace through in_netns.sh.
+
+# Stop at the first failing variant. Without this the exit status of the
+# script would be the status of the last variant only.
+set -e
+
+echo "ipv4 cmsg"
+./in_netns.sh ./udpgso -4 -C
+
+echo "ipv4 setsockopt"
+./in_netns.sh ./udpgso -4 -C -s
+
+echo "ipv6 cmsg"
+./in_netns.sh ./udpgso -6 -C
+
+echo "ipv6 setsockopt"
+./in_netns.sh ./udpgso -6 -C -s
+
+echo "ipv4 connected"
+./in_netns.sh ./udpgso -4 -c
+
+# blocked on 2nd loopback address
+# echo "ipv6 connected"
+# ./in_netns.sh ./udpgso -6 -c
+
+echo "ipv4 msg_more"
+./in_netns.sh ./udpgso -4 -C -m
+
+echo "ipv6 msg_more"
+./in_netns.sh ./udpgso -6 -C -m
diff --git a/tools/testing/selftests/net/udpgso_bench.sh b/tools/testing/selftests/net/udpgso_bench.sh
new file mode 100755
index 000000000000..792fa4d0285e
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench.sh
@@ -0,0 +1,74 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Run a series of udpgso benchmarks
+#
+# Requires bash: the script uses 'local -r' and '[[ ]]'.
+
+# Send signal 1 (SIGHUP) to all background jobs still running; installed as
+# the EXIT trap so the receivers started by run_one do not outlive the script.
+wake_children() {
+	local -r jobs="$(jobs -p)"
+
+	if [[ "${jobs}" != "" ]]; then
+		kill -1 ${jobs} 2>/dev/null
+	fi
+}
+trap wake_children EXIT
+
+# Start two receivers in the background (one default, one with -t), then run
+# the sender in the foreground with the given arguments.
+run_one() {
+	local -r args=$@
+
+	./udpgso_bench_rx &
+	./udpgso_bench_rx -t &
+
+	./udpgso_bench_tx ${args}
+}
+
+# Re-run this script through in_netns.sh with the "__subprocess" marker, so
+# that the dispatch at the bottom of the file calls run_one with the arguments.
+run_in_netns() {
+	local -r args=$@
+
+	./in_netns.sh $0 __subprocess ${args}
+}
+
+# Run the three UDP variants with the given base arguments: plain, with -S
+# (labelled "udp gso") and with -S -z (labelled "udp gso zerocopy").
+run_udp() {
+	local -r args=$@
+
+	echo "udp"
+	run_in_netns ${args}
+
+	echo "udp gso"
+	run_in_netns ${args} -S
+
+	echo "udp gso zerocopy"
+	run_in_netns ${args} -S -z
+}
+
+# Run the two TCP variants with the given base arguments: -t (labelled "tcp")
+# and -t -z (labelled "tcp zerocopy").
+run_tcp() {
+	local -r args=$@
+
+	echo "tcp"
+	run_in_netns ${args} -t
+
+	echo "tcp zerocopy"
+	run_in_netns ${args} -t -z
+}
+
+# Run the complete benchmark matrix: TCP and UDP variants, first over IPv4
+# (destination 127.0.0.1), then over IPv6 (destination ::1).
+run_all() {
+	local -r core_args="-l 4"
+	local -r ipv4_args="${core_args} -4 -D 127.0.0.1"
+	local -r ipv6_args="${core_args} -6 -D ::1"
+
+	echo "ipv4"
+	run_tcp "${ipv4_args}"
+	run_udp "${ipv4_args}"
+
+	echo "ipv6"
+	run_tcp "${ipv6_args}"
+	run_udp "${ipv6_args}"
+}
+
+# Entry point:
+#   no arguments            run the full matrix
+#   __subprocess <args>     inner invocation made by run_in_netns: run one
+#                           benchmark with <args>
+#   anything else           run a single benchmark with these arguments
+if [[ $# -eq 0 ]]; then
+	run_all
+elif [[ $1 == "__subprocess" ]]; then
+	shift
+	run_one $@
+else
+	run_in_netns $@
+fi
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
new file mode 100644
index 000000000000..727cf67a3f75
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -0,0 +1,265 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <error.h>
+#include <errno.h>
+#include <limits.h>
+#include <linux/errqueue.h>
+#include <linux/if_packet.h>
+#include <linux/socket.h>
+#include <linux/sockios.h>
+#include <net/ethernet.h>
+#include <net/if.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/tcp.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+#include <sys/stat.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <sys/wait.h>
+#include <unistd.h>
+
+/* Options; parse_opts() may override the defaults. */
+/* port the receive socket is bound to */
+static int  cfg_port		= 8000;
+/* -t: receive over a tcp connection instead of udp datagrams */
+static bool cfg_tcp;
+/* -v: check the contents of received udp datagrams */
+static bool cfg_verify;
+
+/* set by sigint_handler() to stop the polling and receive loops */
+static bool interrupted;
+/* receive calls and bytes counted since do_recv() last reset them */
+static unsigned long packets, bytes;
+
+/* Signal handler: note that SIGINT arrived so the loops can stop. */
+static void sigint_handler(int signum)
+{
+	if (signum != SIGINT)
+		return;
+
+	interrupted = true;
+}
+
+/* Return the current gettimeofday() time converted to milliseconds. */
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+
+	/* whole seconds scaled up, plus the sub-second part scaled down */
+	return (now.tv_sec * 1000) + (now.tv_usec / 1000);
+}
+
+/*
+ * Wait until fd is readable.
+ *
+ * Polls in 10 ms slices so that the interrupted flag is rechecked after
+ * every timeout; returns once poll() reports an event or interrupted is
+ * set. Exits the program if poll() fails or reports anything but POLLIN.
+ */
+static void do_poll(int fd)
+{
+	struct pollfd pfd;
+	int ret;
+
+	pfd.events = POLLIN;
+	pfd.revents = 0;
+	pfd.fd = fd;
+
+	do {
+		ret = poll(&pfd, 1, 10);
+		if (ret == -1)
+			error(1, errno, "poll");
+		if (ret == 0)
+			continue;
+		/* poll() succeeded here, so errno says nothing about this */
+		if (pfd.revents != POLLIN)
+			error(1, 0, "poll: 0x%x expected 0x%x\n",
+					pfd.revents, POLLIN);
+	} while (!ret && !interrupted);
+}
+
+/*
+ * Create the receive socket and return its fd.
+ *
+ * Opens an IPv6 socket, requests a 1 << 21 byte receive buffer, sets
+ * SO_REUSEPORT and binds to the wildcard address on cfg_port.
+ *
+ * do_tcp: when true, open a stream socket, listen, wait for a single
+ *         connection and return the accepted fd (the listening socket is
+ *         closed). When false, return the bound datagram socket.
+ *
+ * Any failing call exits the program.
+ */
+static int do_socket(bool do_tcp)
+{
+	struct sockaddr_in6 addr = {0};
+	int fd, val;
+
+	/* take the type from the argument so it matches listen/accept below */
+	fd = socket(PF_INET6, do_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	val = 1 << 21;
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &val, sizeof(val)))
+		error(1, errno, "setsockopt rcvbuf");
+	val = 1;
+	if (setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &val, sizeof(val)))
+		error(1, errno, "setsockopt reuseport");
+
+	addr.sin6_family =	PF_INET6;
+	addr.sin6_port =	htons(cfg_port);
+	addr.sin6_addr =	in6addr_any;
+	if (bind(fd, (void *) &addr, sizeof(addr)))
+		error(1, errno, "bind");
+
+	if (do_tcp) {
+		int accept_fd = fd;
+
+		if (listen(accept_fd, 1))
+			error(1, errno, "listen");
+
+		/* block until a client is ready to be accepted */
+		do_poll(accept_fd);
+
+		fd = accept(accept_fd, NULL, NULL);
+		if (fd == -1)
+			error(1, errno, "accept");
+		if (close(accept_fd))
+			error(1, errno, "close accept fd");
+	}
+
+	return fd;
+}
+
+/* Flush all outstanding bytes for the tcp receive queue */
+/*
+ * Drain the tcp receive queue until it is empty, counting each recv()
+ * call in packets and its size in bytes. Exits with status 0 when the
+ * peer has closed the connection.
+ */
+static void do_flush_tcp(int fd)
+{
+	const size_t max_flush = 1 << 21;
+	int nread;
+
+	for (;;) {
+		/* MSG_TRUNC flushes up to max_flush bytes per call */
+		nread = recv(fd, NULL, max_flush, MSG_TRUNC | MSG_DONTWAIT);
+		if (nread == -1) {
+			/* queue is empty for now */
+			if (errno == EAGAIN)
+				return;
+			error(1, errno, "flush");
+		}
+		if (nread == 0) {
+			/* client detached */
+			exit(0);
+		}
+
+		packets++;
+		bytes += nread;
+	}
+}
+
+/* Return val if it is a lowercase letter 'a'..'z', otherwise '.'. */
+static char sanitized_char(char val)
+{
+	if (val < 'a' || val > 'z')
+		return '.';
+
+	return val;
+}
+
+/*
+ * Check that data[0..len) is a run of consecutive lowercase letters that
+ * wraps from 'z' back to 'a'. The first byte may be any letter 'a'..'z'.
+ * Exits the program with a message on the first mismatch.
+ *
+ * data[0] is read unconditionally, so len must be at least 1.
+ */
+static void do_verify_udp(const char *data, int len)
+{
+	char want = data[0];
+	int pos;
+
+	if (want < 'a' || want > 'z')
+		error(1, 0, "data initial byte out of range");
+
+	for (pos = 1; pos < len; pos++) {
+		/* next letter in the cycle */
+		want = (want == 'z') ? 'a' : want + 1;
+
+		if (data[pos] == want)
+			continue;
+
+		error(1, 0, "data[%d]: len %d, %c(%hhu) != %c(%hhu)\n",
+		      pos, len,
+		      sanitized_char(data[pos]), data[pos],
+		      sanitized_char(want), want);
+	}
+}
+
+/* Flush all outstanding datagrams. Verify first few bytes of each. */
+/*
+ * Flush outstanding datagrams, at most 256 per call, counting each one in
+ * packets and its full length in bytes.
+ *
+ * With cfg_verify set, up to sizeof(rbuf) bytes of every datagram are
+ * copied out and checked with do_verify_udp(); a zero-length datagram is
+ * then an error. Without it nothing is copied (len is 0).
+ */
+static void do_flush_udp(int fd)
+{
+	static char rbuf[ETH_DATA_LEN];
+	int ret, len, budget = 256;
+
+	len = cfg_verify ? sizeof(rbuf) : 0;
+	while (budget--) {
+		/* MSG_TRUNC will make return value full datagram length */
+		ret = recv(fd, rbuf, len, MSG_TRUNC | MSG_DONTWAIT);
+		if (ret == -1 && errno == EAGAIN)
+			return;
+		if (ret == -1)
+			error(1, errno, "recv");
+		if (len) {
+			/* recv() succeeded: errno is not meaningful here */
+			if (ret == 0)
+				error(1, 0, "recv: 0 byte datagram\n");
+
+			/*
+			 * ret may exceed len because of MSG_TRUNC, but only
+			 * the first len bytes were stored in rbuf.
+			 */
+			do_verify_udp(rbuf, ret < len ? ret : len);
+		}
+
+		packets++;
+		bytes += ret;
+	}
+}
+
+/*
+ * Report the command line synopsis through error() with status 1,
+ * which ends the program. filepath is printed as the program name.
+ */
+static void usage(const char *filepath)
+{
+	error(1, 0,
+	      "Usage: %s [-tv] [-p port]",
+	      filepath);
+}
+
+/*
+ * Parse the command line into the cfg_* variables.
+ *
+ *   -p port   port to bind to (any base accepted by strtoul)
+ *   -t        receive over tcp
+ *   -v        verify udp payload contents
+ *
+ * Exits through usage() on stray non-option arguments, and with an error
+ * when -t and -v are combined.
+ */
+static void parse_opts(int argc, char **argv)
+{
+	int c;
+
+	/* "p:" - the port option takes an argument, delivered in optarg */
+	while ((c = getopt(argc, argv, "p:tv")) != -1) {
+		switch (c) {
+		case 'p':
+			/* keep host byte order: do_socket() applies htons() */
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 't':
+			cfg_tcp = true;
+			break;
+		case 'v':
+			cfg_verify = true;
+			break;
+		}
+	}
+
+	if (optind != argc)
+		usage(argv[0]);
+
+	if (cfg_tcp && cfg_verify)
+		error(1, 0, "TODO: implement verify mode for tcp");
+}
+
+/*
+ * Receive loop: open the socket, then repeatedly wait for data and drain
+ * it until interrupted is set. Once more than a second has passed since
+ * the previous report, print the counters (if any calls were counted) and
+ * reset them.
+ */
+static void do_recv(void)
+{
+	unsigned long now_ms, report_ms;
+	int fd;
+
+	fd = do_socket(cfg_tcp);
+
+	report_ms = gettimeofday_ms() + 1000;
+	do {
+		do_poll(fd);
+
+		if (cfg_tcp)
+			do_flush_tcp(fd);
+		else
+			do_flush_udp(fd);
+
+		now_ms = gettimeofday_ms();
+		if (now_ms <= report_ms)
+			continue;
+
+		/* report interval elapsed: print, then start a new one */
+		if (packets)
+			fprintf(stderr,
+				"%s rx: %6lu MB/s %8lu calls/s\n",
+				cfg_tcp ? "tcp" : "udp",
+				bytes >> 20, packets);
+		bytes = packets = 0;
+		report_ms = now_ms + 1000;
+
+	} while (!interrupted);
+
+	if (close(fd))
+		error(1, errno, "close");
+}
+
+/* Parse the options, install the SIGINT handler and run the receiver. */
+int main(int argc, char **argv)
+{
+	parse_opts(argc, argv);
+	signal(SIGINT, sigint_handler);
+	do_recv();
+
+	return 0;
+}
diff --git a/tools/testing/selftests/net/udpgso_bench_tx.c b/tools/testing/selftests/net/udpgso_bench_tx.c
new file mode 100644
index 000000000000..e821564053cf
--- /dev/null
+++ b/tools/testing/selftests/net/udpgso_bench_tx.c
@@ -0,0 +1,420 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <error.h>
+#include <netinet/if_ether.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <netinet/ip6.h>
+#include <netinet/udp.h>
+#include <poll.h>
+#include <sched.h>
+#include <signal.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <sys/time.h>
+#include <sys/types.h>
+#include <unistd.h>
+
+/*
+ * Fallback definitions, used only when the included headers do not
+ * provide these names.
+ */
+
+/* size of each payload buffer in buf[]; also bounds the payload length */
+#ifndef ETH_MAX_MTU
+#define ETH_MAX_MTU 0xFFFFU
+#endif
+
+/* SOL_UDP cmsg type that carries cfg_mss in send_udp_segment_cmsg() */
+#ifndef UDP_SEGMENT
+#define UDP_SEGMENT		103
+#endif
+
+/* SOL_SOCKET option enabled in main() when -z is given */
+#ifndef SO_ZEROCOPY
+#define SO_ZEROCOPY	60
+#endif
+
+/* send flag passed by the send_* helpers when -z is given */
+#ifndef MSG_ZEROCOPY
+#define MSG_ZEROCOPY	0x4000000
+#endif
+
+/* number of payload buffers in buf[] */
+#define NUM_PKT		100
+
+/* Options; parse_opts() may override the defaults. */
+/* -c: step to the next buf[] entry after every message */
+static bool	cfg_cache_trash;
+/* -C: cpu number handed to set_cpu() */
+static int	cfg_cpu		= -1;
+/* cleared by -u; when set, main() connect()s the socket */
+static int	cfg_connected	= true;
+/* -4 / -6: PF_INET or PF_INET6 */
+static int	cfg_family	= PF_UNSPEC;
+/* largest datagram payload; computed in parse_opts() */
+static uint16_t	cfg_mss;
+/* -s: bytes sent per message */
+static int	cfg_payload_len	= (1472 * 42);
+/* -p: destination port */
+static int	cfg_port	= 8000;
+/* -l: run time, stored in ms; -1 means run until interrupted */
+static int	cfg_runtime_ms	= -1;
+/* -S: send with the UDP_SEGMENT cmsg */
+static bool	cfg_segment;
+/* -m: send with sendmmsg() */
+static bool	cfg_sendmmsg;
+/* -t: use a tcp socket */
+static bool	cfg_tcp;
+/* -z: use MSG_ZEROCOPY */
+static bool	cfg_zerocopy;
+
+/* destination address filled in by setup_sockaddr(), and its length */
+static socklen_t cfg_alen;
+static struct sockaddr_storage cfg_dst_addr;
+
+/* set by sigint_handler() to stop the send loop */
+static bool interrupted;
+/* payload buffers; main() fills each with a repeating 'a'..'z' pattern */
+static char buf[NUM_PKT][ETH_MAX_MTU];
+
+/* Signal handler: note that SIGINT arrived so the send loop can stop. */
+static void sigint_handler(int signum)
+{
+	if (signum != SIGINT)
+		return;
+
+	interrupted = true;
+}
+
+/* Return the current gettimeofday() time converted to milliseconds. */
+static unsigned long gettimeofday_ms(void)
+{
+	struct timeval now;
+
+	gettimeofday(&now, NULL);
+
+	/* whole seconds scaled up, plus the sub-second part scaled down */
+	return (now.tv_sec * 1000) + (now.tv_usec / 1000);
+}
+
+/*
+ * Restrict the caller (pid 0 in sched_setaffinity() terms) to a single
+ * cpu. Exits the program if the affinity cannot be set; returns 0
+ * otherwise.
+ */
+static int set_cpu(int cpu)
+{
+	cpu_set_t mask;
+
+	CPU_ZERO(&mask);
+	CPU_SET(cpu, &mask);
+	/* pass errno so the message says why the call failed */
+	if (sched_setaffinity(0, sizeof(mask), &mask))
+		error(1, errno, "setaffinity %d", cpu);
+
+	return 0;
+}
+
+/*
+ * Fill in a socket address for the given domain.
+ *
+ * domain:   PF_INET or PF_INET6; anything else exits with an error
+ * str_addr: textual address, parsed with inet_pton()
+ * sockaddr: storage that receives a sockaddr_in or sockaddr_in6
+ *
+ * The port is taken from cfg_port as it is at the time of the call.
+ */
+static void setup_sockaddr(int domain, const char *str_addr, void *sockaddr)
+{
+	if (domain == PF_INET) {
+		struct sockaddr_in *sin = sockaddr;
+
+		sin->sin_family = AF_INET;
+		sin->sin_port = htons(cfg_port);
+		if (inet_pton(AF_INET, str_addr, &sin->sin_addr) != 1)
+			error(1, 0, "ipv4 parse error: %s", str_addr);
+		return;
+	}
+
+	if (domain == PF_INET6) {
+		struct sockaddr_in6 *sin6 = sockaddr;
+
+		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = htons(cfg_port);
+		if (inet_pton(AF_INET6, str_addr, &sin6->sin6_addr) != 1)
+			error(1, 0, "ipv6 parse error: %s", str_addr);
+		return;
+	}
+
+	error(1, 0, "illegal domain");
+}
+
+/*
+ * Drain the socket error queue, discarding every message on it
+ * (presumably the MSG_ZEROCOPY completion notifications - confirm).
+ *
+ * No data or control buffer is supplied, so each message is expected to
+ * come back flagged MSG_ERRQUEUE | MSG_CTRUNC; anything else is fatal.
+ */
+static void flush_zerocopy(int fd)
+{
+	struct msghdr msg = {0};	/* flush */
+	int ret;
+
+	for (;;) {
+		ret = recvmsg(fd, &msg, MSG_ERRQUEUE);
+		if (ret == -1) {
+			/* queue is empty */
+			if (errno == EAGAIN)
+				break;
+			error(1, errno, "errqueue");
+		}
+		if (msg.msg_flags != (MSG_ERRQUEUE | MSG_CTRUNC))
+			error(1, 0, "errqueue: flags 0x%x\n", msg.msg_flags);
+
+		/* reset before the next recvmsg() call */
+		msg.msg_flags = 0;
+	}
+}
+
+/*
+ * Write cfg_payload_len bytes from data to the stream socket, retrying
+ * after partial writes. Returns the number of send() calls it took.
+ */
+static int send_tcp(int fd, char *data)
+{
+	const int flags = cfg_zerocopy ? MSG_ZEROCOPY : 0;
+	int sent = 0, calls = 0;
+	int ret;
+
+	for (; sent < cfg_payload_len; sent += ret, calls++) {
+		ret = send(fd, data + sent, cfg_payload_len - sent, flags);
+		if (ret == -1)
+			error(1, errno, "write");
+	}
+
+	return calls;
+}
+
+/*
+ * Send cfg_payload_len bytes as individual datagrams of at most cfg_mss
+ * bytes, one sendto() call per datagram. Every datagram is sent from the
+ * start of data. The destination is the connected peer, or cfg_dst_addr
+ * when the socket is not connected.
+ *
+ * Returns the number of sendto() calls made; exits on any failed or
+ * short write.
+ */
+static int send_udp(int fd, char *data)
+{
+	int ret, total_len, len, count = 0;
+
+	total_len = cfg_payload_len;
+
+	while (total_len) {
+		len = total_len < cfg_mss ? total_len : cfg_mss;
+
+		ret = sendto(fd, data, len, cfg_zerocopy ? MSG_ZEROCOPY : 0,
+			     cfg_connected ? NULL : (void *)&cfg_dst_addr,
+			     cfg_connected ? 0 : cfg_alen);
+		if (ret == -1)
+			error(1, errno, "write");
+		/* sendto() succeeded: errno is not meaningful here */
+		if (ret != len)
+			error(1, 0, "write: %dB != %dB\n", ret, len);
+
+		total_len -= len;
+		count++;
+	}
+
+	return count;
+}
+
+/*
+ * Send cfg_payload_len bytes of data as a batch of datagrams of at most
+ * cfg_mss bytes each, submitted with a single sendmmsg() call.
+ *
+ * The batch holds at most ETH_MAX_MTU / ETH_DATA_LEN datagrams; a payload
+ * that needs more is a fatal error. When the socket is not connected each
+ * datagram is addressed to cfg_dst_addr.
+ *
+ * Returns the value of sendmmsg(), i.e. the number of datagrams sent.
+ */
+static int send_udp_sendmmsg(int fd, char *data)
+{
+	const int max_nr_msg = ETH_MAX_MTU / ETH_DATA_LEN;
+	struct mmsghdr mmsgs[max_nr_msg];
+	struct iovec iov[max_nr_msg];
+	unsigned int off = 0, left;
+	int i = 0, ret;
+
+	memset(mmsgs, 0, sizeof(mmsgs));
+
+	left = cfg_payload_len;
+	while (left) {
+		if (i == max_nr_msg)
+			error(1, 0, "sendmmsg: exceeds max_nr_msg");
+
+		iov[i].iov_base = data + off;
+		iov[i].iov_len = cfg_mss < left ? cfg_mss : left;
+
+		mmsgs[i].msg_hdr.msg_iov = iov + i;
+		mmsgs[i].msg_hdr.msg_iovlen = 1;
+
+		/* an unconnected socket needs an explicit destination */
+		if (!cfg_connected) {
+			mmsgs[i].msg_hdr.msg_name = (void *)&cfg_dst_addr;
+			mmsgs[i].msg_hdr.msg_namelen = cfg_alen;
+		}
+
+		off += iov[i].iov_len;
+		left -= iov[i].iov_len;
+		i++;
+	}
+
+	ret = sendmmsg(fd, mmsgs, i, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+	if (ret == -1)
+		error(1, errno, "sendmmsg");
+
+	return ret;
+}
+
+/*
+ * Fill cm as a SOL_UDP / UDP_SEGMENT control message whose payload is
+ * the 16-bit value cfg_mss.
+ */
+static void send_udp_segment_cmsg(struct cmsghdr *cm)
+{
+	uint16_t *gso_size = (void *)CMSG_DATA(cm);
+
+	cm->cmsg_len = CMSG_LEN(sizeof(cfg_mss));
+	cm->cmsg_level = SOL_UDP;
+	cm->cmsg_type = UDP_SEGMENT;
+
+	*gso_size = cfg_mss;
+}
+
+/*
+ * Send cfg_payload_len bytes of data with one sendmsg() call that carries
+ * a UDP_SEGMENT control message, addressed to cfg_dst_addr.
+ *
+ * Returns 1 (one call made); exits on failure or on a short send.
+ */
+static int send_udp_segment(int fd, char *data)
+{
+	char control[CMSG_SPACE(sizeof(cfg_mss))] = {0};
+	struct iovec iov = {
+		.iov_base = data,
+		.iov_len = cfg_payload_len,
+	};
+	struct msghdr msg = {
+		.msg_name = (void *)&cfg_dst_addr,
+		.msg_namelen = cfg_alen,
+		.msg_iov = &iov,
+		.msg_iovlen = 1,
+		.msg_control = control,
+		.msg_controllen = sizeof(control),
+	};
+	int ret;
+
+	/* control holds exactly one cmsg: the segment size */
+	send_udp_segment_cmsg(CMSG_FIRSTHDR(&msg));
+
+	ret = sendmsg(fd, &msg, cfg_zerocopy ? MSG_ZEROCOPY : 0);
+	if (ret == -1)
+		error(1, errno, "sendmsg");
+	if (ret != iov.iov_len)
+		error(1, 0, "sendmsg: %u != %lu\n", ret, iov.iov_len);
+
+	return 1;
+}
+
+/*
+ * Report the command line synopsis through error() with status 1,
+ * which ends the program. filepath is printed as the program name.
+ */
+static void usage(const char *filepath)
+{
+	error(1, 0,
+	      "Usage: %s [-46cmStuz] [-C cpu] [-D dst ip] [-l secs] [-p port] [-s sendsize]",
+	      filepath);
+}
+
+/*
+ * Parse the command line into the cfg_* variables.
+ *
+ *   -4 | -6      address family (exactly one is required)
+ *   -c           cycle through the payload buffers
+ *   -C cpu       cpu to pin to
+ *   -D addr      destination address
+ *   -l secs      run time in seconds
+ *   -m           use sendmmsg
+ *   -p port      destination port
+ *   -s size      payload length per message
+ *   -S           use udp segmentation offload
+ *   -t           use tcp
+ *   -u           do not connect the socket
+ *   -z           use MSG_ZEROCOPY
+ *
+ * Also derives cfg_mss from ETH_DATA_LEN and the header size of the
+ * chosen family, and rejects payloads that do not fit in ETH_MAX_MTU.
+ * Exits with a message on any invalid combination.
+ */
+static void parse_opts(int argc, char **argv)
+{
+	const char *dst_addr = NULL;
+	int max_len, hdrlen;
+	int c;
+
+	while ((c = getopt(argc, argv, "46cC:D:l:mp:s:Stuz")) != -1) {
+		switch (c) {
+		case '4':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET;
+			cfg_alen = sizeof(struct sockaddr_in);
+			break;
+		case '6':
+			if (cfg_family != PF_UNSPEC)
+				error(1, 0, "Pass one of -4 or -6");
+			cfg_family = PF_INET6;
+			cfg_alen = sizeof(struct sockaddr_in6);
+			break;
+		case 'c':
+			cfg_cache_trash = true;
+			break;
+		case 'C':
+			cfg_cpu = strtol(optarg, NULL, 0);
+			break;
+		case 'D':
+			/*
+			 * Only remember the string here. Converting it needs
+			 * the family and the port, which may appear later on
+			 * the command line.
+			 */
+			dst_addr = optarg;
+			break;
+		case 'l':
+			cfg_runtime_ms = strtoul(optarg, NULL, 10) * 1000;
+			break;
+		case 'm':
+			cfg_sendmmsg = true;
+			break;
+		case 'p':
+			cfg_port = strtoul(optarg, NULL, 0);
+			break;
+		case 's':
+			cfg_payload_len = strtoul(optarg, NULL, 0);
+			break;
+		case 'S':
+			cfg_segment = true;
+			break;
+		case 't':
+			cfg_tcp = true;
+			break;
+		case 'u':
+			cfg_connected = false;
+			break;
+		case 'z':
+			cfg_zerocopy = true;
+			break;
+		}
+	}
+
+	if (optind != argc)
+		usage(argv[0]);
+
+	if (cfg_family == PF_UNSPEC)
+		error(1, 0, "must pass one of -4 or -6");
+	if (cfg_tcp && !cfg_connected)
+		error(1, 0, "connectionless tcp makes no sense");
+	if (cfg_segment && cfg_sendmmsg)
+		error(1, 0, "cannot combine segment offload and sendmmsg");
+
+	/* family and port are final now, so the address can be built */
+	if (dst_addr)
+		setup_sockaddr(cfg_family, dst_addr, &cfg_dst_addr);
+
+	if (cfg_family == PF_INET)
+		hdrlen = sizeof(struct iphdr) + sizeof(struct udphdr);
+	else
+		hdrlen = sizeof(struct ip6_hdr) + sizeof(struct udphdr);
+
+	cfg_mss = ETH_DATA_LEN - hdrlen;
+	max_len = ETH_MAX_MTU - hdrlen;
+
+	if (cfg_payload_len > max_len)
+		error(1, 0, "payload length %u exceeds max %u",
+		      cfg_payload_len, max_len);
+}
+
+/*
+ * Set the path mtu discovery socket option to its *_PMTUDISC_DO value,
+ * using the IPv4 option when is_ipv4 is true and the IPv6 one otherwise.
+ * Exits the program if setsockopt() fails.
+ */
+static void set_pmtu_discover(int fd, bool is_ipv4)
+{
+	const int level = is_ipv4 ? SOL_IP : SOL_IPV6;
+	const int name = is_ipv4 ? IP_MTU_DISCOVER : IPV6_MTU_DISCOVER;
+	int val = is_ipv4 ? IP_PMTUDISC_DO : IPV6_PMTUDISC_DO;
+
+	if (setsockopt(fd, level, name, &val, sizeof(val)))
+		error(1, errno, "setsockopt path mtu");
+}
+
+/*
+ * Sender entry point.
+ *
+ * Parses the options, optionally pins the process to a cpu, fills every
+ * payload buffer with a repeating 'a'..'z' pattern, opens (and by default
+ * connects) the socket, then sends messages in a loop until SIGINT or
+ * until the configured run time is over. Once more than a second has
+ * passed since the previous report, the counters are printed to stderr
+ * and reset.
+ */
+int main(int argc, char **argv)
+{
+	unsigned long num_msgs, num_sends;
+	unsigned long tnow, treport, tstop;
+	int fd, i, val;
+
+	parse_opts(argc, argv);
+
+	/* cfg_cpu defaults to -1; cpu 0 is a valid choice */
+	if (cfg_cpu >= 0)
+		set_cpu(cfg_cpu);
+
+	for (i = 0; i < (int)sizeof(buf[0]); i++)
+		buf[0][i] = 'a' + (i % 26);
+	for (i = 1; i < NUM_PKT; i++)
+		memcpy(buf[i], buf[0], sizeof(buf[0]));
+
+	signal(SIGINT, sigint_handler);
+
+	fd = socket(cfg_family, cfg_tcp ? SOCK_STREAM : SOCK_DGRAM, 0);
+	if (fd == -1)
+		error(1, errno, "socket");
+
+	if (cfg_zerocopy) {
+		val = 1;
+		if (setsockopt(fd, SOL_SOCKET, SO_ZEROCOPY, &val, sizeof(val)))
+			error(1, errno, "setsockopt zerocopy");
+	}
+
+	if (cfg_connected &&
+	    connect(fd, (void *)&cfg_dst_addr, cfg_alen))
+		error(1, errno, "connect");
+
+	if (cfg_segment)
+		set_pmtu_discover(fd, cfg_family == PF_INET);
+
+	num_msgs = num_sends = 0;
+	tnow = gettimeofday_ms();
+	tstop = tnow + cfg_runtime_ms;
+	treport = tnow + 1000;
+
+	i = 0;
+	do {
+		if (cfg_tcp)
+			num_sends += send_tcp(fd, buf[i]);
+		else if (cfg_segment)
+			num_sends += send_udp_segment(fd, buf[i]);
+		else if (cfg_sendmmsg)
+			num_sends += send_udp_sendmmsg(fd, buf[i]);
+		else
+			num_sends += send_udp(fd, buf[i]);
+		num_msgs++;
+
+		/* drain the error queue every 16 messages */
+		if (cfg_zerocopy && ((num_msgs & 0xF) == 0))
+			flush_zerocopy(fd);
+
+		tnow = gettimeofday_ms();
+		if (tnow > treport) {
+			fprintf(stderr,
+				"%s tx: %6lu MB/s %8lu calls/s %6lu msg/s\n",
+				cfg_tcp ? "tcp" : "udp",
+				(num_msgs * cfg_payload_len) >> 20,
+				num_sends, num_msgs);
+			num_msgs = num_sends = 0;
+			treport = tnow + 1000;
+		}
+
+		/* cold cache when writing buffer: move to the next one */
+		if (cfg_cache_trash) {
+			i++;
+			if (i == NUM_PKT)
+				i = 0;
+		}
+
+	} while (!interrupted && (cfg_runtime_ms == -1 || tnow < tstop));
+
+	if (close(fd))
+		error(1, errno, "close");
+
+	return 0;
+}