diff options
Diffstat (limited to 'tools/testing/selftests/net')
24 files changed, 3998 insertions, 65 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore index ecc52d4c034d..997c65dcad68 100644 --- a/tools/testing/selftests/net/.gitignore +++ b/tools/testing/selftests/net/.gitignore @@ -23,3 +23,8 @@ so_txtime tcp_fastopen_backup_key nettest fin_ack_lat +reuseaddr_ports_exhausted +hwtstamp_config +rxtimestamp +timestamping +txtimestamp diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 4c1bd03ffa1c..3f386eb9e7d7 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -14,6 +14,8 @@ TEST_PROGS += tcp_fastopen_backup_key.sh fcnal-test.sh l2tp.sh traceroute.sh TEST_PROGS += fin_ack_lat.sh fib_nexthop_multiprefix.sh fib_nexthops.sh TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh TEST_PROGS += route_localnet.sh +TEST_PROGS += reuseaddr_ports_exhausted.sh +TEST_PROGS += txtimestamp.sh TEST_PROGS_EXTENDED := in_netns.sh TEST_GEN_FILES = socket nettest TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any @@ -22,6 +24,8 @@ TEST_GEN_FILES += udpgso udpgso_bench_tx udpgso_bench_rx ip_defrag TEST_GEN_FILES += so_txtime ipv6_flowlabel ipv6_flowlabel_mgr TEST_GEN_FILES += tcp_fastopen_backup_key TEST_GEN_FILES += fin_ack_lat +TEST_GEN_FILES += reuseaddr_ports_exhausted +TEST_GEN_FILES += hwtstamp_config rxtimestamp timestamping txtimestamp TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config index b8503a8119b0..3b42c06b5985 100644 --- a/tools/testing/selftests/net/config +++ b/tools/testing/selftests/net/config @@ -12,6 +12,7 @@ CONFIG_IPV6_VTI=y CONFIG_DUMMY=y CONFIG_BRIDGE=y CONFIG_VLAN_8021Q=y +CONFIG_IFB=y CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m @@ -27,5 +28,6 @@ CONFIG_NFT_CHAIN_NAT_IPV6=m CONFIG_NFT_CHAIN_NAT_IPV4=m 
CONFIG_NET_SCH_FQ=m CONFIG_NET_SCH_ETF=m +CONFIG_NET_SCH_NETEM=y CONFIG_TEST_BLACKHOLE_DEV=m CONFIG_KALLSYMS=y diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh index 40b076983239..155d48bd4d9e 100644 --- a/tools/testing/selftests/net/forwarding/devlink_lib.sh +++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh @@ -35,6 +35,12 @@ if [ $? -ne 0 ]; then exit 1 fi +devlink dev help 2>&1 | grep info &> /dev/null +if [ $? -ne 0 ]; then + echo "SKIP: iproute2 too old, missing devlink dev info support" + exit 1 +fi + ############################################################################## # Devlink helpers @@ -373,6 +379,7 @@ devlink_trap_drop_test() local trap_name=$1; shift local group_name=$1; shift local dev=$1; shift + local handle=$1; shift # This is the common part of all the tests. It checks that stats are # initially idle, then non-idle after changing the trap action and @@ -397,7 +404,7 @@ devlink_trap_drop_test() devlink_trap_group_stats_idle_test $group_name check_err $? "Trap group stats not idle after setting action to drop" - tc_check_packets "dev $dev egress" 101 0 + tc_check_packets "dev $dev egress" $handle 0 check_err $? 
"Packets were not dropped" } @@ -406,7 +413,68 @@ devlink_trap_drop_cleanup() local mz_pid=$1; shift local dev=$1; shift local proto=$1; shift + local pref=$1; shift + local handle=$1; shift kill $mz_pid && wait $mz_pid &> /dev/null - tc filter del dev $dev egress protocol $proto pref 1 handle 101 flower + tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower +} + +devlink_trap_policers_num_get() +{ + devlink -j -p trap policer show | jq '.[]["'$DEVLINK_DEV'"] | length' +} + +devlink_trap_policer_rate_get() +{ + local policer_id=$1; shift + + devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \ + | jq '.[][][]["rate"]' +} + +devlink_trap_policer_burst_get() +{ + local policer_id=$1; shift + + devlink -j -p trap policer show $DEVLINK_DEV policer $policer_id \ + | jq '.[][][]["burst"]' +} + +devlink_trap_policer_rx_dropped_get() +{ + local policer_id=$1; shift + + devlink -j -p -s trap policer show $DEVLINK_DEV policer $policer_id \ + | jq '.[][][]["stats"]["rx"]["dropped"]' +} + +devlink_trap_group_policer_get() +{ + local group_name=$1; shift + + devlink -j -p trap group show $DEVLINK_DEV group $group_name \ + | jq '.[][][]["policer"]' +} + +devlink_trap_policer_ids_get() +{ + devlink -j -p trap policer show \ + | jq '.[]["'$DEVLINK_DEV'"][]["policer"]' +} + +devlink_port_by_netdev() +{ + local if_name=$1 + + devlink -j port show $if_name | jq -e '.[] | keys' | jq -r '.[]' +} + +devlink_cpu_port_get() +{ + local cpu_dl_port_num=$(devlink port list | grep "$DEVLINK_DEV" | + grep cpu | cut -d/ -f3 | cut -d: -f1 | + sed -n '1p') + + echo "$DEVLINK_DEV/$cpu_dl_port_num" } diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 2f5da414aaa7..977fc2b326a2 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -60,6 +60,15 @@ check_tc_chain_support() fi } +check_tc_action_hw_stats_support() +{ + tc actions help 2>&1 | 
grep -q hw_stats + if [[ $? -ne 0 ]]; then + echo "SKIP: iproute2 too old; tc is missing action hw_stats support" + exit 1 + fi +} + if [[ "$(id -u)" -ne 0 ]]; then echo "SKIP: need root privileges" exit 0 @@ -248,13 +257,40 @@ busywait() done } +not() +{ + "$@" + [[ $? != 0 ]] +} + +grep_bridge_fdb() +{ + local addr=$1; shift + local word + local flag + + if [ "$1" == "self" ] || [ "$1" == "master" ]; then + word=$1; shift + if [ "$1" == "-v" ]; then + flag=$1; shift + fi + fi + + $@ | grep $addr | grep $flag "$word" +} + +wait_for_offload() +{ + "$@" | grep -q offload +} + until_counter_is() { - local value=$1; shift + local expr=$1; shift local current=$("$@") echo $((current)) - ((current >= value)) + ((current $expr)) } busywait_for_counter() @@ -263,7 +299,7 @@ busywait_for_counter() local delta=$1; shift local base=$("$@") - busywait "$timeout" until_counter_is $((base + delta)) "$@" + busywait "$timeout" until_counter_is ">= $((base + delta))" "$@" } setup_wait_dev() @@ -599,6 +635,17 @@ tc_rule_stats_get() | jq ".[1].options.actions[].stats$selector" } +tc_rule_handle_stats_get() +{ + local id=$1; shift + local handle=$1; shift + local selector=${1:-.packets}; shift + + tc -j -s filter show $id \ + | jq ".[] | select(.options.handle == $handle) | \ + .options.actions[0].stats$selector" +} + ethtool_stats_get() { local dev=$1; shift @@ -607,6 +654,26 @@ ethtool_stats_get() ethtool -S $dev | grep "^ *$stat:" | head -n 1 | cut -d: -f2 } +qdisc_stats_get() +{ + local dev=$1; shift + local handle=$1; shift + local selector=$1; shift + + tc -j -s qdisc show dev "$dev" \ + | jq '.[] | select(.handle == "'"$handle"'") | '"$selector" +} + +qdisc_parent_stats_get() +{ + local dev=$1; shift + local parent=$1; shift + local selector=$1; shift + + tc -j -s qdisc show dev "$dev" invisible \ + | jq '.[] | select(.parent == "'"$parent"'") | '"$selector" +} + humanize() { local speed=$1; shift @@ -1132,18 +1199,29 @@ flood_test() flood_multicast_test $br_port $host1_if 
$host2_if } -start_traffic() +__start_traffic() { + local proto=$1; shift local h_in=$1; shift # Where the traffic egresses the host local sip=$1; shift local dip=$1; shift local dmac=$1; shift $MZ $h_in -p 8000 -A $sip -B $dip -c 0 \ - -a own -b $dmac -t udp -q & + -a own -b $dmac -t "$proto" -q "$@" & sleep 1 } +start_traffic() +{ + __start_traffic udp "$@" +} + +start_tcp_traffic() +{ + __start_traffic tcp "$@" +} + stop_traffic() { # Suppress noise from killing mausezahn. diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh new file mode 100755 index 000000000000..b50081855913 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh @@ -0,0 +1,238 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by a pedit action. An ingress +# filter installed on $h2 verifies that the packet looks like expected. 
+# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ip_dsfield + test_ip_dscp + test_ip_ecn + test_ip_dscp_ecn +" + +NUM_NETIFS=4 +source lib.sh +source tc_common.sh + +: ${HIT_TIMEOUT:=2000} # ms + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 2001:db8:1::1/64 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 2001:db8:1::2/64 + tc qdisc add dev $h2 clsact +} + +h2_destroy() +{ + tc qdisc del dev $h2 clsact + simple_if_fini $h2 192.0.2.2/28 2001:db8:1::2/64 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact +} + +switch_destroy() +{ + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +do_test_pedit_dsfield_common() +{ + local pedit_locus=$1; shift + local pedit_action=$1; shift + local mz_flags=$1; shift + + RET=0 
+ + # TOS 125: DSCP 31, ECN 1. Used for testing that the relevant part is + # overwritten when zero is selected. + $MZ $mz_flags $h1 -c 10 -d 20msec -p 100 \ + -a own -b $h2mac -q -t tcp tos=0x7d,sp=54321,dp=12345 + + local pkts + pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \ + tc_rule_handle_stats_get "dev $h2 ingress" 101) + check_err $? "Expected to get 10 packets, but got $pkts." + log_test "$pedit_locus pedit $pedit_action" +} + +do_test_pedit_dsfield() +{ + local pedit_locus=$1; shift + local pedit_action=$1; shift + local match_prot=$1; shift + local match_flower=$1; shift + local mz_flags=$1; shift + local saddr=$1; shift + local daddr=$1; shift + + tc filter add $pedit_locus handle 101 pref 1 \ + flower action pedit ex munge $pedit_action + tc filter add dev $h2 ingress handle 101 pref 1 prot $match_prot \ + flower skip_hw $match_flower action pass + + do_test_pedit_dsfield_common "$pedit_locus" "$pedit_action" "$mz_flags" + + tc filter del dev $h2 ingress pref 1 + tc filter del $pedit_locus pref 1 +} + +do_test_ip_dsfield() +{ + local locus=$1; shift + local dsfield + + for dsfield in 0 1 2 3 128 252 253 254 255; do + do_test_pedit_dsfield "$locus" \ + "ip dsfield set $dsfield" \ + ip "ip_tos $dsfield" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_ip_dsfield() +{ + do_test_ip_dsfield "dev $swp1 ingress" + do_test_ip_dsfield "dev $swp2 egress" +} + +do_test_ip_dscp() +{ + local locus=$1; shift + local dscp + + for dscp in 0 1 2 3 32 61 62 63; do + do_test_pedit_dsfield "$locus" \ + "ip dsfield set $((dscp << 2)) retain 0xfc" \ + ip "ip_tos $(((dscp << 2) | 1))" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + +test_ip_dscp() +{ + do_test_ip_dscp "dev $swp1 ingress" + do_test_ip_dscp "dev $swp2 egress" +} + +do_test_ip_ecn() +{ + local locus=$1; shift + local ecn + + for ecn in 0 1 2 3; do + do_test_pedit_dsfield "$locus" \ + "ip dsfield set $ecn retain 0x03" \ + ip "ip_tos $((124 | $ecn))" \ + "-A 192.0.2.1 -B 192.0.2.2" + done +} + 
+test_ip_ecn() +{ + do_test_ip_ecn "dev $swp1 ingress" + do_test_ip_ecn "dev $swp2 egress" +} + +do_test_ip_dscp_ecn() +{ + local locus=$1; shift + + tc filter add $locus handle 101 pref 1 \ + flower action pedit ex munge ip dsfield set 124 retain 0xfc \ + action pedit ex munge ip dsfield set 1 retain 0x03 + tc filter add dev $h2 ingress handle 101 pref 1 prot ip \ + flower skip_hw ip_tos 125 action pass + + do_test_pedit_dsfield_common "$locus" "set DSCP + set ECN" \ + "-A 192.0.2.1 -B 192.0.2.2" + + tc filter del dev $h2 ingress pref 1 + tc filter del $locus pref 1 +} + +test_ip_dscp_ecn() +{ + do_test_ip_dscp_ecn "dev $swp1 ingress" + do_test_ip_dscp_ecn "dev $swp2 egress" +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/sch_ets.sh b/tools/testing/selftests/net/forwarding/sch_ets.sh index 40e0ad1bc4f2..e60c8b4818cc 100755 --- a/tools/testing/selftests/net/forwarding/sch_ets.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets.sh @@ -34,11 +34,14 @@ switch_destroy() } # Callback from sch_ets_tests.sh -get_stats() +collect_stats() { - local stream=$1; shift + local -a streams=("$@") + local stream - link_stats_get $h2.1$stream rx bytes + for stream in ${streams[@]}; do + qdisc_parent_stats_get $swp2 10:$((stream + 1)) .bytes + done } ets_run diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh index 3c3b204d47e8..cdf689e99458 100644 --- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh +++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh @@ -2,7 +2,7 @@ # Global interface: # $put -- port under test (e.g. $swp2) -# get_stats($band) -- A function to collect stats for band +# collect_stats($streams...) -- A function to get stats for individual streams # ets_start_traffic($band) -- Start traffic for this band # ets_change_qdisc($op, $dev, $nstrict, $quanta...) 
-- Add or change qdisc @@ -94,15 +94,11 @@ __ets_dwrr_test() sleep 10 - t0=($(for stream in ${streams[@]}; do - get_stats $stream - done)) + t0=($(collect_stats "${streams[@]}")) sleep 10 - t1=($(for stream in ${streams[@]}; do - get_stats $stream - done)) + t1=($(collect_stats "${streams[@]}")) d=($(for ((i = 0; i < ${#streams[@]}; i++)); do echo $((${t1[$i]} - ${t0[$i]})) done)) diff --git a/tools/testing/selftests/net/forwarding/skbedit_priority.sh b/tools/testing/selftests/net/forwarding/skbedit_priority.sh new file mode 100755 index 000000000000..e3bd8a6bb8b4 --- /dev/null +++ b/tools/testing/selftests/net/forwarding/skbedit_priority.sh @@ -0,0 +1,168 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +# This test sends traffic from H1 to H2. Either on ingress of $swp1, or on +# egress of $swp2, the traffic is acted upon by an action skbedit priority. The +# new priority should be taken into account when classifying traffic on the PRIO +# qdisc at $swp2. The test verifies that for different priority values, the +# traffic ends up in expected PRIO band. 
+# +# +----------------------+ +----------------------+ +# | H1 | | H2 | +# | + $h1 | | $h2 + | +# | | 192.0.2.1/28 | | 192.0.2.2/28 | | +# +----|-----------------+ +----------------|-----+ +# | | +# +----|----------------------------------------------------------------|-----+ +# | SW | | | +# | +-|----------------------------------------------------------------|-+ | +# | | + $swp1 BR $swp2 + | | +# | | PRIO | | +# | +--------------------------------------------------------------------+ | +# +---------------------------------------------------------------------------+ + +ALL_TESTS=" + ping_ipv4 + test_ingress + test_egress +" + +NUM_NETIFS=4 +source lib.sh + +: ${HIT_TIMEOUT:=2000} # ms + +h1_create() +{ + simple_if_init $h1 192.0.2.1/28 +} + +h1_destroy() +{ + simple_if_fini $h1 192.0.2.1/28 +} + +h2_create() +{ + simple_if_init $h2 192.0.2.2/28 +} + +h2_destroy() +{ + simple_if_fini $h2 192.0.2.2/28 +} + +switch_create() +{ + ip link add name br1 up type bridge vlan_filtering 1 + ip link set dev $swp1 master br1 + ip link set dev $swp1 up + ip link set dev $swp2 master br1 + ip link set dev $swp2 up + + tc qdisc add dev $swp1 clsact + tc qdisc add dev $swp2 clsact + tc qdisc add dev $swp2 root handle 10: \ + prio bands 8 priomap 7 6 5 4 3 2 1 0 +} + +switch_destroy() +{ + tc qdisc del dev $swp2 root + tc qdisc del dev $swp2 clsact + tc qdisc del dev $swp1 clsact + + ip link set dev $swp2 nomaster + ip link set dev $swp1 nomaster + ip link del dev br1 +} + +setup_prepare() +{ + h1=${NETIFS[p1]} + swp1=${NETIFS[p2]} + + swp2=${NETIFS[p3]} + h2=${NETIFS[p4]} + + h2mac=$(mac_get $h2) + + vrf_prepare + h1_create + h2_create + switch_create +} + +cleanup() +{ + pre_cleanup + + switch_destroy + h2_destroy + h1_destroy + vrf_cleanup +} + +ping_ipv4() +{ + ping_test $h1 192.0.2.2 +} + +test_skbedit_priority_one() +{ + local locus=$1; shift + local prio=$1; shift + local classid=$1; shift + + RET=0 + + tc filter add $locus handle 101 pref 1 \ + flower action skbedit 
priority $prio + + local pkt0=$(qdisc_parent_stats_get $swp2 $classid .packets) + local pkt2=$(tc_rule_handle_stats_get "$locus" 101) + $MZ $h1 -t udp "sp=54321,dp=12345" -c 10 -d 20msec -p 100 \ + -a own -b $h2mac -A 192.0.2.1 -B 192.0.2.2 -q + + local pkt1 + pkt1=$(busywait "$HIT_TIMEOUT" until_counter_is ">= $((pkt0 + 10))" \ + qdisc_parent_stats_get $swp2 $classid .packets) + check_err $? "Expected to get 10 packets on class $classid, but got $((pkt1 - pkt0))." + + local pkt3=$(tc_rule_handle_stats_get "$locus" 101) + ((pkt3 >= pkt2 + 10)) + check_err $? "Expected to get 10 packets on skbedit rule but got $((pkt3 - pkt2))." + + log_test "$locus skbedit priority $prio -> classid $classid" + + tc filter del $locus pref 1 +} + +test_ingress() +{ + local prio + + for prio in {0..7}; do + test_skbedit_priority_one "dev $swp1 ingress" \ + $prio 10:$((8 - prio)) + done +} + +test_egress() +{ + local prio + + for prio in {0..7}; do + test_skbedit_priority_one "dev $swp2 egress" \ + $prio 10:$((8 - prio)) + done +} + +trap cleanup EXIT + +setup_prepare +setup_wait + +tests_run + +exit $EXIT_STATUS diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh index 64f652633585..0e18e8be6e2a 100644 --- a/tools/testing/selftests/net/forwarding/tc_common.sh +++ b/tools/testing/selftests/net/forwarding/tc_common.sh @@ -6,39 +6,14 @@ CHECK_TC="yes" # Can be overridden by the configuration file. See lib.sh TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms -__tc_check_packets() -{ - local id=$1 - local handle=$2 - local count=$3 - local operator=$4 - - start_time="$(date -u +%s%3N)" - while true - do - cmd_jq "tc -j -s filter show $id" \ - ".[] | select(.options.handle == $handle) | \ - select(.options.actions[0].stats.packets $operator $count)" \ - &> /dev/null - ret=$? 
- if [[ $ret -eq 0 ]]; then - return $ret - fi - current_time="$(date -u +%s%3N)" - diff=$(expr $current_time - $start_time) - if [ "$diff" -gt "$TC_HIT_TIMEOUT" ]; then - return 1 - fi - done -} - tc_check_packets() { local id=$1 local handle=$2 local count=$3 - __tc_check_packets "$id" "$handle" "$count" "==" + busywait "$TC_HIT_TIMEOUT" until_counter_is "== $count" \ + tc_rule_handle_stats_get "$id" "$handle" > /dev/null } tc_check_packets_hitting() @@ -46,5 +21,6 @@ tc_check_packets_hitting() local id=$1 local handle=$2 - __tc_check_packets "$id" "$handle" 0 ">" + busywait "$TC_HIT_TIMEOUT" until_counter_is "> 0" \ + tc_rule_handle_stats_get "$id" "$handle" > /dev/null } diff --git a/tools/testing/selftests/net/hwtstamp_config.c b/tools/testing/selftests/net/hwtstamp_config.c new file mode 100644 index 000000000000..e1fdee841021 --- /dev/null +++ b/tools/testing/selftests/net/hwtstamp_config.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Test program for SIOC{G,S}HWTSTAMP + * Copyright 2013 Solarflare Communications + * Author: Ben Hutchings + */ + +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> + +#include <sys/socket.h> +#include <sys/ioctl.h> + +#include <linux/if.h> +#include <linux/net_tstamp.h> +#include <linux/sockios.h> + +static int +lookup_value(const char **names, int size, const char *name) +{ + int value; + + for (value = 0; value < size; value++) + if (names[value] && strcasecmp(names[value], name) == 0) + return value; + + return -1; +} + +static const char * +lookup_name(const char **names, int size, int value) +{ + return (value >= 0 && value < size) ? 
names[value] : NULL; +} + +static void list_names(FILE *f, const char **names, int size) +{ + int value; + + for (value = 0; value < size; value++) + if (names[value]) + fprintf(f, " %s\n", names[value]); +} + +static const char *tx_types[] = { +#define TX_TYPE(name) [HWTSTAMP_TX_ ## name] = #name + TX_TYPE(OFF), + TX_TYPE(ON), + TX_TYPE(ONESTEP_SYNC) +#undef TX_TYPE +}; +#define N_TX_TYPES ((int)(sizeof(tx_types) / sizeof(tx_types[0]))) + +static const char *rx_filters[] = { +#define RX_FILTER(name) [HWTSTAMP_FILTER_ ## name] = #name + RX_FILTER(NONE), + RX_FILTER(ALL), + RX_FILTER(SOME), + RX_FILTER(PTP_V1_L4_EVENT), + RX_FILTER(PTP_V1_L4_SYNC), + RX_FILTER(PTP_V1_L4_DELAY_REQ), + RX_FILTER(PTP_V2_L4_EVENT), + RX_FILTER(PTP_V2_L4_SYNC), + RX_FILTER(PTP_V2_L4_DELAY_REQ), + RX_FILTER(PTP_V2_L2_EVENT), + RX_FILTER(PTP_V2_L2_SYNC), + RX_FILTER(PTP_V2_L2_DELAY_REQ), + RX_FILTER(PTP_V2_EVENT), + RX_FILTER(PTP_V2_SYNC), + RX_FILTER(PTP_V2_DELAY_REQ), +#undef RX_FILTER +}; +#define N_RX_FILTERS ((int)(sizeof(rx_filters) / sizeof(rx_filters[0]))) + +static void usage(void) +{ + fputs("Usage: hwtstamp_config if_name [tx_type rx_filter]\n" + "tx_type is any of (case-insensitive):\n", + stderr); + list_names(stderr, tx_types, N_TX_TYPES); + fputs("rx_filter is any of (case-insensitive):\n", stderr); + list_names(stderr, rx_filters, N_RX_FILTERS); +} + +int main(int argc, char **argv) +{ + struct ifreq ifr; + struct hwtstamp_config config; + const char *name; + int sock; + + if ((argc != 2 && argc != 4) || (strlen(argv[1]) >= IFNAMSIZ)) { + usage(); + return 2; + } + + if (argc == 4) { + config.flags = 0; + config.tx_type = lookup_value(tx_types, N_TX_TYPES, argv[2]); + config.rx_filter = lookup_value(rx_filters, N_RX_FILTERS, argv[3]); + if (config.tx_type < 0 || config.rx_filter < 0) { + usage(); + return 2; + } + } + + sock = socket(AF_INET, SOCK_DGRAM, 0); + if (sock < 0) { + perror("socket"); + return 1; + } + + strcpy(ifr.ifr_name, argv[1]); + ifr.ifr_data = 
(caddr_t)&config; + + if (ioctl(sock, (argc == 2) ? SIOCGHWTSTAMP : SIOCSHWTSTAMP, &ifr)) { + perror("ioctl"); + return 1; + } + + printf("flags = %#x\n", config.flags); + name = lookup_name(tx_types, N_TX_TYPES, config.tx_type); + if (name) + printf("tx_type = %s\n", name); + else + printf("tx_type = %d\n", config.tx_type); + name = lookup_name(rx_filters, N_RX_FILTERS, config.rx_filter); + if (name) + printf("rx_filter = %s\n", name); + else + printf("rx_filter = %d\n", config.rx_filter); + + return 0; +} diff --git a/tools/testing/selftests/net/mptcp/.gitignore b/tools/testing/selftests/net/mptcp/.gitignore index d72f07642738..ea13b255a99d 100644 --- a/tools/testing/selftests/net/mptcp/.gitignore +++ b/tools/testing/selftests/net/mptcp/.gitignore @@ -1,2 +1,3 @@ mptcp_connect +pm_nl_ctl *.pcap diff --git a/tools/testing/selftests/net/mptcp/Makefile b/tools/testing/selftests/net/mptcp/Makefile index ba450e62dc5b..f50976ee7d44 100644 --- a/tools/testing/selftests/net/mptcp/Makefile +++ b/tools/testing/selftests/net/mptcp/Makefile @@ -1,12 +1,13 @@ # SPDX-License-Identifier: GPL-2.0 top_srcdir = ../../../../.. 
+KSFT_KHDR_INSTALL := 1 -CFLAGS = -Wall -Wl,--no-as-needed -O2 -g +CFLAGS = -Wall -Wl,--no-as-needed -O2 -g -I$(top_srcdir)/usr/include -TEST_PROGS := mptcp_connect.sh +TEST_PROGS := mptcp_connect.sh pm_netlink.sh mptcp_join.sh -TEST_GEN_FILES = mptcp_connect +TEST_GEN_FILES = mptcp_connect pm_nl_ctl TEST_FILES := settings diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c index 99579c0223c1..cedee5b952ba 100644 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.c +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c @@ -34,8 +34,8 @@ extern int optind; #define TCP_ULP 31 #endif +static int poll_timeout = 10 * 1000; static bool listen_mode; -static int poll_timeout; enum cfg_mode { CFG_MODE_POLL, @@ -50,11 +50,21 @@ static int cfg_sock_proto = IPPROTO_MPTCP; static bool tcpulp_audit; static int pf = AF_INET; static int cfg_sndbuf; +static int cfg_rcvbuf; +static bool cfg_join; static void die_usage(void) { - fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] -m mode]" - "[ -l ] [ -t timeout ] connect_address\n"); + fprintf(stderr, "Usage: mptcp_connect [-6] [-u] [-s MPTCP|TCP] [-p port] [-m mode]" + "[-l] connect_address\n"); + fprintf(stderr, "\t-6 use ipv6\n"); + fprintf(stderr, "\t-t num -- set poll timeout to num\n"); + fprintf(stderr, "\t-S num -- set SO_SNDBUF to num\n"); + fprintf(stderr, "\t-R num -- set SO_RCVBUF to num\n"); + fprintf(stderr, "\t-p num -- use port num\n"); + fprintf(stderr, "\t-m [MPTCP|TCP] -- use tcp or mptcp sockets\n"); + fprintf(stderr, "\t-s [mmap|poll] -- use poll (default) or mmap\n"); + fprintf(stderr, "\t-u -- check mptcp ulp\n"); exit(1); } @@ -97,6 +107,17 @@ static void xgetaddrinfo(const char *node, const char *service, } } +static void set_rcvbuf(int fd, unsigned int size) +{ + int err; + + err = setsockopt(fd, SOL_SOCKET, SO_RCVBUF, &size, sizeof(size)); + if (err) { + perror("set SO_RCVBUF"); + exit(1); + } +} + static void 
set_sndbuf(int fd, unsigned int size) { int err; @@ -230,6 +251,7 @@ static int sock_connect_mptcp(const char * const remoteaddr, static size_t do_rnd_write(const int fd, char *buf, const size_t len) { + static bool first = true; unsigned int do_w; ssize_t bw; @@ -237,10 +259,19 @@ static size_t do_rnd_write(const int fd, char *buf, const size_t len) if (do_w == 0 || do_w > len) do_w = len; + if (cfg_join && first && do_w > 100) + do_w = 100; + bw = write(fd, buf, do_w); if (bw < 0) perror("write"); + /* let the join handshake complete, before going on */ + if (cfg_join && first) { + usleep(200000); + first = false; + } + return bw; } @@ -365,8 +396,11 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) break; /* ... but we still receive. - * Close our write side. + * Close our write side, ev. give some time + * for address notification */ + if (cfg_join) + usleep(400000); shutdown(peerfd, SHUT_WR); } else { if (errno == EINTR) @@ -383,6 +417,10 @@ static int copyfd_io_poll(int infd, int peerfd, int outfd) } } + /* leave some time for late join/announce */ + if (cfg_join) + usleep(400000); + close(peerfd); return 0; } @@ -638,7 +676,7 @@ static void maybe_close(int fd) { unsigned int r = rand(); - if (r & 1) + if (!cfg_join && (r & 1)) close(fd); } @@ -704,6 +742,8 @@ int main_loop(void) check_getpeername_connect(fd); + if (cfg_rcvbuf) + set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); @@ -745,7 +785,7 @@ int parse_mode(const char *mode) return 0; } -int parse_sndbuf(const char *size) +static int parse_int(const char *size) { unsigned long s; @@ -765,17 +805,19 @@ int parse_sndbuf(const char *size) die_usage(); } - cfg_sndbuf = s; - - return 0; + return (int)s; } static void parse_opts(int argc, char **argv) { int c; - while ((c = getopt(argc, argv, "6lp:s:hut:m:b:")) != -1) { + while ((c = getopt(argc, argv, "6jlp:s:hut:m:S:R:")) != -1) { switch (c) { + case 'j': + cfg_join = true; + cfg_mode = CFG_MODE_POLL; + break; case 'l': 
listen_mode = true; break; @@ -802,8 +844,11 @@ static void parse_opts(int argc, char **argv) case 'm': cfg_mode = parse_mode(optarg); break; - case 'b': - cfg_sndbuf = parse_sndbuf(optarg); + case 'S': + cfg_sndbuf = parse_int(optarg); + break; + case 'R': + cfg_rcvbuf = parse_int(optarg); break; } } @@ -831,6 +876,8 @@ int main(int argc, char *argv[]) if (fd < 0) return 1; + if (cfg_rcvbuf) + set_rcvbuf(fd, cfg_rcvbuf); if (cfg_sndbuf) set_sndbuf(fd, cfg_sndbuf); diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh index d573a0feb98d..acf02e156d20 100755 --- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh +++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh @@ -3,7 +3,7 @@ time_start=$(date +%s) -optstring="b:d:e:l:r:h4cm:" +optstring="S:R:d:e:l:r:h4cm:" ret=0 sin="" sout="" @@ -19,6 +19,7 @@ tc_loss=$((RANDOM%101)) tc_reorder="" testmode="" sndbuf=0 +rcvbuf=0 options_log=true if [ $tc_loss -eq 100 ];then @@ -39,7 +40,8 @@ usage() { echo -e "\t-e: ethtool features to disable, e.g.: \"-e tso -e gso\" (default: randomly disable any of tso/gso/gro)" echo -e "\t-4: IPv4 only: disable IPv6 tests (default: test both IPv4 and IPv6)" echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)" - echo -e "\t-b: set sndbuf value (default: use kernel default)" + echo -e "\t-S: set sndbuf value (default: use kernel default)" + echo -e "\t-R: set rcvbuf value (default: use kernel default)" echo -e "\t-m: test mode (poll, sendfile; default: poll)" } @@ -73,11 +75,19 @@ while getopts "$optstring" option;do "c") capture=true ;; - "b") + "S") if [ $OPTARG -ge 0 ];then sndbuf="$OPTARG" else - echo "-s requires numeric argument, got \"$OPTARG\"" 1>&2 + echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2 + exit 1 + fi + ;; + "R") + if [ $OPTARG -ge 0 ];then + rcvbuf="$OPTARG" + else + echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2 exit 1 fi ;; @@ -342,8 +352,12 @@ 
do_transfer() port=$((10000+$TEST_COUNT)) TEST_COUNT=$((TEST_COUNT+1)) + if [ "$rcvbuf" -gt 0 ]; then + extra_args="$extra_args -R $rcvbuf" + fi + if [ "$sndbuf" -gt 0 ]; then - extra_args="$extra_args -b $sndbuf" + extra_args="$extra_args -S $sndbuf" fi if [ -n "$testmode" ]; then diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh new file mode 100755 index 000000000000..dd42c2f692d0 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh @@ -0,0 +1,357 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ret=0 +sin="" +sout="" +cin="" +cout="" +ksft_skip=4 +timeout=30 +capture=0 + +TEST_COUNT=0 + +init() +{ + capout=$(mktemp) + + rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) + + ns1="ns1-$rndh" + ns2="ns2-$rndh" + + for netns in "$ns1" "$ns2";do + ip netns add $netns || exit $ksft_skip + ip -net $netns link set lo up + ip netns exec $netns sysctl -q net.mptcp.enabled=1 + ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0 + ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0 + done + + # ns1 ns2 + # ns1eth1 ns2eth1 + # ns1eth2 ns2eth2 + # ns1eth3 ns2eth3 + # ns1eth4 ns2eth4 + + for i in `seq 1 4`; do + ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2" + ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i + ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad + ip -net "$ns1" link set ns1eth$i up + + ip -net "$ns2" addr add 10.0.$i.2/24 dev ns2eth$i + ip -net "$ns2" addr add dead:beef:$i::2/64 dev ns2eth$i nodad + ip -net "$ns2" link set ns2eth$i up + + # let $ns2 reach any $ns1 address from any interface + ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i + done +} + +cleanup_partial() +{ + rm -f "$capout" + + for netns in "$ns1" "$ns2"; do + ip netns del $netns + done +} + +cleanup() +{ + rm -f "$cin" "$cout" + rm -f "$sin" "$sout" + cleanup_partial +} + +reset() +{ + cleanup_partial + init +} + +for arg in "$@"; do 
+ if [ "$arg" = "-c" ]; then + capture=1 + fi +done + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + + +check_transfer() +{ + in=$1 + out=$2 + what=$3 + + cmp "$in" "$out" > /dev/null 2>&1 + if [ $? -ne 0 ] ;then + echo "[ FAIL ] $what does not match (in, out):" + print_file_err "$in" + print_file_err "$out" + + return 1 + fi + + return 0 +} + +do_ping() +{ + listener_ns="$1" + connector_ns="$2" + connect_addr="$3" + + ip netns exec ${connector_ns} ping -q -c 1 $connect_addr >/dev/null + if [ $? -ne 0 ] ; then + echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2 + ret=1 + fi +} + +do_transfer() +{ + listener_ns="$1" + connector_ns="$2" + cl_proto="$3" + srv_proto="$4" + connect_addr="$5" + + port=$((10000+$TEST_COUNT)) + TEST_COUNT=$((TEST_COUNT+1)) + + :> "$cout" + :> "$sout" + :> "$capout" + + if [ $capture -eq 1 ]; then + if [ -z $SUDO_USER ] ; then + capuser="" + else + capuser="-Z $SUDO_USER" + fi + + capfile="mp_join-${listener_ns}.pcap" + + echo "Capturing traffic for test $TEST_COUNT into $capfile" + ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 & + cappid=$! + + sleep 1 + fi + + ip netns exec ${listener_ns} ./mptcp_connect -j -t $timeout -l -p $port -s ${srv_proto} 0.0.0.0 < "$sin" > "$sout" & + spid=$! + + sleep 1 + + ip netns exec ${connector_ns} ./mptcp_connect -j -t $timeout -p $port -s ${cl_proto} $connect_addr < "$cin" > "$cout" & + cpid=$! + + wait $cpid + retc=$? + wait $spid + rets=$? 
+ + if [ $capture -eq 1 ]; then + sleep 1 + kill $cappid + fi + + if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then + echo " client exit code $retc, server $rets" 1>&2 + echo "\nnetns ${listener_ns} socket stat for $port:" 1>&2 + ip netns exec ${listener_ns} ss -nita 1>&2 -o "sport = :$port" + echo "\nnetns ${connector_ns} socket stat for $port:" 1>&2 + ip netns exec ${connector_ns} ss -nita 1>&2 -o "dport = :$port" + + cat "$capout" + return 1 + fi + + check_transfer $sin $cout "file received by client" + retc=$? + check_transfer $cin $sout "file received by server" + rets=$? + + if [ $retc -eq 0 ] && [ $rets -eq 0 ];then + cat "$capout" + return 0 + fi + + cat "$capout" + return 1 +} + +make_file() +{ + name=$1 + who=$2 + + SIZE=1 + + dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null + echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name" + + echo "Created $name (size $SIZE KB) containing data sent by $who" +} + +run_tests() +{ + listener_ns="$1" + connector_ns="$2" + connect_addr="$3" + lret=0 + + do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP ${connect_addr} + lret=$? 
+ if [ $lret -ne 0 ]; then + ret=$lret + return + fi +} + +chk_join_nr() +{ + local msg="$1" + local syn_nr=$2 + local syn_ack_nr=$3 + local ack_nr=$4 + local count + local dump_stats + + printf "%-36s %s" "$msg" "syn" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinSynRx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$syn_nr" ]; then + echo "[fail] got $count JOIN[s] syn expected $syn_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - synack" + count=`ip netns exec $ns2 nstat -as | grep MPTcpExtMPJoinSynAckRx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$syn_ack_nr" ]; then + echo "[fail] got $count JOIN[s] synack expected $syn_ack_nr" + ret=1 + dump_stats=1 + else + echo -n "[ ok ]" + fi + + echo -n " - ack" + count=`ip netns exec $ns1 nstat -as | grep MPTcpExtMPJoinAckRx | awk '{print $2}'` + [ -z "$count" ] && count=0 + if [ "$count" != "$ack_nr" ]; then + echo "[fail] got $count JOIN[s] ack expected $ack_nr" + ret=1 + dump_stats=1 + else + echo "[ ok ]" + fi + if [ "${dump_stats}" = 1 ]; then + echo Server ns stats + ip netns exec $ns1 nstat -as | grep MPTcp + echo Client ns stats + ip netns exec $ns2 nstat -as | grep MPTcp + fi +} + +sin=$(mktemp) +sout=$(mktemp) +cin=$(mktemp) +cout=$(mktemp) +init +make_file "$cin" "client" +make_file "$sin" "server" +trap cleanup EXIT + +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "no JOIN" "0" "0" "0" + +# subflow limited by client +reset +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow, limited by client" 0 0 0 + +# subflow limited by server +reset +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow, limited by server" 1 1 0 + +# subflow +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl add
10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "single subflow" 1 1 1 + +# multiple subflows +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "multiple subflows" 2 2 2 + +# multiple subflows limited by server +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.2.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "multiple subflows, limited by server" 2 2 1 + +# add_address, unused +reset +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "unused signal address" 0 0 0 + +# accept and use add_addr +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 1 +ip netns exec $ns2 ./pm_nl_ctl limits 1 1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "signal address" 1 1 1 + +# accept and use add_addr with an additional subflow +# note: signal address in server ns and local addresses in client ns must +# belong to different subnets or one of the listed local addresses could be +# used for 'add_addr' subflow +reset +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns1 ./pm_nl_ctl limits 0 2 +ip netns exec $ns2 ./pm_nl_ctl limits 1 2 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +run_tests $ns1 $ns2 10.0.1.1 +chk_join_nr "subflow and signal" 2 2 2 + +# accept and use add_addr with additional subflows +reset +ip netns exec $ns1 ./pm_nl_ctl limits 0 3 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.2.1 flags signal +ip netns exec $ns2 ./pm_nl_ctl limits 1 3 +ip netns exec $ns2 ./pm_nl_ctl add 10.0.3.2 flags subflow +ip netns exec $ns2 ./pm_nl_ctl add 10.0.4.2 flags subflow +run_tests $ns1
$ns2 10.0.1.1 +chk_join_nr "multiple subflows and signal" 3 3 3 + +exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh new file mode 100755 index 000000000000..9172746b6cf0 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh @@ -0,0 +1,130 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +ksft_skip=4 +ret=0 + +usage() { + echo "Usage: $0 [ -h ]" +} + + +while getopts "$optstring" option;do + case "$option" in + "h") + usage $0 + exit 0 + ;; + "?") + usage $0 + exit 1 + ;; + esac +done + +sec=$(date +%s) +rndh=$(printf %x $sec)-$(mktemp -u XXXXXX) +ns1="ns1-$rndh" +err=$(mktemp) +ret=0 + +cleanup() +{ + rm -f $out + ip netns del $ns1 +} + +ip -Version > /dev/null 2>&1 +if [ $? -ne 0 ];then + echo "SKIP: Could not run test without ip tool" + exit $ksft_skip +fi + +trap cleanup EXIT + +ip netns add $ns1 || exit $ksft_skip +ip -net $ns1 link set lo up +ip netns exec $ns1 sysctl -q net.mptcp.enabled=1 + +check() +{ + local cmd="$1" + local expected="$2" + local msg="$3" + local out=`$cmd 2>$err` + local cmd_ret=$? 
+ + printf "%-50s %s" "$msg" + if [ $cmd_ret -ne 0 ]; then + echo "[FAIL] command execution '$cmd' stderr " + cat $err + ret=1 + elif [ "$out" = "$expected" ]; then + echo "[ OK ]" + else + echo -n "[FAIL] " + echo "expected '$expected' got '$out'" + ret=1 + fi +} + +check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list" +check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 +subflows 0" "defaults limits" + +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup +check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr" + +check "ip netns exec $ns1 ./pm_nl_ctl dump" \ +"id 1 flags 10.0.1.1 +id 2 flags subflow dev lo 10.0.1.2 +id 3 flags signal,backup 10.0.1.3" "dump addrs" + +ip netns exec $ns1 ./pm_nl_ctl del 2 +check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr" +check "ip netns exec $ns1 ./pm_nl_ctl dump" \ +"id 1 flags 10.0.1.1 +id 3 flags signal,backup 10.0.1.3" "dump addrs after del" + +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 +check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr" + +ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 id 10 flags signal +check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment" + +for i in `seq 5 9`; do + ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1 +done +check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit" +check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit" + +for i in `seq 9 256`; do + ip netns exec $ns1 ./pm_nl_ctl del $i + ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 +done +check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1 +id 3 flags signal,backup 10.0.1.3 +id 4 flags signal 10.0.1.4 +id 5 flags signal 10.0.1.5 +id 6 flags signal 10.0.1.6 +id 7 flags signal 10.0.1.7 +id 8 flags signal 10.0.1.8" "id 
limit" + +ip netns exec $ns1 ./pm_nl_ctl flush +check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs" + +ip netns exec $ns1 ./pm_nl_ctl limits 9 1 +check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 +subflows 0" "rcv addrs above hard limit" + +ip netns exec $ns1 ./pm_nl_ctl limits 1 9 +check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0 +subflows 0" "subflows above hard limit" + +ip netns exec $ns1 ./pm_nl_ctl limits 8 8 +check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8 +subflows 8" "set limits" + +exit $ret diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c new file mode 100644 index 000000000000..b24a2f17d415 --- /dev/null +++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c @@ -0,0 +1,616 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <errno.h> +#include <error.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/socket.h> +#include <sys/types.h> + +#include <arpa/inet.h> +#include <net/if.h> + +#include <linux/rtnetlink.h> +#include <linux/genetlink.h> + +#include "linux/mptcp.h" + +#ifndef MPTCP_PM_NAME +#define MPTCP_PM_NAME "mptcp_pm" +#endif + +static void syntax(char *argv[]) +{ + fprintf(stderr, "%s add|get|del|flush|dump|accept [<args>]\n", argv[0]); + fprintf(stderr, "\tadd [flags signal|subflow|backup] [id <nr>] [dev <name>] <ip>\n"); + fprintf(stderr, "\tdel <id>\n"); + fprintf(stderr, "\tget <id>\n"); + fprintf(stderr, "\tflush\n"); + fprintf(stderr, "\tdump\n"); + fprintf(stderr, "\tlimits [<rcv addr max> <subflow max>]\n"); + exit(0); +} + +static int init_genl_req(char *data, int family, int cmd, int version) +{ + struct nlmsghdr *nh = (void *)data; + struct genlmsghdr *gh; + int off = 0; + + nh->nlmsg_type = family; + nh->nlmsg_flags = NLM_F_REQUEST; + nh->nlmsg_len = NLMSG_LENGTH(GENL_HDRLEN); + off += NLMSG_ALIGN(sizeof(*nh)); + + gh = (void *)(data + off); + gh->cmd = cmd; + gh->version = version; + off += 
NLMSG_ALIGN(sizeof(*gh)); + return off; +} + +static void nl_error(struct nlmsghdr *nh) +{ + struct nlmsgerr *err = (struct nlmsgerr *)NLMSG_DATA(nh); + int len = nh->nlmsg_len - sizeof(*nh); + uint32_t off; + + if (len < sizeof(struct nlmsgerr)) + error(1, 0, "netlink error message truncated %d min %ld", len, + sizeof(struct nlmsgerr)); + + if (!err->error) { + /* check messages from kernel */ + struct rtattr *attrs = (struct rtattr *)NLMSG_DATA(nh); + + while (RTA_OK(attrs, len)) { + if (attrs->rta_type == NLMSGERR_ATTR_MSG) + fprintf(stderr, "netlink ext ack msg: %s\n", + (char *)RTA_DATA(attrs)); + if (attrs->rta_type == NLMSGERR_ATTR_OFFS) { + memcpy(&off, RTA_DATA(attrs), 4); + fprintf(stderr, "netlink err off %d\n", + (int)off); + } + attrs = RTA_NEXT(attrs, len); + } + } else { + fprintf(stderr, "netlink error %d", err->error); + } +} + +/* do a netlink command and, if max > 0, fetch the reply */ +static int do_nl_req(int fd, struct nlmsghdr *nh, int len, int max) +{ + struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; + socklen_t addr_len; + void *data = nh; + int rem, ret; + int err = 0; + + nh->nlmsg_len = len; + ret = sendto(fd, data, len, 0, (void *)&nladdr, sizeof(nladdr)); + if (ret != len) + error(1, errno, "send netlink: %uB != %uB\n", ret, len); + if (max == 0) + return 0; + + addr_len = sizeof(nladdr); + rem = ret = recvfrom(fd, data, max, 0, (void *)&nladdr, &addr_len); + if (ret < 0) + error(1, errno, "recv netlink: %uB\n", ret); + + /* Beware: the NLMSG_NEXT macro updates the 'rem' argument */ + for (; NLMSG_OK(nh, rem); nh = NLMSG_NEXT(nh, rem)) { + if (nh->nlmsg_type == NLMSG_ERROR) { + nl_error(nh); + err = 1; + } + } + if (err) + error(1, 0, "bailing out due to netlink error[s]"); + return ret; +} + +static int genl_parse_getfamily(struct nlmsghdr *nlh) +{ + struct genlmsghdr *ghdr = NLMSG_DATA(nlh); + int len = nlh->nlmsg_len; + struct rtattr *attrs; + + if (nlh->nlmsg_type != GENL_ID_CTRL) + error(1, errno, "Not a controller 
message, len=%d type=0x%x\n", + nlh->nlmsg_len, nlh->nlmsg_type); + + len -= NLMSG_LENGTH(GENL_HDRLEN); + + if (len < 0) + error(1, errno, "wrong controller message len %d\n", len); + + if (ghdr->cmd != CTRL_CMD_NEWFAMILY) + error(1, errno, "Unknown controller command %d\n", ghdr->cmd); + + attrs = (struct rtattr *) ((char *) ghdr + GENL_HDRLEN); + while (RTA_OK(attrs, len)) { + if (attrs->rta_type == CTRL_ATTR_FAMILY_ID) + return *(__u16 *)RTA_DATA(attrs); + attrs = RTA_NEXT(attrs, len); + } + + error(1, errno, "can't find CTRL_ATTR_FAMILY_ID attr"); + return -1; +} + +static int resolve_mptcp_pm_netlink(int fd) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct nlmsghdr *nh; + struct rtattr *rta; + int namelen; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, GENL_ID_CTRL, CTRL_CMD_GETFAMILY, 0); + + rta = (void *)(data + off); + namelen = strlen(MPTCP_PM_NAME) + 1; + rta->rta_type = CTRL_ATTR_FAMILY_NAME; + rta->rta_len = RTA_LENGTH(namelen); + memcpy(RTA_DATA(rta), MPTCP_PM_NAME, namelen); + off += NLMSG_ALIGN(rta->rta_len); + + do_nl_req(fd, nh, off, sizeof(data)); + return genl_parse_getfamily((void *)data); +} + +int add_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + u_int16_t family; + u_int32_t flags; + int nest_start; + u_int8_t id; + int off = 0; + int arg; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_ADD_ADDR, + MPTCP_PM_VER); + + if (argc < 3) + syntax(argv); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* addr data */ + rta = (void *)(data + off); + if (inet_pton(AF_INET, argv[2], 
RTA_DATA(rta))) { + family = AF_INET; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR4; + rta->rta_len = RTA_LENGTH(4); + } else if (inet_pton(AF_INET6, argv[2], RTA_DATA(rta))) { + family = AF_INET6; + rta->rta_type = MPTCP_PM_ADDR_ATTR_ADDR6; + rta->rta_len = RTA_LENGTH(16); + } else + error(1, errno, "can't parse ip %s", argv[2]); + off += NLMSG_ALIGN(rta->rta_len); + + /* family */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FAMILY; + rta->rta_len = RTA_LENGTH(2); + memcpy(RTA_DATA(rta), &family, 2); + off += NLMSG_ALIGN(rta->rta_len); + + for (arg = 3; arg < argc; arg++) { + if (!strcmp(argv[arg], "flags")) { + char *tok, *str; + + /* flags */ + flags = 0; + if (++arg >= argc) + error(1, 0, " missing flags value"); + + /* the value is a comma-separated flag list; OR each known flag in */ + for (str = argv[arg]; (tok = strtok(str, ",")); + str = NULL) { + if (!strcmp(tok, "subflow")) + flags |= MPTCP_PM_ADDR_FLAG_SUBFLOW; + else if (!strcmp(tok, "signal")) + flags |= MPTCP_PM_ADDR_FLAG_SIGNAL; + else if (!strcmp(tok, "backup")) + flags |= MPTCP_PM_ADDR_FLAG_BACKUP; + else + error(1, errno, + "unknown flag %s", argv[arg]); + } + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &flags, 4); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "id")) { + if (++arg >= argc) + error(1, 0, " missing id value"); + + id = atoi(argv[arg]); + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + } else if (!strcmp(argv[arg], "dev")) { + int32_t ifindex; + + if (++arg >= argc) + error(1, 0, " missing dev name"); + + ifindex = if_nametoindex(argv[arg]); + if (!ifindex) + error(1, errno, "unknown device %s", argv[arg]); + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_IF_IDX; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &ifindex, 4); + off +=
NLMSG_ALIGN(rta->rta_len); + } else + error(1, 0, "unknown keyword %s", argv[arg]); + } + nest->rta_len = off - nest_start; + + do_nl_req(fd, nh, off, 0); + return 0; +} + +int del_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + int nest_start; + u_int8_t id; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_DEL_ADDR, + MPTCP_PM_VER); + + /* the only argument is the address id */ + if (argc != 3) + syntax(argv); + + id = atoi(argv[2]); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* build a dummy addr with only the ID set */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + nest->rta_len = off - nest_start; + + do_nl_req(fd, nh, off, 0); + return 0; +} + +static void print_addr(struct rtattr *attrs, int len) +{ + uint16_t family = 0; + char str[1024]; + uint32_t flags; + uint8_t id; + + while (RTA_OK(attrs, len)) { + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_FAMILY) + memcpy(&family, RTA_DATA(attrs), 2); + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR4) { + if (family != AF_INET) + error(1, errno, "wrong IP (v4) for family %d", + family); + inet_ntop(AF_INET, RTA_DATA(attrs), str, sizeof(str)); + printf("%s", str); + } + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ADDR6) { + if (family != AF_INET6) + error(1, errno, "wrong IP (v6) for family %d", + family); + inet_ntop(AF_INET6, RTA_DATA(attrs), str, sizeof(str)); + printf("%s", str); + } + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_ID) { + memcpy(&id, RTA_DATA(attrs), 1); + printf("id %d ", id); + } + if (attrs->rta_type == 
MPTCP_PM_ADDR_ATTR_FLAGS) { + memcpy(&flags, RTA_DATA(attrs), 4); + + printf("flags "); + if (flags & MPTCP_PM_ADDR_FLAG_SIGNAL) { + printf("signal"); + flags &= ~MPTCP_PM_ADDR_FLAG_SIGNAL; + if (flags) + printf(","); + } + + if (flags & MPTCP_PM_ADDR_FLAG_SUBFLOW) { + printf("subflow"); + flags &= ~MPTCP_PM_ADDR_FLAG_SUBFLOW; + if (flags) + printf(","); + } + + if (flags & MPTCP_PM_ADDR_FLAG_BACKUP) { + printf("backup"); + flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP; + if (flags) + printf(","); + } + + /* dump any remaining (unknown) flag bits in hex */ + if (flags) + printf("0x%x", flags); + printf(" "); + } + if (attrs->rta_type == MPTCP_PM_ADDR_ATTR_IF_IDX) { + char name[IF_NAMESIZE], *ret; + int32_t ifindex; + + memcpy(&ifindex, RTA_DATA(attrs), 4); + ret = if_indextoname(ifindex, name); + if (ret) + printf("dev %s ", ret); + else + printf("dev unknown/%d", ifindex); + } + + attrs = RTA_NEXT(attrs, len); + } + printf("\n"); +} + +static void print_addrs(struct nlmsghdr *nh, int pm_family, int total_len) +{ + struct rtattr *attrs; + + for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) { + int len = nh->nlmsg_len; + + if (nh->nlmsg_type == NLMSG_DONE) + break; + if (nh->nlmsg_type == NLMSG_ERROR) + nl_error(nh); + if (nh->nlmsg_type != pm_family) + continue; + + len -= NLMSG_LENGTH(GENL_HDRLEN); + attrs = (struct rtattr *) ((char *) NLMSG_DATA(nh) + + GENL_HDRLEN); + while (RTA_OK(attrs, len)) { + if (attrs->rta_type == + (MPTCP_PM_ATTR_ADDR | NLA_F_NESTED)) + print_addr((void *)RTA_DATA(attrs), + attrs->rta_len); + attrs = RTA_NEXT(attrs, len); + } + } +} + +int get_addr(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct rtattr *rta, *nest; + struct nlmsghdr *nh; + int nest_start; + u_int8_t id; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR, + MPTCP_PM_VER); + + /* the only
argument is the address id */ + if (argc != 3) + syntax(argv); + + id = atoi(argv[2]); + + nest_start = off; + nest = (void *)(data + off); + nest->rta_type = NLA_F_NESTED | MPTCP_PM_ATTR_ADDR; + nest->rta_len = RTA_LENGTH(0); + off += NLMSG_ALIGN(nest->rta_len); + + /* build a dummy addr with only the ID set */ + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ADDR_ATTR_ID; + rta->rta_len = RTA_LENGTH(1); + memcpy(RTA_DATA(rta), &id, 1); + off += NLMSG_ALIGN(rta->rta_len); + nest->rta_len = off - nest_start; + + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); + return 0; +} + +int dump_addrs(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + pid_t pid = getpid(); + struct nlmsghdr *nh; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR, + MPTCP_PM_VER); + nh->nlmsg_flags |= NLM_F_DUMP; + nh->nlmsg_seq = 1; + nh->nlmsg_pid = pid; + nh->nlmsg_len = off; + + print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data))); + return 0; +} + +int flush_addrs(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + struct nlmsghdr *nh; + int off = 0; + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, MPTCP_PM_CMD_FLUSH_ADDRS, + MPTCP_PM_VER); + + do_nl_req(fd, nh, off, 0); + return 0; +} + +static void print_limits(struct nlmsghdr *nh, int pm_family, int total_len) +{ + struct rtattr *attrs; + uint32_t max; + + for (; NLMSG_OK(nh, total_len); nh = NLMSG_NEXT(nh, total_len)) { + int len = nh->nlmsg_len; + + if (nh->nlmsg_type == NLMSG_DONE) + break; + if (nh->nlmsg_type == NLMSG_ERROR) + nl_error(nh); + if (nh->nlmsg_type != pm_family) + continue; + + len -= NLMSG_LENGTH(GENL_HDRLEN); + attrs = (struct rtattr *) ((char *) 
NLMSG_DATA(nh) + + GENL_HDRLEN); + while (RTA_OK(attrs, len)) { + int type = attrs->rta_type; + + if (type != MPTCP_PM_ATTR_RCV_ADD_ADDRS && + type != MPTCP_PM_ATTR_SUBFLOWS) + goto next; + + memcpy(&max, RTA_DATA(attrs), 4); + printf("%s %u\n", type == MPTCP_PM_ATTR_SUBFLOWS ? + "subflows" : "accept", max); + +next: + attrs = RTA_NEXT(attrs, len); + } + } +} + +int get_set_limits(int fd, int pm_family, int argc, char *argv[]) +{ + char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) + + NLMSG_ALIGN(sizeof(struct genlmsghdr)) + + 1024]; + uint32_t rcv_addr = 0, subflows = 0; + int cmd, len = sizeof(data); + struct nlmsghdr *nh; + int off = 0; + + /* limit */ + if (argc == 4) { + rcv_addr = atoi(argv[2]); + subflows = atoi(argv[3]); + cmd = MPTCP_PM_CMD_SET_LIMITS; + } else { + cmd = MPTCP_PM_CMD_GET_LIMITS; + } + + memset(data, 0, sizeof(data)); + nh = (void *)data; + off = init_genl_req(data, pm_family, cmd, MPTCP_PM_VER); + + /* limit */ + if (cmd == MPTCP_PM_CMD_SET_LIMITS) { + struct rtattr *rta = (void *)(data + off); + + rta->rta_type = MPTCP_PM_ATTR_RCV_ADD_ADDRS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &rcv_addr, 4); + off += NLMSG_ALIGN(rta->rta_len); + + rta = (void *)(data + off); + rta->rta_type = MPTCP_PM_ATTR_SUBFLOWS; + rta->rta_len = RTA_LENGTH(4); + memcpy(RTA_DATA(rta), &subflows, 4); + off += NLMSG_ALIGN(rta->rta_len); + + /* do not expect a reply */ + len = 0; + } + + len = do_nl_req(fd, nh, off, len); + if (cmd == MPTCP_PM_CMD_GET_LIMITS) + print_limits(nh, pm_family, len); + return 0; +} + +int main(int argc, char *argv[]) +{ + int fd, pm_family; + + if (argc < 2) + syntax(argv); + + fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_GENERIC); + if (fd == -1) + error(1, errno, "socket netlink"); + + pm_family = resolve_mptcp_pm_netlink(fd); + + if (!strcmp(argv[1], "add")) + return add_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "del")) + return del_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "flush")) + 
return flush_addrs(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "get")) + return get_addr(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "dump")) + return dump_addrs(fd, pm_family, argc, argv); + else if (!strcmp(argv[1], "limits")) + return get_set_limits(fd, pm_family, argc, argv); + + fprintf(stderr, "unknown sub-command: %s", argv[1]); + syntax(argv); + return 0; +} diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.c b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c new file mode 100644 index 000000000000..7b01b7c2ec10 --- /dev/null +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.c @@ -0,0 +1,162 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Check if we can fully utilize 4-tuples for connect(). + * + * Rules to bind sockets to the same port when all ephemeral ports are + * exhausted. + * + * 1. if there are TCP_LISTEN sockets on the port, fail to bind. + * 2. if there are sockets without SO_REUSEADDR, fail to bind. + * 3. if SO_REUSEADDR is disabled, fail to bind. + * 4. if SO_REUSEADDR is enabled and SO_REUSEPORT is disabled, + * succeed to bind. + * 5. if SO_REUSEADDR and SO_REUSEPORT are enabled and + * there is no socket having the both options and the same EUID, + * succeed to bind. + * 6. fail to bind. 
+ * + * Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + */ +#include <arpa/inet.h> +#include <netinet/in.h> +#include <sys/socket.h> +#include <sys/types.h> +#include <unistd.h> +#include "../kselftest_harness.h" + +struct reuse_opts { + int reuseaddr[2]; + int reuseport[2]; +}; + +struct reuse_opts unreusable_opts[12] = { + {0, 0, 0, 0}, + {0, 0, 0, 1}, + {0, 0, 1, 0}, + {0, 0, 1, 1}, + {0, 1, 0, 0}, + {0, 1, 0, 1}, + {0, 1, 1, 0}, + {0, 1, 1, 1}, + {1, 0, 0, 0}, + {1, 0, 0, 1}, + {1, 0, 1, 0}, + {1, 0, 1, 1}, +}; + +struct reuse_opts reusable_opts[4] = { + {1, 1, 0, 0}, + {1, 1, 0, 1}, + {1, 1, 1, 0}, + {1, 1, 1, 1}, +}; + +int bind_port(struct __test_metadata *_metadata, int reuseaddr, int reuseport) +{ + struct sockaddr_in local_addr; + int len = sizeof(local_addr); + int fd, ret; + + fd = socket(AF_INET, SOCK_STREAM, 0); + ASSERT_NE(-1, fd) TH_LOG("failed to open socket."); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEADDR, &reuseaddr, sizeof(int)); + ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEADDR."); + + ret = setsockopt(fd, SOL_SOCKET, SO_REUSEPORT, &reuseport, sizeof(int)); + ASSERT_EQ(0, ret) TH_LOG("failed to setsockopt: SO_REUSEPORT."); + + local_addr.sin_family = AF_INET; + local_addr.sin_addr.s_addr = inet_addr("127.0.0.1"); + local_addr.sin_port = 0; + + if (bind(fd, (struct sockaddr *)&local_addr, len) == -1) { + close(fd); + return -1; + } + + return fd; +} + +TEST(reuseaddr_ports_exhausted_unreusable) +{ + struct reuse_opts *opts; + int i, j, fd[2]; + + for (i = 0; i < 12; i++) { + opts = &unreusable_opts[i]; + + for (j = 0; j < 2; j++) + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind."); + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST(reuseaddr_ports_exhausted_reusable_same_euid) +{ + struct reuse_opts *opts; + int i, j, fd[2]; + + for (i = 0; i < 4; i++) { + opts = 
&reusable_opts[i]; + + for (j = 0; j < 2; j++) + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + + if (opts->reuseport[0] && opts->reuseport[1]) { + EXPECT_EQ(-1, fd[1]) TH_LOG("should fail to bind because both sockets succeed to be listened."); + } else { + EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind to connect to different destinations."); + } + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST(reuseaddr_ports_exhausted_reusable_different_euid) +{ + struct reuse_opts *opts; + int i, j, ret, fd[2]; + uid_t euid[2] = {10, 20}; + + for (i = 0; i < 4; i++) { + opts = &reusable_opts[i]; + + for (j = 0; j < 2; j++) { + ret = seteuid(euid[j]); + ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: %d.", euid[j]); + + fd[j] = bind_port(_metadata, opts->reuseaddr[j], opts->reuseport[j]); + + ret = seteuid(0); + ASSERT_EQ(0, ret) TH_LOG("failed to seteuid: 0."); + } + + ASSERT_NE(-1, fd[0]) TH_LOG("failed to bind."); + EXPECT_NE(-1, fd[1]) TH_LOG("should succeed to bind because one socket can be bound in each euid."); + + if (fd[1] != -1) { + ret = listen(fd[0], 5); + ASSERT_EQ(0, ret) TH_LOG("failed to listen."); + + ret = listen(fd[1], 5); + EXPECT_EQ(-1, ret) TH_LOG("should fail to listen because only one uid reserves the port in TCP_LISTEN."); + } + + for (j = 0; j < 2; j++) + if (fd[j] != -1) + close(fd[j]); + } +} + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh new file mode 100755 index 000000000000..20e3a2913d06 --- /dev/null +++ b/tools/testing/selftests/net/reuseaddr_ports_exhausted.sh @@ -0,0 +1,35 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Run tests when all ephemeral ports are exhausted. 
+# +# Author: Kuniyuki Iwashima <kuniyu@amazon.co.jp> + +set +x +set -e + +readonly NETNS="ns-$(mktemp -u XXXXXX)" + +setup() { + ip netns add "${NETNS}" + ip -netns "${NETNS}" link set lo up + ip netns exec "${NETNS}" \ + sysctl -w net.ipv4.ip_local_port_range="32768 32768" \ + > /dev/null 2>&1 + ip netns exec "${NETNS}" \ + sysctl -w net.ipv4.ip_autobind_reuse=1 > /dev/null 2>&1 +} + +cleanup() { + ip netns del "${NETNS}" +} + +trap cleanup EXIT +setup + +do_test() { + ip netns exec "${NETNS}" ./reuseaddr_ports_exhausted +} + +do_test +echo "tests done" diff --git a/tools/testing/selftests/net/rxtimestamp.c b/tools/testing/selftests/net/rxtimestamp.c new file mode 100644 index 000000000000..6dee9e636a95 --- /dev/null +++ b/tools/testing/selftests/net/rxtimestamp.c @@ -0,0 +1,390 @@ +#include <errno.h> +#include <error.h> +#include <getopt.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> + +#include <sys/time.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <sys/ioctl.h> +#include <arpa/inet.h> +#include <net/if.h> + +#include <asm/types.h> +#include <linux/net_tstamp.h> +#include <linux/errqueue.h> + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +struct options { + int so_timestamp; + int so_timestampns; + int so_timestamping; +}; + +struct tstamps { + bool tstamp; + bool tstampns; + bool swtstamp; + bool hwtstamp; +}; + +struct socket_type { + char *friendly_name; + int type; + int protocol; + bool enabled; +}; + +struct test_case { + struct options sockopt; + struct tstamps expected; + bool enabled; +}; + +struct sof_flag { + int mask; + char *name; +}; + +static struct sof_flag sof_flags[] = { +#define SOF_FLAG(f) { f, #f } + SOF_FLAG(SOF_TIMESTAMPING_SOFTWARE), + SOF_FLAG(SOF_TIMESTAMPING_RX_SOFTWARE), + SOF_FLAG(SOF_TIMESTAMPING_RX_HARDWARE), +}; + +static struct socket_type socket_types[] = { + { "ip", SOCK_RAW, IPPROTO_EGP }, + { "udp", SOCK_DGRAM, IPPROTO_UDP }, + { 
"tcp", SOCK_STREAM, IPPROTO_TCP }, +}; + +static struct test_case test_cases[] = { + { {}, {} }, + { + { so_timestamp: 1 }, + { tstamp: true } + }, + { + { so_timestampns: 1 }, + { tstampns: true } + }, + { + { so_timestamp: 1, so_timestampns: 1 }, + { tstampns: true } + }, + { + { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE }, + {} + }, + { + /* Loopback device does not support hw timestamps. */ + { so_timestamping: SOF_TIMESTAMPING_RX_HARDWARE }, + {} + }, + { + { so_timestamping: SOF_TIMESTAMPING_SOFTWARE }, + {} + }, + { + { so_timestamping: SOF_TIMESTAMPING_RX_SOFTWARE + | SOF_TIMESTAMPING_RX_HARDWARE }, + {} + }, + { + { so_timestamping: SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_RX_SOFTWARE }, + { swtstamp: true } + }, + { + { so_timestamp: 1, so_timestamping: SOF_TIMESTAMPING_SOFTWARE + | SOF_TIMESTAMPING_RX_SOFTWARE }, + { tstamp: true, swtstamp: true } + }, +}; + +static struct option long_options[] = { + { "list_tests", no_argument, 0, 'l' }, + { "test_num", required_argument, 0, 'n' }, + { "op_size", required_argument, 0, 's' }, + { "tcp", no_argument, 0, 't' }, + { "udp", no_argument, 0, 'u' }, + { "ip", no_argument, 0, 'i' }, +}; + +static int next_port = 19999; +static int op_size = 10 * 1024; + +void print_test_case(struct test_case *t) +{ + int f = 0; + + printf("sockopts {"); + if (t->sockopt.so_timestamp) + printf(" SO_TIMESTAMP "); + if (t->sockopt.so_timestampns) + printf(" SO_TIMESTAMPNS "); + if (t->sockopt.so_timestamping) { + printf(" SO_TIMESTAMPING: {"); + for (f = 0; f < ARRAY_SIZE(sof_flags); f++) + if (t->sockopt.so_timestamping & sof_flags[f].mask) + printf(" %s |", sof_flags[f].name); + printf("}"); + } + printf("} expected cmsgs: {"); + if (t->expected.tstamp) + printf(" SCM_TIMESTAMP "); + if (t->expected.tstampns) + printf(" SCM_TIMESTAMPNS "); + if (t->expected.swtstamp || t->expected.hwtstamp) { + printf(" SCM_TIMESTAMPING {"); + if (t->expected.swtstamp) + printf("0"); + if (t->expected.swtstamp && t->expected.hwtstamp) + 
printf(","); + if (t->expected.hwtstamp) + printf("2"); + printf("}"); + } + printf("}\n"); +} + +void do_send(int src) +{ + int r; + char *buf = malloc(op_size); + + memset(buf, 'z', op_size); + r = write(src, buf, op_size); + if (r < 0) + error(1, errno, "Failed to sendmsg"); + + free(buf); +} + +bool do_recv(int rcv, int read_size, struct tstamps expected) +{ + const int CMSG_SIZE = 1024; + + struct scm_timestamping *ts; + struct tstamps actual = {}; + char cmsg_buf[CMSG_SIZE]; + struct iovec recv_iov; + struct cmsghdr *cmsg; + bool failed = false; + struct msghdr hdr; + int flags = 0; + int r; + + memset(&hdr, 0, sizeof(hdr)); + hdr.msg_iov = &recv_iov; + hdr.msg_iovlen = 1; + recv_iov.iov_base = malloc(read_size); + recv_iov.iov_len = read_size; + + hdr.msg_control = cmsg_buf; + hdr.msg_controllen = sizeof(cmsg_buf); + + r = recvmsg(rcv, &hdr, flags); + if (r < 0) + error(1, errno, "Failed to recvmsg"); + if (r != read_size) + error(1, 0, "Only received %d bytes of payload.", r); + + if (hdr.msg_flags & (MSG_TRUNC | MSG_CTRUNC)) + error(1, 0, "Message was truncated."); + + for (cmsg = CMSG_FIRSTHDR(&hdr); cmsg != NULL; + cmsg = CMSG_NXTHDR(&hdr, cmsg)) { + if (cmsg->cmsg_level != SOL_SOCKET) + error(1, 0, "Unexpected cmsg_level %d", + cmsg->cmsg_level); + switch (cmsg->cmsg_type) { + case SCM_TIMESTAMP: + actual.tstamp = true; + break; + case SCM_TIMESTAMPNS: + actual.tstampns = true; + break; + case SCM_TIMESTAMPING: + ts = (struct scm_timestamping *)CMSG_DATA(cmsg); + actual.swtstamp = !!ts->ts[0].tv_sec; + if (ts->ts[1].tv_sec != 0) + error(0, 0, "ts[1] should not be set."); + actual.hwtstamp = !!ts->ts[2].tv_sec; + break; + default: + error(1, 0, "Unexpected cmsg_type %d", cmsg->cmsg_type); + } + } + +#define VALIDATE(field) \ + do { \ + if (expected.field != actual.field) { \ + if (expected.field) \ + error(0, 0, "Expected " #field " to be set."); \ + else \ + error(0, 0, \ + "Expected " #field " to not be set."); \ + failed = true; \ + } \ + } while (0) 
+ + VALIDATE(tstamp); + VALIDATE(tstampns); + VALIDATE(swtstamp); + VALIDATE(hwtstamp); +#undef VALIDATE + + free(recv_iov.iov_base); + + return failed; +} + +void config_so_flags(int rcv, struct options o) +{ + int on = 1; + + if (setsockopt(rcv, SOL_SOCKET, SO_REUSEADDR, &on, sizeof(on)) < 0) + error(1, errno, "Failed to enable SO_REUSEADDR"); + + if (o.so_timestamp && + setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMP, + &o.so_timestamp, sizeof(o.so_timestamp)) < 0) + error(1, errno, "Failed to enable SO_TIMESTAMP"); + + if (o.so_timestampns && + setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPNS, + &o.so_timestampns, sizeof(o.so_timestampns)) < 0) + error(1, errno, "Failed to enable SO_TIMESTAMPNS"); + + if (o.so_timestamping && + setsockopt(rcv, SOL_SOCKET, SO_TIMESTAMPING, + &o.so_timestamping, sizeof(o.so_timestamping)) < 0) + error(1, errno, "Failed to set SO_TIMESTAMPING"); +} + +bool run_test_case(struct socket_type s, struct test_case t) +{ + int port = (s.type == SOCK_RAW) ? 0 : next_port++; + int read_size = op_size; + struct sockaddr_in addr; + bool failed = false; + int src, dst, rcv; + + src = socket(AF_INET, s.type, s.protocol); + if (src < 0) + error(1, errno, "Failed to open src socket"); + + dst = socket(AF_INET, s.type, s.protocol); + if (dst < 0) + error(1, errno, "Failed to open dst socket"); + + memset(&addr, 0, sizeof(addr)); + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_LOOPBACK); + addr.sin_port = htons(port); + + if (bind(dst, (struct sockaddr *)&addr, sizeof(addr)) < 0) + error(1, errno, "Failed to bind to port %d", port); + + if (s.type == SOCK_STREAM && (listen(dst, 1) < 0)) + error(1, errno, "Failed to listen"); + + if (connect(src, (struct sockaddr *)&addr, sizeof(addr)) < 0) + error(1, errno, "Failed to connect"); + + if (s.type == SOCK_STREAM) { + rcv = accept(dst, NULL, NULL); + if (rcv < 0) + error(1, errno, "Failed to accept"); + close(dst); + } else { + rcv = dst; + } + + config_so_flags(rcv, t.sockopt); + usleep(20000); 
/* setsockopt for SO_TIMESTAMPING is asynchronous */ + do_send(src); + + if (s.type == SOCK_RAW) + read_size += 20; /* for IP header */ + failed = do_recv(rcv, read_size, t.expected); + + close(rcv); + close(src); + + return failed; +} + +int main(int argc, char **argv) +{ + bool all_protocols = true; + bool all_tests = true; + int arg_index = 0; + int failures = 0; + int s, t; + char opt; + + while ((opt = getopt_long(argc, argv, "", long_options, + &arg_index)) != -1) { + switch (opt) { + case 'l': + for (t = 0; t < ARRAY_SIZE(test_cases); t++) { + printf("%d\t", t); + print_test_case(&test_cases[t]); + } + return 0; + case 'n': + t = atoi(optarg); + if (t >= ARRAY_SIZE(test_cases)) + error(1, 0, "Invalid test case: %d", t); + all_tests = false; + test_cases[t].enabled = true; + break; + case 's': + op_size = atoi(optarg); + break; + case 't': + all_protocols = false; + socket_types[2].enabled = true; + break; + case 'u': + all_protocols = false; + socket_types[1].enabled = true; + break; + case 'i': + all_protocols = false; + socket_types[0].enabled = true; + break; + default: + error(1, 0, "Failed to parse parameters."); + } + } + + for (s = 0; s < ARRAY_SIZE(socket_types); s++) { + if (!all_protocols && !socket_types[s].enabled) + continue; + + printf("Testing %s...\n", socket_types[s].friendly_name); + for (t = 0; t < ARRAY_SIZE(test_cases); t++) { + if (!all_tests && !test_cases[t].enabled) + continue; + + printf("Starting testcase %d...\n", t); + if (run_test_case(socket_types[s], test_cases[t])) { + failures++; + printf("FAILURE in test case "); + print_test_case(&test_cases[t]); + } + } + } + if (!failures) + printf("PASSED.\n"); + return failures; +} diff --git a/tools/testing/selftests/net/timestamping.c b/tools/testing/selftests/net/timestamping.c new file mode 100644 index 000000000000..aca3491174a1 --- /dev/null +++ b/tools/testing/selftests/net/timestamping.c @@ -0,0 +1,509 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * This program 
demonstrates how the various time stamping features in + * the Linux kernel work. It emulates the behavior of a PTP + * implementation in stand-alone master mode by sending PTPv1 Sync + * multicasts once every second. It looks for similar packets, but + * beyond that doesn't actually implement PTP. + * + * Outgoing packets are time stamped with SO_TIMESTAMPING with or + * without hardware support. + * + * Incoming packets are time stamped with SO_TIMESTAMPING with or + * without hardware support, SIOCGSTAMP[NS] (per-socket time stamp) and + * SO_TIMESTAMP[NS]. + * + * Copyright (C) 2009 Intel Corporation. + * Author: Patrick Ohly <patrick.ohly@intel.com> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <errno.h> +#include <string.h> + +#include <sys/time.h> +#include <sys/socket.h> +#include <sys/select.h> +#include <sys/ioctl.h> +#include <arpa/inet.h> +#include <net/if.h> + +#include <asm/types.h> +#include <linux/net_tstamp.h> +#include <linux/errqueue.h> +#include <linux/sockios.h> + +#ifndef SO_TIMESTAMPING +# define SO_TIMESTAMPING 37 +# define SCM_TIMESTAMPING SO_TIMESTAMPING +#endif + +#ifndef SO_TIMESTAMPNS +# define SO_TIMESTAMPNS 35 +#endif + +static void usage(const char *error) +{ + if (error) + printf("invalid option: %s\n", error); + printf("timestamping interface option*\n\n" + "Options:\n" + " IP_MULTICAST_LOOP - looping outgoing multicasts\n" + " SO_TIMESTAMP - normal software time stamping, ms resolution\n" + " SO_TIMESTAMPNS - more accurate software time stamping\n" + " SOF_TIMESTAMPING_TX_HARDWARE - hardware time stamping of outgoing packets\n" + " SOF_TIMESTAMPING_TX_SOFTWARE - software fallback for outgoing packets\n" + " SOF_TIMESTAMPING_RX_HARDWARE - hardware time stamping of incoming packets\n" + " SOF_TIMESTAMPING_RX_SOFTWARE - software fallback for incoming packets\n" + " SOF_TIMESTAMPING_SOFTWARE - request reporting of software time stamps\n" + " SOF_TIMESTAMPING_RAW_HARDWARE - request reporting of raw HW time stamps\n" + " 
SIOCGSTAMP - check last socket time stamp\n" + " SIOCGSTAMPNS - more accurate socket time stamp\n"); + exit(1); +} + +static void bail(const char *error) +{ + printf("%s: %s\n", error, strerror(errno)); + exit(1); +} + +static const unsigned char sync[] = { + 0x00, 0x01, 0x00, 0x01, + 0x5f, 0x44, 0x46, 0x4c, + 0x54, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x01, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x01, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x08, + 0x00, 0x00, 0x00, 0x00, + 0x49, 0x05, 0xcd, 0x01, + 0x29, 0xb1, 0x8d, 0xb0, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x37, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x01, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x01, + 0x00, 0x00, 0xf0, 0x60, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x04, + 0x44, 0x46, 0x4c, 0x54, + 0x00, 0x01, + + /* fake uuid */ + 0x00, 0x01, + 0x02, 0x03, 0x04, 0x05, + + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00, + 0x00, 0x00, 0x00, 0x00 +}; + +static void sendpacket(int sock, struct sockaddr *addr, socklen_t addr_len) +{ + struct timeval now; + int res; + + res = sendto(sock, sync, sizeof(sync), 0, + addr, addr_len); + gettimeofday(&now, 0); + if (res < 0) + printf("%s: %s\n", "send", strerror(errno)); + else + printf("%ld.%06ld: sent %d bytes\n", + (long)now.tv_sec, (long)now.tv_usec, + res); +} + +static void printpacket(struct msghdr *msg, int res, + char *data, + int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + struct sockaddr_in *from_addr = (struct sockaddr_in *)msg->msg_name; + struct cmsghdr *cmsg; + struct timeval tv; + struct timespec ts; + struct timeval now; + + gettimeofday(&now, 0); + + printf("%ld.%06ld: received %s data, %d bytes from %s, %zu bytes control messages\n", + (long)now.tv_sec, (long)now.tv_usec, + (recvmsg_flags & MSG_ERRQUEUE) ? 
"error" : "regular", + res, + inet_ntoa(from_addr->sin_addr), + msg->msg_controllen); + for (cmsg = CMSG_FIRSTHDR(msg); + cmsg; + cmsg = CMSG_NXTHDR(msg, cmsg)) { + printf(" cmsg len %zu: ", cmsg->cmsg_len); + switch (cmsg->cmsg_level) { + case SOL_SOCKET: + printf("SOL_SOCKET "); + switch (cmsg->cmsg_type) { + case SO_TIMESTAMP: { + struct timeval *stamp = + (struct timeval *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMP %ld.%06ld", + (long)stamp->tv_sec, + (long)stamp->tv_usec); + break; + } + case SO_TIMESTAMPNS: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPNS %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + case SO_TIMESTAMPING: { + struct timespec *stamp = + (struct timespec *)CMSG_DATA(cmsg); + printf("SO_TIMESTAMPING "); + printf("SW %ld.%09ld ", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + stamp++; + /* skip deprecated HW transformed */ + stamp++; + printf("HW raw %ld.%09ld", + (long)stamp->tv_sec, + (long)stamp->tv_nsec); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + case IPPROTO_IP: + printf("IPPROTO_IP "); + switch (cmsg->cmsg_type) { + case IP_RECVERR: { + struct sock_extended_err *err = + (struct sock_extended_err *)CMSG_DATA(cmsg); + printf("IP_RECVERR ee_errno '%s' ee_origin %d => %s", + strerror(err->ee_errno), + err->ee_origin, +#ifdef SO_EE_ORIGIN_TIMESTAMPING + err->ee_origin == SO_EE_ORIGIN_TIMESTAMPING ? 
+ "bounced packet" : "unexpected origin" +#else + "probably SO_EE_ORIGIN_TIMESTAMPING" +#endif + ); + if (res < sizeof(sync)) + printf(" => truncated data?!"); + else if (!memcmp(sync, data + res - sizeof(sync), + sizeof(sync))) + printf(" => GOT OUR DATA BACK (HURRAY!)"); + break; + } + case IP_PKTINFO: { + struct in_pktinfo *pktinfo = + (struct in_pktinfo *)CMSG_DATA(cmsg); + printf("IP_PKTINFO interface index %u", + pktinfo->ipi_ifindex); + break; + } + default: + printf("type %d", cmsg->cmsg_type); + break; + } + break; + default: + printf("level %d type %d", + cmsg->cmsg_level, + cmsg->cmsg_type); + break; + } + printf("\n"); + } + + if (siocgstamp) { + if (ioctl(sock, SIOCGSTAMP, &tv)) + printf(" %s: %s\n", "SIOCGSTAMP", strerror(errno)); + else + printf("SIOCGSTAMP %ld.%06ld\n", + (long)tv.tv_sec, + (long)tv.tv_usec); + } + if (siocgstampns) { + if (ioctl(sock, SIOCGSTAMPNS, &ts)) + printf(" %s: %s\n", "SIOCGSTAMPNS", strerror(errno)); + else + printf("SIOCGSTAMPNS %ld.%09ld\n", + (long)ts.tv_sec, + (long)ts.tv_nsec); + } +} + +static void recvpacket(int sock, int recvmsg_flags, + int siocgstamp, int siocgstampns) +{ + char data[256]; + struct msghdr msg; + struct iovec entry; + struct sockaddr_in from_addr; + struct { + struct cmsghdr cm; + char control[512]; + } control; + int res; + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + entry.iov_base = data; + entry.iov_len = sizeof(data); + msg.msg_name = (caddr_t)&from_addr; + msg.msg_namelen = sizeof(from_addr); + msg.msg_control = &control; + msg.msg_controllen = sizeof(control); + + res = recvmsg(sock, &msg, recvmsg_flags|MSG_DONTWAIT); + if (res < 0) { + printf("%s %s: %s\n", + "recvmsg", + (recvmsg_flags & MSG_ERRQUEUE) ? 
"error" : "regular", + strerror(errno)); + } else { + printpacket(&msg, res, data, + sock, recvmsg_flags, + siocgstamp, siocgstampns); + } +} + +int main(int argc, char **argv) +{ + int so_timestamping_flags = 0; + int so_timestamp = 0; + int so_timestampns = 0; + int siocgstamp = 0; + int siocgstampns = 0; + int ip_multicast_loop = 0; + char *interface; + int i; + int enabled = 1; + int sock; + struct ifreq device; + struct ifreq hwtstamp; + struct hwtstamp_config hwconfig, hwconfig_requested; + struct sockaddr_in addr; + struct ip_mreq imr; + struct in_addr iaddr; + int val; + socklen_t len; + struct timeval next; + + if (argc < 2) + usage(0); + interface = argv[1]; + + for (i = 2; i < argc; i++) { + if (!strcasecmp(argv[i], "SO_TIMESTAMP")) + so_timestamp = 1; + else if (!strcasecmp(argv[i], "SO_TIMESTAMPNS")) + so_timestampns = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMP")) + siocgstamp = 1; + else if (!strcasecmp(argv[i], "SIOCGSTAMPNS")) + siocgstampns = 1; + else if (!strcasecmp(argv[i], "IP_MULTICAST_LOOP")) + ip_multicast_loop = 1; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_TX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_TX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_HARDWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RX_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RX_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_SOFTWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_SOFTWARE; + else if (!strcasecmp(argv[i], "SOF_TIMESTAMPING_RAW_HARDWARE")) + so_timestamping_flags |= SOF_TIMESTAMPING_RAW_HARDWARE; + else + usage(argv[i]); + } + + sock = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (sock < 0) + bail("socket"); + + memset(&device, 0, sizeof(device)); + strncpy(device.ifr_name, interface, 
sizeof(device.ifr_name)); + if (ioctl(sock, SIOCGIFADDR, &device) < 0) + bail("getting interface IP address"); + + memset(&hwtstamp, 0, sizeof(hwtstamp)); + strncpy(hwtstamp.ifr_name, interface, sizeof(hwtstamp.ifr_name)); + hwtstamp.ifr_data = (void *)&hwconfig; + memset(&hwconfig, 0, sizeof(hwconfig)); + hwconfig.tx_type = + (so_timestamping_flags & SOF_TIMESTAMPING_TX_HARDWARE) ? + HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF; + hwconfig.rx_filter = + (so_timestamping_flags & SOF_TIMESTAMPING_RX_HARDWARE) ? + HWTSTAMP_FILTER_PTP_V1_L4_SYNC : HWTSTAMP_FILTER_NONE; + hwconfig_requested = hwconfig; + if (ioctl(sock, SIOCSHWTSTAMP, &hwtstamp) < 0) { + if ((errno == EINVAL || errno == ENOTSUP) && + hwconfig_requested.tx_type == HWTSTAMP_TX_OFF && + hwconfig_requested.rx_filter == HWTSTAMP_FILTER_NONE) + printf("SIOCSHWTSTAMP: disabling hardware time stamping not possible\n"); + else + bail("SIOCSHWTSTAMP"); + } + printf("SIOCSHWTSTAMP: tx_type %d requested, got %d; rx_filter %d requested, got %d\n", + hwconfig_requested.tx_type, hwconfig.tx_type, + hwconfig_requested.rx_filter, hwconfig.rx_filter); + + /* bind to PTP port */ + addr.sin_family = AF_INET; + addr.sin_addr.s_addr = htonl(INADDR_ANY); + addr.sin_port = htons(319 /* PTP event port */); + if (bind(sock, + (struct sockaddr *)&addr, + sizeof(struct sockaddr_in)) < 0) + bail("bind"); + + /* set multicast group for outgoing packets */ + inet_aton("224.0.1.130", &iaddr); /* alternate PTP domain 1 */ + addr.sin_addr = iaddr; + imr.imr_multiaddr.s_addr = iaddr.s_addr; + imr.imr_interface.s_addr = + ((struct sockaddr_in *)&device.ifr_addr)->sin_addr.s_addr; + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_IF, + &imr.imr_interface.s_addr, sizeof(struct in_addr)) < 0) + bail("set multicast"); + + /* join multicast group, loop our own packet */ + if (setsockopt(sock, IPPROTO_IP, IP_ADD_MEMBERSHIP, + &imr, sizeof(struct ip_mreq)) < 0) + bail("join multicast group"); + + if (setsockopt(sock, IPPROTO_IP, IP_MULTICAST_LOOP, + 
&ip_multicast_loop, sizeof(enabled)) < 0) { + bail("loop multicast"); + } + + /* set socket options for time stamping */ + if (so_timestamp && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMP"); + + if (so_timestampns && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, + &enabled, sizeof(enabled)) < 0) + bail("setsockopt SO_TIMESTAMPNS"); + + if (so_timestamping_flags && + setsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, + &so_timestamping_flags, + sizeof(so_timestamping_flags)) < 0) + bail("setsockopt SO_TIMESTAMPING"); + + /* request IP_PKTINFO for debugging purposes */ + if (setsockopt(sock, SOL_IP, IP_PKTINFO, + &enabled, sizeof(enabled)) < 0) + printf("%s: %s\n", "setsockopt IP_PKTINFO", strerror(errno)); + + /* verify socket options */ + len = sizeof(val); + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMP, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMP", strerror(errno)); + else + printf("SO_TIMESTAMP %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPNS, &val, &len) < 0) + printf("%s: %s\n", "getsockopt SO_TIMESTAMPNS", + strerror(errno)); + else + printf("SO_TIMESTAMPNS %d\n", val); + + if (getsockopt(sock, SOL_SOCKET, SO_TIMESTAMPING, &val, &len) < 0) { + printf("%s: %s\n", "getsockopt SO_TIMESTAMPING", + strerror(errno)); + } else { + printf("SO_TIMESTAMPING %d\n", val); + if (val != so_timestamping_flags) + printf(" not the expected value %d\n", + so_timestamping_flags); + } + + /* send packets forever every five seconds */ + gettimeofday(&next, 0); + next.tv_sec = (next.tv_sec + 1) / 5 * 5; + next.tv_usec = 0; + while (1) { + struct timeval now; + struct timeval delta; + long delta_us; + int res; + fd_set readfs, errorfs; + + gettimeofday(&now, 0); + delta_us = (long)(next.tv_sec - now.tv_sec) * 1000000 + + (long)(next.tv_usec - now.tv_usec); + if (delta_us > 0) { + /* continue waiting for timeout or data */ + delta.tv_sec = delta_us / 1000000; + delta.tv_usec = delta_us % 
1000000; + + FD_ZERO(&readfs); + FD_ZERO(&errorfs); + FD_SET(sock, &readfs); + FD_SET(sock, &errorfs); + printf("%ld.%06ld: select %ldus\n", + (long)now.tv_sec, (long)now.tv_usec, + delta_us); + res = select(sock + 1, &readfs, 0, &errorfs, &delta); + gettimeofday(&now, 0); + printf("%ld.%06ld: select returned: %d, %s\n", + (long)now.tv_sec, (long)now.tv_usec, + res, + res < 0 ? strerror(errno) : "success"); + if (res > 0) { + if (FD_ISSET(sock, &readfs)) + printf("ready for reading\n"); + if (FD_ISSET(sock, &errorfs)) + printf("has error\n"); + recvpacket(sock, 0, + siocgstamp, + siocgstampns); + recvpacket(sock, MSG_ERRQUEUE, + siocgstamp, + siocgstampns); + } + } else { + /* write one packet */ + sendpacket(sock, + (struct sockaddr *)&addr, + sizeof(addr)); + next.tv_sec += 5; + continue; + } + } + + return 0; +} diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c new file mode 100644 index 000000000000..011b0da6b033 --- /dev/null +++ b/tools/testing/selftests/net/txtimestamp.c @@ -0,0 +1,916 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Copyright 2014 Google Inc. + * Author: willemb@google.com (Willem de Bruijn) + * + * Test software tx timestamping, including + * + * - SCHED, SND and ACK timestamps + * - RAW, UDP and TCP + * - IPv4 and IPv6 + * - various packet sizes (to test GSO and TSO) + * + * Consult the command line arguments for help on running + * the various testcases. + * + * This test requires a dummy TCP server. 
+ * A simple `nc6 [-u] -l -p $DESTPORT` will do + */ + +#define _GNU_SOURCE + +#include <arpa/inet.h> +#include <asm/types.h> +#include <error.h> +#include <errno.h> +#include <inttypes.h> +#include <linux/errqueue.h> +#include <linux/if_ether.h> +#include <linux/ipv6.h> +#include <linux/net_tstamp.h> +#include <netdb.h> +#include <net/if.h> +#include <netinet/in.h> +#include <netinet/ip.h> +#include <netinet/udp.h> +#include <netinet/tcp.h> +#include <netpacket/packet.h> +#include <poll.h> +#include <stdarg.h> +#include <stdbool.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/epoll.h> +#include <sys/ioctl.h> +#include <sys/select.h> +#include <sys/socket.h> +#include <sys/time.h> +#include <sys/types.h> +#include <time.h> +#include <unistd.h> + +#define NSEC_PER_USEC 1000L +#define USEC_PER_SEC 1000000L +#define NSEC_PER_SEC 1000000000LL + +/* command line parameters */ +static int cfg_proto = SOCK_STREAM; +static int cfg_ipproto = IPPROTO_TCP; +static int cfg_num_pkts = 4; +static int do_ipv4 = 1; +static int do_ipv6 = 1; +static int cfg_payload_len = 10; +static int cfg_poll_timeout = 100; +static int cfg_delay_snd; +static int cfg_delay_ack; +static bool cfg_show_payload; +static bool cfg_do_pktinfo; +static bool cfg_busy_poll; +static int cfg_sleep_usec = 50 * 1000; +static bool cfg_loop_nodata; +static bool cfg_use_cmsg; +static bool cfg_use_pf_packet; +static bool cfg_use_epoll; +static bool cfg_epollet; +static bool cfg_do_listen; +static uint16_t dest_port = 9000; +static bool cfg_print_nsec; + +static struct sockaddr_in daddr; +static struct sockaddr_in6 daddr6; +static struct timespec ts_usr; + +static int saved_tskey = -1; +static int saved_tskey_type = -1; + +struct timing_event { + int64_t min; + int64_t max; + int64_t total; + int count; +}; + +static struct timing_event usr_enq; +static struct timing_event usr_snd; +static struct timing_event usr_ack; + +static bool test_failed; + +static int64_t 
timespec_to_ns64(struct timespec *ts) +{ + return ts->tv_sec * NSEC_PER_SEC + ts->tv_nsec; +} + +static int64_t timespec_to_us64(struct timespec *ts) +{ + return ts->tv_sec * USEC_PER_SEC + ts->tv_nsec / NSEC_PER_USEC; +} + +static void init_timing_event(struct timing_event *te) +{ + te->min = INT64_MAX; + te->max = 0; + te->total = 0; + te->count = 0; +} + +static void add_timing_event(struct timing_event *te, + struct timespec *t_start, struct timespec *t_end) +{ + int64_t ts_delta = timespec_to_ns64(t_end) - timespec_to_ns64(t_start); + + te->count++; + if (ts_delta < te->min) + te->min = ts_delta; + if (ts_delta > te->max) + te->max = ts_delta; + te->total += ts_delta; +} + +static void validate_key(int tskey, int tstype) +{ + int stepsize; + + /* compare key for each subsequent request + * must only test for one type, the first one requested + */ + if (saved_tskey == -1) + saved_tskey_type = tstype; + else if (saved_tskey_type != tstype) + return; + + stepsize = cfg_proto == SOCK_STREAM ? 
cfg_payload_len : 1; + if (tskey != saved_tskey + stepsize) { + fprintf(stderr, "ERROR: key %d, expected %d\n", + tskey, saved_tskey + stepsize); + test_failed = true; + } + + saved_tskey = tskey; +} + +static void validate_timestamp(struct timespec *cur, int min_delay) +{ + int max_delay = min_delay + 500 /* processing time upper bound */; + int64_t cur64, start64; + + cur64 = timespec_to_us64(cur); + start64 = timespec_to_us64(&ts_usr); + + if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { + fprintf(stderr, "ERROR: %lu us expected between %d and %d\n", + cur64 - start64, min_delay, max_delay); + test_failed = true; + } +} + +static void __print_ts_delta_formatted(int64_t ts_delta) +{ + if (cfg_print_nsec) + fprintf(stderr, "%lu ns", ts_delta); + else + fprintf(stderr, "%lu us", ts_delta / NSEC_PER_USEC); +} + +static void __print_timestamp(const char *name, struct timespec *cur, + uint32_t key, int payload_len) +{ + int64_t ts_delta; + + if (!(cur->tv_sec | cur->tv_nsec)) + return; + + if (cfg_print_nsec) + fprintf(stderr, " %s: %lu s %lu ns (seq=%u, len=%u)", + name, cur->tv_sec, cur->tv_nsec, + key, payload_len); + else + fprintf(stderr, " %s: %lu s %lu us (seq=%u, len=%u)", + name, cur->tv_sec, cur->tv_nsec / NSEC_PER_USEC, + key, payload_len); + + if (cur != &ts_usr) { + ts_delta = timespec_to_ns64(cur) - timespec_to_ns64(&ts_usr); + fprintf(stderr, " (USR +"); + __print_ts_delta_formatted(ts_delta); + fprintf(stderr, ")"); + } + + fprintf(stderr, "\n"); +} + +static void print_timestamp_usr(void) +{ + if (clock_gettime(CLOCK_REALTIME, &ts_usr)) + error(1, errno, "clock_gettime"); + + __print_timestamp(" USR", &ts_usr, 0, 0); +} + +static void print_timestamp(struct scm_timestamping *tss, int tstype, + int tskey, int payload_len) +{ + const char *tsname; + + validate_key(tskey, tstype); + + switch (tstype) { + case SCM_TSTAMP_SCHED: + tsname = " ENQ"; + validate_timestamp(&tss->ts[0], 0); + add_timing_event(&usr_enq, &ts_usr, &tss->ts[0]); + 
break; + case SCM_TSTAMP_SND: + tsname = " SND"; + validate_timestamp(&tss->ts[0], cfg_delay_snd); + add_timing_event(&usr_snd, &ts_usr, &tss->ts[0]); + break; + case SCM_TSTAMP_ACK: + tsname = " ACK"; + validate_timestamp(&tss->ts[0], cfg_delay_ack); + add_timing_event(&usr_ack, &ts_usr, &tss->ts[0]); + break; + default: + error(1, 0, "unknown timestamp type: %u", + tstype); + } + __print_timestamp(tsname, &tss->ts[0], tskey, payload_len); +} + +static void print_timing_event(char *name, struct timing_event *te) +{ + if (!te->count) + return; + + fprintf(stderr, " %s: count=%d", name, te->count); + fprintf(stderr, ", avg="); + __print_ts_delta_formatted((int64_t)(te->total / te->count)); + fprintf(stderr, ", min="); + __print_ts_delta_formatted(te->min); + fprintf(stderr, ", max="); + __print_ts_delta_formatted(te->max); + fprintf(stderr, "\n"); +} + +/* TODO: convert to check_and_print payload once API is stable */ +static void print_payload(char *data, int len) +{ + int i; + + if (!len) + return; + + if (len > 70) + len = 70; + + fprintf(stderr, "payload: "); + for (i = 0; i < len; i++) + fprintf(stderr, "%02hhx ", data[i]); + fprintf(stderr, "\n"); +} + +static void print_pktinfo(int family, int ifindex, void *saddr, void *daddr) +{ + char sa[INET6_ADDRSTRLEN], da[INET6_ADDRSTRLEN]; + + fprintf(stderr, " pktinfo: ifindex=%u src=%s dst=%s\n", + ifindex, + saddr ? inet_ntop(family, saddr, sa, sizeof(sa)) : "unknown", + daddr ? 
inet_ntop(family, daddr, da, sizeof(da)) : "unknown"); +} + +static void __epoll(int epfd) +{ + struct epoll_event events; + int ret; + + memset(&events, 0, sizeof(events)); + ret = epoll_wait(epfd, &events, 1, cfg_poll_timeout); + if (ret != 1) + error(1, errno, "epoll_wait"); +} + +static void __poll(int fd) +{ + struct pollfd pollfd; + int ret; + + memset(&pollfd, 0, sizeof(pollfd)); + pollfd.fd = fd; + ret = poll(&pollfd, 1, cfg_poll_timeout); + if (ret != 1) + error(1, errno, "poll"); +} + +static void __recv_errmsg_cmsg(struct msghdr *msg, int payload_len) +{ + struct sock_extended_err *serr = NULL; + struct scm_timestamping *tss = NULL; + struct cmsghdr *cm; + int batch = 0; + + for (cm = CMSG_FIRSTHDR(msg); + cm && cm->cmsg_len; + cm = CMSG_NXTHDR(msg, cm)) { + if (cm->cmsg_level == SOL_SOCKET && + cm->cmsg_type == SCM_TIMESTAMPING) { + tss = (void *) CMSG_DATA(cm); + } else if ((cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_RECVERR) || + (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_RECVERR) || + (cm->cmsg_level == SOL_PACKET && + cm->cmsg_type == PACKET_TX_TIMESTAMP)) { + serr = (void *) CMSG_DATA(cm); + if (serr->ee_errno != ENOMSG || + serr->ee_origin != SO_EE_ORIGIN_TIMESTAMPING) { + fprintf(stderr, "unknown ip error %d %d\n", + serr->ee_errno, + serr->ee_origin); + serr = NULL; + } + } else if (cm->cmsg_level == SOL_IP && + cm->cmsg_type == IP_PKTINFO) { + struct in_pktinfo *info = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET, info->ipi_ifindex, + &info->ipi_spec_dst, &info->ipi_addr); + } else if (cm->cmsg_level == SOL_IPV6 && + cm->cmsg_type == IPV6_PKTINFO) { + struct in6_pktinfo *info6 = (void *) CMSG_DATA(cm); + print_pktinfo(AF_INET6, info6->ipi6_ifindex, + NULL, &info6->ipi6_addr); + } else + fprintf(stderr, "unknown cmsg %d,%d\n", + cm->cmsg_level, cm->cmsg_type); + + if (serr && tss) { + print_timestamp(tss, serr->ee_info, serr->ee_data, + payload_len); + serr = NULL; + tss = NULL; + batch++; + } + } + + if (batch > 1) + 
fprintf(stderr, "batched %d timestamps\n", batch); +} + +static int recv_errmsg(int fd) +{ + static char ctrl[1024 /* overprovision*/]; + static struct msghdr msg; + struct iovec entry; + static char *data; + int ret = 0; + + data = malloc(cfg_payload_len); + if (!data) + error(1, 0, "malloc"); + + memset(&msg, 0, sizeof(msg)); + memset(&entry, 0, sizeof(entry)); + memset(ctrl, 0, sizeof(ctrl)); + + entry.iov_base = data; + entry.iov_len = cfg_payload_len; + msg.msg_iov = &entry; + msg.msg_iovlen = 1; + msg.msg_name = NULL; + msg.msg_namelen = 0; + msg.msg_control = ctrl; + msg.msg_controllen = sizeof(ctrl); + + ret = recvmsg(fd, &msg, MSG_ERRQUEUE); + if (ret == -1 && errno != EAGAIN) + error(1, errno, "recvmsg"); + + if (ret >= 0) { + __recv_errmsg_cmsg(&msg, ret); + if (cfg_show_payload) + print_payload(data, cfg_payload_len); + } + + free(data); + return ret == -1; +} + +static uint16_t get_ip_csum(const uint16_t *start, int num_words, + unsigned long sum) +{ + int i; + + for (i = 0; i < num_words; i++) + sum += start[i]; + + while (sum >> 16) + sum = (sum & 0xFFFF) + (sum >> 16); + + return ~sum; +} + +static uint16_t get_udp_csum(const struct udphdr *udph, int alen) +{ + unsigned long pseudo_sum, csum_len; + const void *csum_start = udph; + + pseudo_sum = htons(IPPROTO_UDP); + pseudo_sum += udph->len; + + /* checksum ip(v6) addresses + udp header + payload */ + csum_start -= alen * 2; + csum_len = ntohs(udph->len) + alen * 2; + + return get_ip_csum(csum_start, csum_len >> 1, pseudo_sum); +} + +static int fill_header_ipv4(void *p) +{ + struct iphdr *iph = p; + + memset(iph, 0, sizeof(*iph)); + + iph->ihl = 5; + iph->version = 4; + iph->ttl = 2; + iph->saddr = daddr.sin_addr.s_addr; /* set for udp csum calc */ + iph->daddr = daddr.sin_addr.s_addr; + iph->protocol = IPPROTO_UDP; + + /* kernel writes saddr, csum, len */ + + return sizeof(*iph); +} + +static int fill_header_ipv6(void *p) +{ + struct ipv6hdr *ip6h = p; + + memset(ip6h, 0, sizeof(*ip6h)); + + 
ip6h->version = 6; + ip6h->payload_len = htons(sizeof(struct udphdr) + cfg_payload_len); + ip6h->nexthdr = IPPROTO_UDP; + ip6h->hop_limit = 64; + + ip6h->saddr = daddr6.sin6_addr; + ip6h->daddr = daddr6.sin6_addr; + + /* kernel does not write saddr in case of ipv6 */ + + return sizeof(*ip6h); +} + +static void fill_header_udp(void *p, bool is_ipv4) +{ + struct udphdr *udph = p; + + udph->source = ntohs(dest_port + 1); /* spoof */ + udph->dest = ntohs(dest_port); + udph->len = ntohs(sizeof(*udph) + cfg_payload_len); + udph->check = 0; + + udph->check = get_udp_csum(udph, is_ipv4 ? sizeof(struct in_addr) : + sizeof(struct in6_addr)); +} + +static void do_test(int family, unsigned int report_opt) +{ + char control[CMSG_SPACE(sizeof(uint32_t))]; + struct sockaddr_ll laddr; + unsigned int sock_opt; + struct cmsghdr *cmsg; + struct msghdr msg; + struct iovec iov; + char *buf; + int fd, i, val = 1, total_len, epfd = 0; + + init_timing_event(&usr_enq); + init_timing_event(&usr_snd); + init_timing_event(&usr_ack); + + total_len = cfg_payload_len; + if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { + total_len += sizeof(struct udphdr); + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) + if (family == PF_INET) + total_len += sizeof(struct iphdr); + else + total_len += sizeof(struct ipv6hdr); + + /* special case, only rawv6_sendmsg: + * pass proto in sin6_port if not connected + * also see ANK comment in net/ipv4/raw.c + */ + daddr6.sin6_port = htons(cfg_ipproto); + } + + buf = malloc(total_len); + if (!buf) + error(1, 0, "malloc"); + + fd = socket(cfg_use_pf_packet ? 
PF_PACKET : family, + cfg_proto, cfg_ipproto); + if (fd < 0) + error(1, errno, "socket"); + + if (cfg_use_epoll) { + struct epoll_event ev; + + memset(&ev, 0, sizeof(ev)); + ev.data.fd = fd; + if (cfg_epollet) + ev.events |= EPOLLET; + epfd = epoll_create(1); + if (epfd <= 0) + error(1, errno, "epoll_create"); + if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev)) + error(1, errno, "epoll_ctl"); + } + + /* reset expected key on each new socket */ + saved_tskey = -1; + + if (cfg_proto == SOCK_STREAM) { + if (setsockopt(fd, IPPROTO_TCP, TCP_NODELAY, + (char*) &val, sizeof(val))) + error(1, 0, "setsockopt no nagle"); + + if (family == PF_INET) { + if (connect(fd, (void *) &daddr, sizeof(daddr))) + error(1, errno, "connect ipv4"); + } else { + if (connect(fd, (void *) &daddr6, sizeof(daddr6))) + error(1, errno, "connect ipv6"); + } + } + + if (cfg_do_pktinfo) { + if (family == AF_INET6) { + if (setsockopt(fd, SOL_IPV6, IPV6_RECVPKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv6"); + } else { + if (setsockopt(fd, SOL_IP, IP_PKTINFO, + &val, sizeof(val))) + error(1, errno, "setsockopt pktinfo ipv4"); + } + } + + sock_opt = SOF_TIMESTAMPING_SOFTWARE | + SOF_TIMESTAMPING_OPT_CMSG | + SOF_TIMESTAMPING_OPT_ID; + + if (!cfg_use_cmsg) + sock_opt |= report_opt; + + if (cfg_loop_nodata) + sock_opt |= SOF_TIMESTAMPING_OPT_TSONLY; + + if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, + (char *) &sock_opt, sizeof(sock_opt))) + error(1, 0, "setsockopt timestamping"); + + for (i = 0; i < cfg_num_pkts; i++) { + memset(&msg, 0, sizeof(msg)); + memset(buf, 'a' + i, total_len); + + if (cfg_use_pf_packet || cfg_proto == SOCK_RAW) { + int off = 0; + + if (cfg_use_pf_packet || cfg_ipproto == IPPROTO_RAW) { + if (family == PF_INET) + off = fill_header_ipv4(buf); + else + off = fill_header_ipv6(buf); + } + + fill_header_udp(buf + off, family == PF_INET); + } + + print_timestamp_usr(); + + iov.iov_base = buf; + iov.iov_len = total_len; + + if (cfg_proto != SOCK_STREAM) { + if 
(cfg_use_pf_packet) { + memset(&laddr, 0, sizeof(laddr)); + + laddr.sll_family = AF_PACKET; + laddr.sll_ifindex = 1; + laddr.sll_protocol = htons(family == AF_INET ? ETH_P_IP : ETH_P_IPV6); + laddr.sll_halen = ETH_ALEN; + + msg.msg_name = (void *)&laddr; + msg.msg_namelen = sizeof(laddr); + } else if (family == PF_INET) { + msg.msg_name = (void *)&daddr; + msg.msg_namelen = sizeof(daddr); + } else { + msg.msg_name = (void *)&daddr6; + msg.msg_namelen = sizeof(daddr6); + } + } + + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + + if (cfg_use_cmsg) { + memset(control, 0, sizeof(control)); + + msg.msg_control = control; + msg.msg_controllen = sizeof(control); + + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SO_TIMESTAMPING; + cmsg->cmsg_len = CMSG_LEN(sizeof(uint32_t)); + + *((uint32_t *) CMSG_DATA(cmsg)) = report_opt; + } + + val = sendmsg(fd, &msg, 0); + if (val != total_len) + error(1, errno, "send"); + + /* wait for all errors to be queued, else ACKs arrive OOO */ + if (cfg_sleep_usec) + usleep(cfg_sleep_usec); + + if (!cfg_busy_poll) { + if (cfg_use_epoll) + __epoll(epfd); + else + __poll(fd); + } + + while (!recv_errmsg(fd)) {} + } + + print_timing_event("USR-ENQ", &usr_enq); + print_timing_event("USR-SND", &usr_snd); + print_timing_event("USR-ACK", &usr_ack); + + if (close(fd)) + error(1, errno, "close"); + + free(buf); + usleep(100 * NSEC_PER_USEC); +} + +static void __attribute__((noreturn)) usage(const char *filepath) +{ + fprintf(stderr, "\nUsage: %s [options] hostname\n" + "\nwhere options are:\n" + " -4: only IPv4\n" + " -6: only IPv6\n" + " -h: show this message\n" + " -b: busy poll to read from error queue\n" + " -c N: number of packets for each test\n" + " -C: use cmsg to set tstamp recording options\n" + " -e: use level-triggered epoll() instead of poll()\n" + " -E: use event-triggered epoll() instead of poll()\n" + " -F: poll()/epoll() waits forever for an event\n" + " -I: request PKTINFO\n" + " -l N: send N bytes at a 
time\n" + " -L listen on hostname and port\n" + " -n: set no-payload option\n" + " -N: print timestamps and durations in nsec (instead of usec)\n" + " -p N: connect to port N\n" + " -P: use PF_PACKET\n" + " -r: use raw\n" + " -R: use raw (IP_HDRINCL)\n" + " -S N: usec to sleep before reading error queue\n" + " -u: use udp\n" + " -v: validate SND delay (usec)\n" + " -V: validate ACK delay (usec)\n" + " -x: show payload (up to 70 bytes)\n", + filepath); + exit(1); +} + +static void parse_opt(int argc, char **argv) +{ + int proto_count = 0; + int c; + + while ((c = getopt(argc, argv, + "46bc:CeEFhIl:LnNp:PrRS:uv:V:x")) != -1) { + switch (c) { + case '4': + do_ipv6 = 0; + break; + case '6': + do_ipv4 = 0; + break; + case 'b': + cfg_busy_poll = true; + break; + case 'c': + cfg_num_pkts = strtoul(optarg, NULL, 10); + break; + case 'C': + cfg_use_cmsg = true; + break; + case 'e': + cfg_use_epoll = true; + break; + case 'E': + cfg_use_epoll = true; + cfg_epollet = true; + case 'F': + cfg_poll_timeout = -1; + break; + case 'I': + cfg_do_pktinfo = true; + break; + case 'l': + cfg_payload_len = strtoul(optarg, NULL, 10); + break; + case 'L': + cfg_do_listen = true; + break; + case 'n': + cfg_loop_nodata = true; + break; + case 'N': + cfg_print_nsec = true; + break; + case 'p': + dest_port = strtoul(optarg, NULL, 10); + break; + case 'P': + proto_count++; + cfg_use_pf_packet = true; + cfg_proto = SOCK_DGRAM; + cfg_ipproto = 0; + break; + case 'r': + proto_count++; + cfg_proto = SOCK_RAW; + cfg_ipproto = IPPROTO_UDP; + break; + case 'R': + proto_count++; + cfg_proto = SOCK_RAW; + cfg_ipproto = IPPROTO_RAW; + break; + case 'S': + cfg_sleep_usec = strtoul(optarg, NULL, 10); + break; + case 'u': + proto_count++; + cfg_proto = SOCK_DGRAM; + cfg_ipproto = IPPROTO_UDP; + break; + case 'v': + cfg_delay_snd = strtoul(optarg, NULL, 10); + break; + case 'V': + cfg_delay_ack = strtoul(optarg, NULL, 10); + break; + case 'x': + cfg_show_payload = true; + break; + case 'h': + default: + 
usage(argv[0]); + } + } + + if (!cfg_payload_len) + error(1, 0, "payload may not be nonzero"); + if (cfg_proto != SOCK_STREAM && cfg_payload_len > 1472) + error(1, 0, "udp packet might exceed expected MTU"); + if (!do_ipv4 && !do_ipv6) + error(1, 0, "pass -4 or -6, not both"); + if (proto_count > 1) + error(1, 0, "pass -P, -r, -R or -u, not multiple"); + if (cfg_do_pktinfo && cfg_use_pf_packet) + error(1, 0, "cannot ask for pktinfo over pf_packet"); + if (cfg_busy_poll && cfg_use_epoll) + error(1, 0, "pass epoll or busy_poll, not both"); + + if (optind != argc - 1) + error(1, 0, "missing required hostname argument"); +} + +static void resolve_hostname(const char *hostname) +{ + struct addrinfo hints = { .ai_family = do_ipv4 ? AF_INET : AF_INET6 }; + struct addrinfo *addrs, *cur; + int have_ipv4 = 0, have_ipv6 = 0; + +retry: + if (getaddrinfo(hostname, NULL, &hints, &addrs)) + error(1, errno, "getaddrinfo"); + + cur = addrs; + while (cur && !have_ipv4 && !have_ipv6) { + if (!have_ipv4 && cur->ai_family == AF_INET) { + memcpy(&daddr, cur->ai_addr, sizeof(daddr)); + daddr.sin_port = htons(dest_port); + have_ipv4 = 1; + } + else if (!have_ipv6 && cur->ai_family == AF_INET6) { + memcpy(&daddr6, cur->ai_addr, sizeof(daddr6)); + daddr6.sin6_port = htons(dest_port); + have_ipv6 = 1; + } + cur = cur->ai_next; + } + if (addrs) + freeaddrinfo(addrs); + + if (do_ipv6 && hints.ai_family != AF_INET6) { + hints.ai_family = AF_INET6; + goto retry; + } + + do_ipv4 &= have_ipv4; + do_ipv6 &= have_ipv6; +} + +static void do_listen(int family, void *addr, int alen) +{ + int fd, type; + + type = cfg_proto == SOCK_RAW ? SOCK_DGRAM : cfg_proto; + + fd = socket(family, type, 0); + if (fd == -1) + error(1, errno, "socket rx"); + + if (bind(fd, addr, alen)) + error(1, errno, "bind rx"); + + if (type == SOCK_STREAM && listen(fd, 10)) + error(1, errno, "listen rx"); + + /* leave fd open, will be closed on process exit. 
+ * this enables connect() to succeed and avoids icmp replies + */ +} + +static void do_main(int family) +{ + fprintf(stderr, "family: %s %s\n", + family == PF_INET ? "INET" : "INET6", + cfg_use_pf_packet ? "(PF_PACKET)" : ""); + + fprintf(stderr, "test SND\n"); + do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE); + + fprintf(stderr, "test ENQ\n"); + do_test(family, SOF_TIMESTAMPING_TX_SCHED); + + fprintf(stderr, "test ENQ + SND\n"); + do_test(family, SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE); + + if (cfg_proto == SOCK_STREAM) { + fprintf(stderr, "\ntest ACK\n"); + do_test(family, SOF_TIMESTAMPING_TX_ACK); + + fprintf(stderr, "\ntest SND + ACK\n"); + do_test(family, SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK); + + fprintf(stderr, "\ntest ENQ + SND + ACK\n"); + do_test(family, SOF_TIMESTAMPING_TX_SCHED | + SOF_TIMESTAMPING_TX_SOFTWARE | + SOF_TIMESTAMPING_TX_ACK); + } +} + +const char *sock_names[] = { NULL, "TCP", "UDP", "RAW" }; + +int main(int argc, char **argv) +{ + if (argc == 1) + usage(argv[0]); + + parse_opt(argc, argv); + resolve_hostname(argv[argc - 1]); + + fprintf(stderr, "protocol: %s\n", sock_names[cfg_proto]); + fprintf(stderr, "payload: %u\n", cfg_payload_len); + fprintf(stderr, "server port: %u\n", dest_port); + fprintf(stderr, "\n"); + + if (do_ipv4) { + if (cfg_do_listen) + do_listen(PF_INET, &daddr, sizeof(daddr)); + do_main(PF_INET); + } + + if (do_ipv6) { + if (cfg_do_listen) + do_listen(PF_INET6, &daddr6, sizeof(daddr6)); + do_main(PF_INET6); + } + + return test_failed; +} diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh new file mode 100755 index 000000000000..eea6f5193693 --- /dev/null +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -0,0 +1,82 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 +# +# Send packets with transmit timestamps over loopback with netem +# Verify that timestamps correspond to netem delay + +set -e + +setup() { + # set 1ms delay on lo 
egress + tc qdisc add dev lo root netem delay 1ms + + # set 2ms delay on ifb0 egress + modprobe ifb + ip link add ifb_netem0 type ifb + ip link set dev ifb_netem0 up + tc qdisc add dev ifb_netem0 root netem delay 2ms + + # redirect lo ingress through ifb0 egress + tc qdisc add dev lo handle ffff: ingress + tc filter add dev lo parent ffff: \ + u32 match mark 0 0xffff \ + action mirred egress redirect dev ifb_netem0 +} + +run_test_v4v6() { + # SND will be delayed 1000us + # ACK will be delayed 6000us: 1 + 2 ms round-trip + local -r args="$@ -v 1000 -V 6000" + + ./txtimestamp ${args} -4 -L 127.0.0.1 + ./txtimestamp ${args} -6 -L ::1 +} + +run_test_tcpudpraw() { + local -r args=$@ + + run_test_v4v6 ${args} # tcp + run_test_v4v6 ${args} -u # udp + run_test_v4v6 ${args} -r # raw + run_test_v4v6 ${args} -R # raw (IPPROTO_RAW) + run_test_v4v6 ${args} -P # pf_packet +} + +run_test_all() { + setup + run_test_tcpudpraw # setsockopt + run_test_tcpudpraw -C # cmsg + run_test_tcpudpraw -n # timestamp w/o data + echo "OK. All tests passed" +} + +run_test_one() { + setup + ./txtimestamp $@ +} + +usage() { + echo "Usage: $0 [ -r | --run ] <txtimestamp args> | [ -h | --help ]" + echo " (no args) Run all tests" + echo " -r|--run Run an individual test with arguments" + echo " -h|--help Help" +} + +main() { + if [[ $# -eq 0 ]]; then + run_test_all + else + if [[ "$1" = "-r" || "$1" == "--run" ]]; then + shift + run_test_one $@ + else + usage + fi + fi +} + +if [[ "$(ip netns identify)" == "root" ]]; then + ./in_netns.sh $0 $@ +else + main $@ +fi |