#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.
#
# Test driver for the HBM (Host Bandwidth Manager) sample.
#
# Flow of the script:
#   1. parse the command line into the global settings below,
#   2. make sure bpffs is mounted and start ./hbm in the background,
#   3. move this shell into the hbm<id> test cgroup,
#   4. apply the requested ECN / congestion control / qdisc settings,
#   5. generate load with netperf or iperf3 and print the measured goodput,
#   6. undo the system settings changed in step 4 and stop the helpers.
#
# Files written in the current directory: hbm.out, ping.out and, depending
# on the mode, netperf.<id>.<flow>, iperf.<id> or iperf3.<id>.<flow>.

#######################################
# Print the help text and exit.
# Globals:   name (read)
# Arguments: $1 - exit status (optional, defaults to 0)
# Outputs:   help text on stdout
#######################################
Usage() {
  cat <<EOF
Script for testing HBM (Host Bandwidth Manager) framework.
It creates a cgroup to use for testing and load a BPF program to limit
egress or ingress bandwidth. It then uses iperf3 or netperf to create
loads. The output is the goodput in Mbps (unless -D was used).

USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]
             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]
             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]
             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]
             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]
             [-s=<server>|--server=<server>] [-S|--stats]
             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]
  Where:
    out               egress (default)
    -b or --bpf       BPF program filename to load and attach.
                      Default is hbm_out_kern.o for egress,
    -c or --cc        TCP congestion control (cubic or dctcp)
    --debug           print BPF trace buffer
    -d or --delay     add a delay in ms using netem
    -D                In addition to the goodput in Mbps, it also outputs
                      other detailed information. This information is
                      test dependent (i.e. iperf3 or netperf).
    -E                enable ECN (not required for dctcp)
    --edt             use fq's Earliest Departure Time (requires fq)
    -f or --flows     number of concurrent flows (default=1)
    -i or --id        cgroup id (an integer, default is 1)
    -N                use netperf instead of iperf3
    --no_cn           Do not return CN notifications
    -l                do not limit flows using loopback
    -h                Help
    -p or --port      iperf3 port (default is 5201)
    -P                use an iperf3 instance for each flow
    -q                use the specified qdisc
    -r or --rate      rate in Mbps (default is 1Gbps)
    -R                Use TCP_RR for netperf. 1st flow has req
                      size of 10KB, rest of 1MB. Reply in all
                      cases is 1 byte.
                      More detailed output for each flow can be found
                      in the files netperf.<cg>.<flow>, where <cg> is the
                      cgroup id as specified with the -i flag, and <flow>
                      is the flow id starting at 1 and increasing by 1 for
                      each flow (as specified by -f).
    -s or --server    hostname of netperf server. Used to create netperf
                      test traffic between two hosts (default is within host)
                      netserver must be running on the host.
    -S or --stats     whether to update hbm stats (default is yes).
    -t or --time      duration of iperf3 in seconds (default=5)
    -w                Work conserving flag. cgroup can increase its
                      bandwidth beyond the rate limit specified
                      while there is available bandwidth. Current
                      implementation assumes there is only one NIC
                      (eth0), but can be extended to support multiple
                      NICs.
    cubic or dctcp    specify which TCP CC to use

EOF
  exit "${1:-0}"
}

#set -x

# Settings filled in by processArgs (defaults shown).
debug_flag=0
name="$0"
netem=0         # netem delay in ms; 0 means no netem qdisc
cc=x            # "x" means: leave the congestion control untouched
dir="-o"
dir_name="out"
dur=5
flows=1
id=1
prog=""
port=5201
rate=1000
multi_iperf=0
flow_cnt=1
use_netperf=0
rr=0
ecn=0
details=0
server=""
qdisc=""
flags=""        # extra options forwarded verbatim to ./hbm
do_stats=0

BPFFS=/sys/fs/bpf

#######################################
# Abort unless the value is a non-negative decimal integer.
# Arguments: $1 - option name used in the error message
#            $2 - value to check
# Outputs:   error message on stderr when the value is rejected
#######################################
require_uint() {
  case $2 in
    '' | *[!0-9]*)
      echo "Invalid value for $1: '$2' (expected a non-negative integer)" >&2
      exit 1
      ;;
  esac
}

#######################################
# Mount bpffs on $BPFFS unless the mount table already lists it.
# Globals: BPFFS (read)
# Outputs: a status line on stdout
#######################################
config_bpffs() {
  if mount | grep -q "$BPFFS"; then
    echo "bpffs already mounted"
  else
    echo "bpffs not mounted. Mounting..."
    mount -t bpf none "$BPFFS"
  fi
}

#######################################
# Start ./hbm in the background, logging its command line and output
# to hbm.out.
# Globals: dir, id, rate, dur, flags, dbg, prog (read)
# Outputs: PID of the background hbm process on stdout
#######################################
start_hbm() {
  rm -f hbm.out
  echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
  echo " " >> hbm.out
  # $flags, $dbg and $prog are deliberately unquoted: $flags holds several
  # options, and empty values must vanish instead of becoming "" arguments.
  # $dbg is never assigned in this script, so it expands to nothing unless
  # it is inherited from the environment.
  # shellcheck disable=SC2086
  ./hbm $dir -n "$id" -r "$rate" -t "$dur" $flags $dbg $prog >> hbm.out 2>&1 &
  echo "$!"
}

#######################################
# Parse the command line into the global settings.
# Arguments: the script's positional parameters ("$@")
# Outputs:   help text (via Usage) for -h or an unknown argument
#######################################
processArgs() {
  local i
  for i in "$@"; do
    case $i in
      # Support for upcoming ingress rate limiting
      #in)
      #  dir="-i"
      #  dir_name="in"
      #  ;;
      out)
        dir="-o"
        dir_name="out"
        ;;
      -b=* | --bpf=*)
        prog="${i#*=}"
        ;;
      -c=* | --cc=*)
        cc="${i#*=}"
        ;;
      --no_cn)
        flags="$flags --no_cn"
        ;;
      --debug)
        flags="$flags -d"
        debug_flag=1
        ;;
      -d=* | --delay=*)
        netem="${i#*=}"
        ;;
      -D)
        details=1
        ;;
      -E)
        ecn=1
        ;;
      --edt)
        flags="$flags --edt"
        qdisc="fq"
        ;;
      -f=* | --flows=*)
        flows="${i#*=}"
        ;;
      -h | --help)
        Usage
        ;;
      -i=* | --id=*)
        id="${i#*=}"
        ;;
      -l)
        flags="$flags -l"
        ;;
      -N)
        use_netperf=1
        ;;
      -p=* | --port=*)
        port="${i#*=}"
        ;;
      -P)
        multi_iperf=1
        ;;
      -q=*)
        qdisc="${i#*=}"
        ;;
      -r=* | --rate=*)
        rate="${i#*=}"
        ;;
      -R)
        rr=1
        ;;
      -s=* | --server=*)
        server="${i#*=}"
        ;;
      -S | --stats)
        flags="$flags -s"
        do_stats=1
        ;;
      -t=* | --time=*)
        dur="${i#*=}"
        ;;
      -w)
        flags="$flags -w"
        ;;
      cubic)
        cc=cubic
        ;;
      dctcp)
        cc=dctcp
        ;;
      *)
        echo "Unknown arg:$i" >&2
        Usage 1
        ;;
    esac
  done
}

#######################################
# Print the details header and, if stats were requested and the stats
# file exists, its contents.
# Globals: id, do_stats, dir_name (read)
#######################################
print_hbm_details() {
  echo ""
  echo "Details for HBM in cgroup $id"
  if [ "$do_stats" -eq 1 ] && [ -e "hbm.$id.$dir_name" ]; then
    cat "hbm.$id.$dir_name"
  fi
}

#######################################
# Print the final result: just the goodput, or (with -D) the average
# ping delay extracted from ping.out followed by the aggregate goodput.
# Globals:   details (read)
# Arguments: $1 - aggregate goodput
#######################################
report_goodput() {
  local delay
  if [ "$details" -ne 0 ]; then
    delay=$(grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$")
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$1"
  else
    echo "$1"
  fi
}

#######################################
# Netperf mode: start a local netserver if needed, run one netperf per
# flow in the background, then sum the per-flow throughput.
# Globals: server, host, cc, flows, rr, dir, dur, id, details (read)
#          begNetserverPid, flow_cnt, rate (written)
#######################################
run_netperf_test() {
  local np_server reqSize replySize r p
  local -a np_cc=()

  # Remember whether a netserver was already running so that teardown
  # only kills one that this script started.
  begNetserverPid=$(pgrep -x netserver)
  if [ -z "$begNetserverPid" ] && [ -z "$server" ]; then
    ( ./netserver > /dev/null 2>&1 ) &
    sleep 0.1
  fi

  if [ -z "$server" ]; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" != "x" ]; then
    np_cc=(-K "$cc,$cc")
  fi

  replySize=1
  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ]; do
    if [ "$rr" -ne 0 ]; then
      reqSize=1M
      if [ "$flow_cnt" -eq 1 ]; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ]; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r "$reqSize,$replySize" "${np_cc[@]}" \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    elif [ "$dir" == "-i" ]; then
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_RR -- \
          -r 1,10M "${np_cc[@]}" \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    else
      ( ./netperf -H "$np_server" -l "$dur" -f m -j -t TCP_STREAM -- \
          "${np_cc[@]}" \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > "netperf.$id.$flow_cnt" ) &
    fi
    flow_cnt=$((flow_cnt + 1))
  done

  # sleep for duration of test (plus some buffer)
  sleep "$((dur + 2))"

  # force graceful termination of netperf
  for p in $(pgrep netperf); do
    kill -SIGALRM "$p"
  done

  rate=0
  if [ "$details" -ne 0 ]; then
    print_hbm_details
  fi

  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ]; do
    if [ "$dir" == "-i" ]; then
      r=$(grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    else
      r=$(grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" "netperf.$id.$flow_cnt" | grep -o "[0-9]*")
    fi
    echo "rate for flow $flow_cnt: $r"
    # A flow that produced no throughput line counts as 0.
    rate=$((rate + ${r:-0}))
    if [ "$details" -ne 0 ]; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat "netperf.$id.$flow_cnt"
    fi
    flow_cnt=$((flow_cnt + 1))
  done

  if [ "$details" -ne 0 ]; then
    echo ""
  fi
  report_goodput "$rate"
}

#######################################
# Default mode: one iperf3 server/client pair carrying all flows (-P).
# Globals: port, host, flows, dur, id, details (read); rate (written)
#######################################
run_single_iperf_test() {
  ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
  sleep 0.1
  iperf3 -c "$host" -p "$port" -i 0 -P "$flows" -f m -t "$dur" > "iperf.$id"
  # Every "receiver" line yields one integer; the last one is kept (the
  # original code did the same with `echo $rates | grep -o "[0-9]*$"`).
  rate=$(grep receiver "iperf.$id" | grep -o "[0-9.]* Mbits" \
    | grep -o "^[0-9]*" | tail -n 1)
  if [ "$details" -ne 0 ]; then
    print_hbm_details
  fi
  report_goodput "$rate"
}

#######################################
# -P mode: one iperf3 server/client pair per flow, each on its own port.
# Globals: flows, host, dur, id, details (read)
#          port, flow_cnt, rate (written)
#######################################
run_multi_iperf_test() {
  local r

  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ]; do
    ( iperf3 -s -p "$port" -1 > /dev/null 2>&1 ) &
    ( iperf3 -c "$host" -p "$port" -i 0 -P 1 -f m -t "$dur" \
        | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" \
        | grep -o "[0-9]*$" > "iperf3.$id.$flow_cnt" ) &
    port=$((port + 1))
    flow_cnt=$((flow_cnt + 1))
  done

  sleep "$((dur + 1))"

  rate=0
  if [ "$details" -ne 0 ]; then
    print_hbm_details
  fi

  flow_cnt=1
  while [ "$flow_cnt" -le "$flows" ]; do
    r=$(cat "iperf3.$id.$flow_cnt")
    if [ "$details" -ne 0 ]; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    # A flow that produced no result counts as 0.
    rate=$((rate + ${r:-0}))
    flow_cnt=$((flow_cnt + 1))
  done

  report_goodput "$rate"
}

# ---------------------------------------------------------------------------
# Setup
# ---------------------------------------------------------------------------

# Pass "$@" straight through so arguments are neither re-split nor
# glob-expanded (e.g. -b=*.o).
processArgs "$@"

# These values are used in shell arithmetic and numeric tests below.
require_uint "-t/--time" "$dur"
require_uint "-f/--flows" "$flows"
require_uint "-p/--port" "$port"
require_uint "-d/--delay" "$netem"

config_bpffs

if [ "$debug_flag" -eq 1 ]; then
  rm -f hbm_out.log
fi

hbm_pid=$(start_hbm)
# Short pause after launching hbm before the cgroup directory is used.
sleep 0.1

host=$(hostname)
cg_base_dir=/sys/fs/cgroup/unified
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Move this shell into the test cgroup.
echo $$ >> "$cg_dir/cgroup.procs"

ulimit -l unlimited

rm -f ss.out
rm -f hbm.[0-9]*."$dir_name"

if [ "$ecn" -ne 0 ]; then
  # Save the current setting so teardown restores it instead of forcing 0.
  prev_ecn=$(sysctl -n net.ipv4.tcp_ecn)
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

if [ "$use_netperf" -eq 0 ]; then
  cur_cc=$(sysctl -n net.ipv4.tcp_congestion_control)
  if [ "$cc" != "x" ]; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control="$cc"
  fi
fi

if [ "$netem" -ne 0 ]; then
  if [ -n "$qdisc" ]; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay "${netem}ms" > /dev/null 2>&1
elif [ -n "$qdisc" ]; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
  # $qdisc may carry qdisc parameters, so it is left unquoted on purpose.
  # shellcheck disable=SC2086
  tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi

# Ping the traffic target 5 times per second for the test duration; the
# average delay is reported by report_goodput in details mode.
ping_cnt=$((dur * 5))
hn="::1"
if [ "$use_netperf" -ne 0 ] && [ -n "$server" ]; then
  hn=$server
fi
( ping6 -i 0.2 -c "$ping_cnt" "$hn" > ping.out 2>&1 ) &

# ---------------------------------------------------------------------------
# Load generation
# ---------------------------------------------------------------------------

if [ "$use_netperf" -ne 0 ]; then
  run_netperf_test
elif [ "$multi_iperf" -eq 0 ]; then
  run_single_iperf_test
else
  run_multi_iperf_test
fi

# ---------------------------------------------------------------------------
# Teardown
# ---------------------------------------------------------------------------

if [ "$use_netperf" -eq 0 ]; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control="$cur_cc"
fi
if [ "$ecn" -ne 0 ]; then
  # Fall back to 0 (the old behaviour) if the saved value could not be read.
  sysctl -w -q -n net.ipv4.tcp_ecn="${prev_ecn:-0}"
fi

# Remove only the qdisc that setup installed: with -d the -q option was
# ignored, so the eth0 root qdisc was never replaced by this script.
if [ "$netem" -ne 0 ]; then
  tc qdisc del dev lo root > /dev/null 2>&1
elif [ -n "$qdisc" ]; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

# Kill our hbm instance if it is still running. Looking the PID up in the
# list of hbm processes also works when other hbm instances are alive.
if [ -n "$hbm_pid" ] && pgrep -x hbm | grep -qx "$hbm_pid"; then
  kill "$hbm_pid"
fi

sleep 1

# Detach any pinned BPF programs that may have lingered
rm -rf "${BPFFS:?}"/hbm*

# Stop netserver only if this script started it (none was running before
# the test and no remote server was given).
if [ "$use_netperf" -ne 0 ] && [ -z "$server" ] && [ -z "$begNetserverPid" ]; then
  netserverPid=$(pgrep -x netserver)
  if [ -n "$netserverPid" ]; then
    # May hold several PIDs, one per line; split on purpose.
    # shellcheck disable=SC2086
    kill $netserverPid
  fi
fi
exit