#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.

Usage() {
  echo "Script for testing HBM (Host Bandwidth Manager) framework."
  echo "It creates a cgroup to use for testing and loads a BPF program to limit"
  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  echo "loads. The output is the goodput in Mbps (unless -D was used)."
  echo ""
  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>] [-D]"
  echo "       [-d=<delay>|--delay=<delay>] [--debug] [-E]"
  echo "       [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
  echo "       [-l] [-N] [-p=<port>|--port=<port>] [-P]"
  echo "       [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]"
  echo "       [-s=<server>|--server=<server>] [-S|--stats]"
  echo "       [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  echo " Where:"
  echo "  out               egress (default)"
  echo "  -b or --bpf       BPF program filename to load and attach."
  echo "                    Default is hbm_out_kern.o for egress,"
  echo "  -c or --cc        TCP congestion control (cubic or dctcp)"
  echo "  --debug           print BPF trace buffer"
  echo "  -d or --delay     add a delay in ms using netem"
  echo "  -D                In addition to the goodput in Mbps, it also outputs"
  echo "                    other detailed information. This information is"
  echo "                    test dependent (i.e. iperf3 or netperf)."
  echo "  -E                enable ECN (not required for dctcp)"
  echo "  -f or --flows     number of concurrent flows (default=1)"
  echo "  -i or --id        cgroup id (an integer, default is 1)"
  echo "  -N                use netperf instead of iperf3"
  echo "  -l                do not limit flows using loopback"
  echo "  -h                Help"
  echo "  -p or --port      iperf3 port (default is 5201)"
  echo "  -P                use an iperf3 instance for each flow"
  echo "  -q                use the specified qdisc"
  echo "  -r or --rate      rate in Mbps (default is 1Gbps)"
  echo "  -R                Use TCP_RR for netperf. 1st flow has req"
  echo "                    size of 10KB, rest of 1MB. Reply in all"
  echo "                    cases is 1 byte."
  echo "                    More detailed output for each flow can be found"
  echo "                    in the files netperf.<cg>.<flow>, where <cg> is the"
  echo "                    cgroup id as specified with the -i flag, and <flow>"
  echo "                    is the flow id starting at 1 and increasing by 1 for"
  echo "                    each flow (as specified by -f)."
  echo "  -s or --server    hostname of netperf server. Used to create netperf"
  echo "                    test traffic between two hosts (default is within host)."
  echo "                    netserver must be running on the host."
  echo "  -S or --stats     whether to update hbm stats (default is yes)."
  echo "  -t or --time      duration of iperf3 in seconds (default=5)"
  echo "  -w                Work conserving flag. cgroup can increase its"
  echo "                    bandwidth beyond the rate limit specified"
  echo "                    while there is available bandwidth. Current"
  echo "                    implementation assumes there is only one NIC"
  echo "                    (eth0), but can be extended to support multiple"
  echo "                    NICs."
  echo "  cubic or dctcp    specify which TCP CC to use"
  echo " "
  exit
}
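
# Example invocations (illustrative values only; assumes the script is saved
# as do_hbm_test.sh and run from a directory containing the hbm binary and
# the BPF object files):
#   ./do_hbm_test.sh -l -r=500 -t=10 -f=4      # 4 iperf3 flows capped at 500Mbps
#   ./do_hbm_test.sh -N -R --stats -D dctcp    # netperf TCP_RR flows using dctcp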

#set -x

debug_flag=0
args="$@"
name="$0"
netem=0
cc=x
dir="-o"
dir_name="out"
dur=5
flows=1
id=1
prog=""
port=5201
rate=1000
multi_iperf=0
flow_cnt=1
use_netperf=0
rr=0
ecn=0
details=0
server=""
qdisc=""
flags=""
do_stats=0

function start_hbm () {
  rm -f hbm.out
  echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
  echo " " >> hbm.out
  ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
  echo $!
}

processArgs () {
  for i in $args ; do
    case $i in
    # Support for upcoming ingress rate limiting
    #in)
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    # Support for upcoming fq Early Departure Time egress rate limiting
    #--edt)
    #  prog="hbm_out_edt_kern.o"
    #  qdisc="fq"
    #  ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg: $i"
      Usage
      ;;
    esac
  done
}

processArgs

if [ $debug_flag -eq 1 ] ; then
  rm -f hbm_out.log
fi

hbm_pid=$(start_hbm)
usleep 100000

host=`hostname`
cg_base_dir=/sys/fs/cgroup
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

echo $$ >> $cg_dir/cgroup.procs

ulimit -l unlimited

rm -f ss.out
rm -f hbm.[0-9]*.$dir_name
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

if [ $use_netperf -eq 0 ] ; then
  cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
  fi
fi

if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q option because -d option was used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root $qdisc > /dev/null 2>&1
fi

n=0
m=$[$dur * 5]
hn="::1"
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" != "" ] ; then
    hn=$server
  fi
fi

( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &

if [ $use_netperf -ne 0 ] ; then
  begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
    awk '{ print $1 }'`
  if [ "$begNetserverPid" == "" ] ; then
    if [ "$server" == "" ] ; then
      ( ./netserver > /dev/null 2>&1) &
      usleep 100000
    fi
  fi
  flow_cnt=1
  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi
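  # One netperf client is started per flow below. With -R (TCP_RR) the first
  # flow uses 10KB requests to model short RPCs while the remaining flows use
  # 1MB requests; the -k list selects the netperf output keys (e.g.
  # LOCAL_SEND_THROUGHPUT) that are parsed once the test completes.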
  replySize=1
  while [ $flow_cnt -le $flows ] ; do
    if [ $rr -ne 0 ] ; then
      reqSize=1M
      if [ $flow_cnt -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
    else
      if [ "$dir" == "-i" ] ; then
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      else
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      fi
    fi
    flow_cnt=$[flow_cnt+1]
  done

  # sleep for duration of test (plus some buffer)
  n=$[dur+2]
  sleep $n

  # force graceful termination of netperf
  pids=`pgrep netperf`
  for p in $pids ; do
    kill -SIGALRM $p
  done

  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi
  while [ $flow_cnt -le $flows ] ; do
    if [ "$dir" == "-i" ] ; then
      r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    else
      r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    fi
    echo "rate for flow $flow_cnt: $r"
    rate=$[rate+r]
    if [ $details -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat netperf.$id.$flow_cnt
    fi
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    echo ""
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
elif [ $multi_iperf -eq 0 ] ; then
  (iperf3 -s -p $port -1 > /dev/null 2>&1) &
  usleep 100000
  iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
  rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
  rate=`echo $rates | grep -o "[0-9]*$"`

  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
else
  flow_cnt=1
  while [ $flow_cnt -le $flows ] ; do
    (iperf3 -s -p $port -1 > /dev/null 2>&1) &
    ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
    port=$[port+1]
    flow_cnt=$[flow_cnt+1]
  done
  n=$[dur+1]
  sleep $n
  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi
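  # Each iperf3 client above wrote its goodput (in Mbps) to iperf3.<id>.<flow>;
  # the loop below sums those per-flow values into the aggregate rate.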
  while [ $flow_cnt -le $flows ] ; do
    r=`cat iperf3.$id.$flow_cnt`
#    echo "rate for flow $flow_cnt: $r"
    if [ $details -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    rate=$[rate+r]
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
fi

if [ $use_netperf -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
if [ "$netem" -ne "0" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
fi

sleep 2

hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$hbmPid" == "$hbm_pid" ] ; then
  kill $hbm_pid
fi

sleep 1

# Detach any BPF programs that may have lingered. The output of
# "bpftool cgroup tree" is walked token by token: a token beginning with the
# test cgroup path is the cgroup, the next token is the program id, and the
# one after that is the attach type.
ttx=`bpftool cgroup tree | grep hbm`
v=2
for x in $ttx ; do
  if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
    cg=$x ; v=0
  elif [ $v -eq 0 ] ; then
    id=$x ; v=1
  elif [ $v -eq 1 ] ; then
    type=$x ; bpftool cgroup detach $cg $type id $id
    v=0
  fi
done

if [ $use_netperf -ne 0 ] ; then
  if [ "$server" == "" ] ; then
    if [ "$begNetserverPid" == "" ] ; then
      netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
      if [ "$netserverPid" != "" ] ; then
        kill $netserverPid
      fi
    fi
  fi
fi
exit
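
# If a run is interrupted before the cleanup above executes, leftover state
# can be removed manually, e.g. (same commands used earlier in this script):
#   tc qdisc del dev lo root 2> /dev/null
#   bpftool cgroup tree | grep hbm   # then detach any programs still listed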