#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Copyright (c) 2019 Facebook
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of version 2 of the GNU General Public
# License as published by the Free Software Foundation.

# Print the help text and exit.  Invoked for -h and for any
# unrecognized argument.
Usage() {
  echo "Script for testing HBM (Host Bandwidth Manager) framework."
  echo "It creates a cgroup to use for testing and load a BPF program to limit"
  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  echo "loads. The output is the goodput in Mbps (unless -D was used)."
  echo ""
  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
  echo "       [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
  echo "       [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
  echo "       [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
  echo "       [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]"
  echo "       [-s=<server>|--server=<server>] [-S|--stats]"
  echo "       [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  echo "  Where:"
  echo "    out               egress (default)"
  echo "    -b or --bpf       BPF program filename to load and attach."
  echo "                      Default is hbm_out_kern.o for egress,"
  echo "    -c or --cc        TCP congestion control (cubic or dctcp)"
  echo "    --debug           print BPF trace buffer"
  echo "    -d or --delay     add a delay in ms using netem"
  echo "    -D                In addition to the goodput in Mbps, it also outputs"
  echo "                      other detailed information. This information is"
  echo "                      test dependent (i.e. iperf3 or netperf)."
  echo "    -E                enable ECN (not required for dctcp)"
  echo "    --edt             use fq's Earliest Departure Time (requires fq)"
  echo "    -f or --flows     number of concurrent flows (default=1)"
  echo "    -i or --id        cgroup id (an integer, default is 1)"
  echo "    -N                use netperf instead of iperf3"
  echo "    --no_cn           Do not return CN notifications"
  echo "    -l                do not limit flows using loopback"
  echo "    -h                Help"
  echo "    -p or --port      iperf3 port (default is 5201)"
  echo "    -P                use an iperf3 instance for each flow"
  echo "    -q                use the specified qdisc"
  echo "    -r or --rate      rate in Mbps (default is 1Gbps)"
  echo "    -R                Use TCP_RR for netperf. 1st flow has req"
  echo "                      size of 10KB, rest of 1MB. Reply in all"
  echo "                      cases is 1 byte."
  echo "                      More detailed output for each flow can be found"
  echo "                      in the files netperf.<cg>.<flow>, where <cg> is the"
  echo "                      cgroup id as specified with the -i flag, and <flow>"
  echo "                      is the flow id starting at 1 and increasing by 1 for"
  echo "                      flow (as specified by -f)."
  echo "    -s or --server    hostname of netperf server. Used to create netperf"
  echo "                      test traffic between two hosts (default is within host)"
  echo "                      netserver must be running on the host."
  echo "    -S or --stats     whether to update hbm stats (default is yes)."
  echo "    -t or --time      duration of iperf3 in seconds (default=5)"
  echo "    -w                Work conserving flag. cgroup can increase its"
  echo "                      bandwidth beyond the rate limit specified"
  echo "                      while there is available bandwidth. Current"
  echo "                      implementation assumes there is only one NIC"
  echo "                      (eth0), but can be extended to support multiple"
  echo "                      NICs."
  echo "    cubic or dctcp    specify which TCP CC to use"
  echo " "
  exit
}

#set -x

# Global defaults; overridden by processArgs below.
debug_flag=0
args="$@"          # NOTE(review): word-splits; acceptable here since no option value contains spaces
name="$0"
netem=0            # netem delay in ms (0 = no netem)
cc=x               # "x" means: leave congestion control unchanged
dir="-o"           # hbm direction flag: -o egress (default)
dir_name="out"
dur=5              # test duration in seconds
flows=1            # number of concurrent flows
id=1               # cgroup id
prog=""            # BPF program filename passed to ./hbm
port=5201          # iperf3 base port
rate=1000          # rate limit in Mbps
multi_iperf=0      # 1 = one iperf3 instance per flow
flow_cnt=1
use_netperf=0      # 1 = use netperf instead of iperf3
rr=0               # 1 = use TCP_RR for netperf
ecn=0              # 1 = enable ECN sysctl for the test
details=0          # 1 = verbose per-flow output
server=""          # remote netperf server hostname ("" = within host)
qdisc=""           # qdisc to install on eth0 root
flags=""           # extra flags passed through to ./hbm
do_stats=0         # 1 = hbm updates its stats file

# Start the hbm rate limiter in the background with the configured
# direction, cgroup id, rate, duration and flags.  The command line and
# its output go to hbm.out.  Prints the background pid on stdout.
function start_hbm () {
  rm -f hbm.out
  # NOTE(review): $dbg is never assigned anywhere in this script, so it
  # expands to nothing -- confirm whether it was meant to carry "--debug"
  echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog" > hbm.out
  echo " " >> hbm.out
  ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> hbm.out 2>&1 &
  echo $!
}

# Parse the arguments captured in $args, setting the globals above.
# Any unknown argument prints usage and exits.
processArgs () {
  for i in $args ; do
    case $i in
    # Support for upcoming ingress rate limiting
    #in)         # support for upcoming ingress rate limiting
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${i#*=}"
      ;;
    -c=*|--cc=*)
      cc="${i#*=}"
      ;;
    --no_cn)
      flags="$flags --no_cn"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${i#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    --edt)
      flags="$flags --edt"
      qdisc="fq"
      ;;
    -f=*|--flows=*)
      flows="${i#*=}"
      ;;
    -i=*|--id=*)
      id="${i#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${i#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${i#*=}"
      ;;
    -r=*|--rate=*)
      rate="${i#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${i#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${i#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$i"
      Usage
      ;;
    esac
  done
}

processArgs

if [ $debug_flag -eq 1 ] ; then
  rm -f hbm_out.log
fi

# Launch the rate limiter and give it a moment to attach its BPF program.
hbm_pid=$(start_hbm)
usleep 100000
# ---- Test execution (runs after option parsing and hbm start-up) ----

host=`hostname`
cg_base_dir=/sys/fs/cgroup
# Cgroup directory for this test id; presumably created by ./hbm when it
# was started above -- TODO confirm against hbm's source.
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Move this shell (and therefore every load generator it spawns) into
# the cgroup the BPF program is attached to.
echo $$ >> $cg_dir/cgroup.procs

ulimit -l unlimited

# Remove stale output from a previous run.
rm -f ss.out
rm -f hbm.[0-9]*.$dir_name
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

# iperf3 has no per-connection CC knob here, so CC is set via the global
# sysctl; remember the current value to restore it during cleanup.
if [ $use_netperf -eq 0 ] ; then
  cur_cc=`sysctl -n net.ipv4.tcp_congestion_control`
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
  fi
fi

# -d (netem delay on lo) takes precedence over -q (qdisc on eth0).
if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay $netem\ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
  tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi

n=0
m=$[$dur * 5]     # ping count: 5 pings/sec (-i 0.2) for the test duration
hn="::1"          # ping target: loopback unless a remote netperf server is used
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" != "" ] ; then
    hn=$server
  fi
fi

# Background ping to measure latency while the load runs; parsed later
# for the "PING AVG DELAY" detail line.
( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &

if [ $use_netperf -ne 0 ] ; then
  # Start a local netserver unless one is already running or a remote
  # server was requested.  Remember whether one pre-existed so cleanup
  # only kills the instance we started.
  begNetserverPid=`ps ax | grep netserver | grep --invert-match "grep" | \
    awk '{ print $1 }'`
  if [ "$begNetserverPid" == "" ] ; then
    if [ "$server" == "" ] ; then
      ( ./netserver > /dev/null 2>&1) &
      usleep 100000
    fi
  fi
  flow_cnt=1
  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    # netperf -K sets the CC algorithm on both local and remote ends
    np_cc="-K $cc,$cc"
  fi
  replySize=1
  # Launch one background netperf per flow.
  while [ $flow_cnt -le $flows ] ; do
    if [ $rr -ne 0 ] ; then
      # TCP_RR: first flow uses 10KB requests, the rest 1MB, reply 1B.
      # For ingress (-i) the request/reply sizes are swapped.
      reqSize=1M
      if [ $flow_cnt -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      # NOTE(review): "P50_lATENCY" (lower-case l) appears intentional-ish
      # legacy spelling; netperf -k selectors seem to match anyway -- confirm
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r $reqSize,$replySize $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
    else
      if [ "$dir" == "-i" ] ; then
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- -r 1,10M $np_cc -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      else
        ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- $np_cc -k P50_lATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE > netperf.$id.$flow_cnt ) &
      fi
    fi
    flow_cnt=$[flow_cnt+1]
  done

# sleep for duration of test (plus some buffer)
  n=$[dur+2]
  sleep $n

# force graceful termination of netperf
  pids=`pgrep netperf`
  for p in $pids ; do
    kill -SIGALRM $p
  done

  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi
  # Sum per-flow throughput (Mbps) out of the netperf result files;
  # ingress reads the remote side's send throughput, egress the local.
  while [ $flow_cnt -le $flows ] ; do
    if [ "$dir" == "-i" ] ; then
      r=`cat netperf.$id.$flow_cnt | grep -o "REMOTE_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    else
      r=`cat netperf.$id.$flow_cnt | grep -o "LOCAL_SEND_THROUGHPUT=[0-9]*" | grep -o "[0-9]*"`
    fi
    echo "rate for flow $flow_cnt: $r"
    rate=$[rate+r]
    if [ $details -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat netperf.$id.$flow_cnt
    fi
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    echo ""
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
elif [ $multi_iperf -eq 0 ] ; then
  # Single iperf3 server/client pair carrying all flows as parallel
  # streams (-P $flows).
  (iperf3 -s -p $port -1 > /dev/null 2>&1) &
  usleep 100000
  iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
  # Take the last "NNN Mbits" figure from the receiver summary lines as
  # the aggregate goodput.
  rates=`grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*"`
  rate=`echo $rates | grep -o "[0-9]*$"`

  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
else
  # -P: one iperf3 server/client pair per flow, each on its own port.
  flow_cnt=1
  while [ $flow_cnt -le $flows ] ; do
    (iperf3 -s -p $port -1 > /dev/null 2>&1) &
    ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
    port=$[port+1]
    flow_cnt=$[flow_cnt+1]
  done
  n=$[dur+1]
  sleep $n
  flow_cnt=1
  rate=0
  if [ $details -ne 0 ] ; then
    echo ""
    echo "Details for HBM in cgroup $id"
    if [ $do_stats -eq 1 ] ; then
      if [ -e hbm.$id.$dir_name ] ; then
        cat hbm.$id.$dir_name
      fi
    fi
  fi

  # Sum the per-flow rates the background pipelines wrote out.
  while [ $flow_cnt -le $flows ] ; do
    r=`cat iperf3.$id.$flow_cnt`
#   echo "rate for flow $flow_cnt: $r"
    if [ $details -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    rate=$[rate+r]
    flow_cnt=$[flow_cnt+1]
  done
  if [ $details -ne 0 ] ; then
    delay=`grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$"`
    echo "PING AVG DELAY:$delay"
    echo "AGGREGATE_GOODPUT:$rate"
  else
    echo $rate
  fi
fi

# ---- Cleanup: restore sysctls and qdiscs, stop hbm and netserver ----

if [ $use_netperf -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
if [ "$netem" -ne "0" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
fi
if [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

# Kill hbm only if ps still reports the pid we started (i.e. it has not
# already exited and had its pid reused).
hbmPid=`ps ax | grep "hbm " | grep --invert-match "grep" | awk '{ print $1 }'`
if [ "$hbmPid" == "$hbm_pid" ] ; then
  kill $hbm_pid
fi

sleep 1

# Detach any BPF programs that may have lingered
# Walks the token stream of "bpftool cgroup tree | grep hbm": a token
# that is a cgroup path under the test work dir (v=0) is followed by a
# program id token (v=1) and an attach-type token, which triggers the
# detach.  NOTE(review): this reuses/clobbers $id, which is safe only
# because $id is not needed after this point.
ttx=`bpftool cgroup tree | grep hbm`
v=2
for x in $ttx ; do
  if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
    cg=$x ; v=0
  else
    if [ $v -eq 0 ] ; then
      id=$x ; v=1
    else
      if [ $v -eq 1 ] ; then
        type=$x ; bpftool cgroup detach $cg $type id $id
        v=0
      fi
    fi
  fi
done

# Kill the netserver we started, but never one that was already running
# before this script began.
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" == "" ] ; then
    if [ "$begNetserverPid" == "" ] ; then
      netserverPid=`ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }'`
      if [ "$netserverPid" != "" ] ; then
        kill $netserverPid
      fi
    fi
  fi
fi
exit