xref: /openbmc/linux/samples/bpf/do_hbm_test.sh (revision 9adc8050)
1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Copyright (c) 2019 Facebook
5#
6# This program is free software; you can redistribute it and/or
7# modify it under the terms of version 2 of the GNU General Public
8# License as published by the Free Software Foundation.
9
# Print the help text for this script on stdout, then terminate the script.
# Globals:   name (read) - script name shown on the USAGE line.
# Outputs:   the help text on stdout.
# Returns:   never returns; calls a bare `exit`, so the exit status is the
#            status of the final echo.
Usage() {
  echo "Script for testing HBM (Host Bandwidth Manager) framework."
  echo "It creates a cgroup to use for testing and loads a BPF program to limit"
  echo "egress or ingress bandwidth. It then uses iperf3 or netperf to create"
  echo "loads. The output is the goodput in Mbps (unless -D was used)."
  echo ""
  echo "USAGE: $name [out] [-b=<prog>|--bpf=<prog>] [-c=<cc>|--cc=<cc>]"
  echo "             [-D] [-d=<delay>|--delay=<delay>] [--debug] [-E] [--edt]"
  echo "             [-f=<#flows>|--flows=<#flows>] [-h] [-i=<id>|--id=<id>]"
  echo "             [-l] [-N] [--no_cn] [-p=<port>|--port=<port>] [-P]"
  echo "             [-q=<qdisc>] [-r=<rate>|--rate=<rate>] [-R]"
  echo "             [-s=<server>|--server=<server>] [-S|--stats]"
  echo "             [-t=<time>|--time=<time>] [-w] [cubic|dctcp]"
  echo "  Where:"
  echo "    out               egress (default)"
  echo "    -b or --bpf       BPF program filename to load and attach."
  echo "                      Default is hbm_out_kern.o for egress."
  echo "    -c or --cc        TCP congestion control (cubic or dctcp)"
  echo "    --debug           print BPF trace buffer"
  echo "    -d or --delay     add a delay in ms using netem"
  echo "    -D                In addition to the goodput in Mbps, it also outputs"
  echo "                      other detailed information. This information is"
  echo "                      test dependent (i.e. iperf3 or netperf)."
  echo "    -E                enable ECN (not required for dctcp)"
  echo "    --edt             use fq's Earliest Departure Time (requires fq)"
  echo "    -f or --flows     number of concurrent flows (default=1)"
  echo "    -i or --id        cgroup id (an integer, default is 1)"
  echo "    -N                use netperf instead of iperf3"
  echo "    --no_cn           Do not return CN notifications"
  echo "    -l                do not limit flows using loopback"
  echo "    -h                Help"
  echo "    -p or --port      iperf3 port (default is 5201)"
  echo "    -P                use an iperf3 instance for each flow"
  echo "    -q                use the specified qdisc"
  echo "    -r or --rate      rate in Mbps (default is 1Gbps)"
  echo "    -R                Use TCP_RR for netperf. 1st flow has req"
  echo "                      size of 10KB, rest of 1MB. Reply in all"
  echo "                      cases is 1 byte."
  echo "                      More detailed output for each flow can be found"
  echo "                      in the files netperf.<cg>.<flow>, where <cg> is the"
  echo "                      cgroup id as specified with the -i flag, and <flow>"
  echo "                      is the flow id starting at 1 and increasing by 1 for"
  echo "                      each flow (as specified by -f)."
  echo "    -s or --server    hostname of netperf server. Used to create netperf"
  echo "                      test traffic between two hosts (default is within host)"
  echo "                      netserver must be running on the host."
  echo "    -S or --stats     update hbm stats; with -D they are also printed"
  echo "                      (default is off)."
  echo "    -t or --time      duration of iperf3 in seconds (default=5)"
  echo "    -w                Work conserving flag. cgroup can increase its"
  echo "                      bandwidth beyond the rate limit specified"
  echo "                      while there is available bandwidth. Current"
  echo "                      implementation assumes there is only one NIC"
  echo "                      (eth0), but can be extended to support multiple"
  echo "                      NICs."
  echo "    cubic or dctcp    specify which TCP CC to use"
  echo " "
  exit
}
67
68#set -x
69
# Default settings. processArgs overrides them from the command line.

# Command-line words joined into one space-separated string; processArgs
# re-splits it on whitespace.
args="$*"
name="$0"             # script name shown by Usage

# What to run and how hbm is invoked.
dir="-o"              # direction flag handed to hbm
dir_name="out"        # direction name used in the hbm.<id>.<dir_name> file name
id=1                  # cgroup id (-i)
prog=""               # BPF program file (-b); empty means hbm picks its own
rate=1000             # rate limit in Mbps (-r)
dur=5                 # test duration in seconds (-t)
flags=""              # extra flags accumulated for hbm
# start_hbm expands $dbg on the hbm command line but nothing else in this file
# assigns it; pin it to empty so a value inherited from the environment cannot
# leak into the command.
dbg=""
debug_flag=0          # 1 once --debug is given
do_stats=0            # 1 once -S/--stats is given

# Traffic generation.
flows=1               # number of concurrent flows (-f)
flow_cnt=1            # loop counter over flows
port=5201             # first iperf3 port (-p)
multi_iperf=0         # 1 = one iperf3 instance per flow (-P)
use_netperf=0         # 1 = use netperf instead of iperf3 (-N)
rr=0                  # 1 = netperf TCP_RR test (-R)
server=""             # remote netperf server (-s); empty = this host
details=0             # 1 = print detailed output (-D)

# Network stack knobs.
cc=x                  # TCP congestion control; "x" means leave it unchanged
ecn=0                 # 1 = enable ECN for the test (-E)
netem=0               # netem delay in ms (-d); 0 = no netem qdisc
qdisc=""              # qdisc to install (-q, or fq via --edt)
93
# Launch ./hbm in the background with the current settings.
# Globals:   dir, id, rate, dur, flags, dbg, prog (read)
# Outputs:   hbm.out is recreated with the command line, a separator line and
#            then everything hbm prints (stdout and stderr);
#            the PID of the background hbm process is written to stdout.
start_hbm() {
  local log_file=hbm.out

  rm -f $log_file
  {
    echo "./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog"
    echo " "
  } > $log_file
  # The settings are left unquoted on purpose: $flags holds several
  # whitespace-separated options and empty settings must vanish.
  ./hbm $dir -n $id -r $rate -t $dur $flags $dbg $prog >> $log_file 2>&1 &
  echo $!
}
101
# Parse the command-line words stored in $args and update the global settings.
# Globals:   args (read); dir, dir_name, prog, cc, flags, debug_flag, netem,
#            details, ecn, qdisc, flows, id, use_netperf, port, multi_iperf,
#            rate, rr, server, do_stats, dur (written)
# Outputs:   "Unknown arg:<arg>" followed by the help text for an unrecognised
#            word; the help text alone for -h/--help.
# Returns:   does not return when Usage is invoked (Usage exits the script).
processArgs () {
  local arg

  # $args is deliberately unquoted: it is a single space-joined string that
  # has to be split back into the individual command-line words.
  for arg in $args ; do
    case $arg in
    # Support for upcoming ingress rate limiting
    #in)
    #  dir="-i"
    #  dir_name="in"
    #  ;;
    out)
      dir="-o"
      dir_name="out"
      ;;
    -b=*|--bpf=*)
      prog="${arg#*=}"
      ;;
    -c=*|--cc=*)
      cc="${arg#*=}"
      ;;
    --no_cn)
      flags="$flags --no_cn"
      ;;
    --debug)
      flags="$flags -d"
      debug_flag=1
      ;;
    -d=*|--delay=*)
      netem="${arg#*=}"
      ;;
    -D)
      details=1
      ;;
    -E)
      ecn=1
      ;;
    --edt)
      flags="$flags --edt"
      qdisc="fq"
      ;;
    -f=*|--flows=*)
      flows="${arg#*=}"
      ;;
    -h|--help)
      # Documented in the help text; show it without the "Unknown arg" line.
      Usage
      ;;
    -i=*|--id=*)
      id="${arg#*=}"
      ;;
    -l)
      flags="$flags -l"
      ;;
    -N)
      use_netperf=1
      ;;
    -p=*|--port=*)
      port="${arg#*=}"
      ;;
    -P)
      multi_iperf=1
      ;;
    -q=*)
      qdisc="${arg#*=}"
      ;;
    -r=*|--rate=*)
      rate="${arg#*=}"
      ;;
    -R)
      rr=1
      ;;
    -s=*|--server=*)
      server="${arg#*=}"
      ;;
    -S|--stats)
      flags="$flags -s"
      do_stats=1
      ;;
    -t=*|--time=*)
      dur="${arg#*=}"
      ;;
    -w)
      flags="$flags -w"
      ;;
    cubic)
      cc=cubic
      ;;
    dctcp)
      cc=dctcp
      ;;
    *)
      echo "Unknown arg:$arg"
      Usage
      ;;
    esac
  done
}
193
# ---- Setup: parse arguments, start hbm, join the cgroup, tune the stack ----

processArgs

# With --debug, start from a clean hbm_out.log.
if [ $debug_flag -eq 1 ] ; then
  rm -f hbm_out.log
fi

# Start hbm in the background and give it 100ms to come up before touching the
# cgroup below. `sleep 0.1` replaces `usleep 100000`: usleep is a deprecated
# Red Hat-only tool, and where it is missing the wait was silently skipped.
# NOTE(review): this script never creates $cg_dir itself; presumably hbm does
# during start-up -- confirm.
hbm_pid=$(start_hbm)
sleep 0.1

host=$(hostname)
cg_base_dir=/sys/fs/cgroup
cg_dir="$cg_base_dir/cgroup-test-work-dir/hbm$id"

# Move this shell into the test cgroup; the traffic generators are started
# from this shell.
echo $$ >> "$cg_dir/cgroup.procs"

ulimit -l unlimited

# Drop result files from earlier runs.
rm -f ss.out
rm -f hbm.[0-9]*.$dir_name

if [ $ecn -ne 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_ecn=1
fi

# For iperf3 runs the congestion control is switched system-wide, so remember
# the current setting for the cleanup code. netperf selects it per test
# instead.
if [ $use_netperf -eq 0 ] ; then
  cur_cc=$(sysctl -n net.ipv4.tcp_congestion_control)
  if [ "$cc" != "x" ] ; then
    sysctl -w -q -n net.ipv4.tcp_congestion_control=$cc
  fi
fi

# A netem delay (on lo) takes precedence over an explicit qdisc (on eth0).
if [ "$netem" -ne "0" ] ; then
  if [ "$qdisc" != "" ] ; then
    echo "WARNING: Ignoring -q options because -d option used"
  fi
  tc qdisc del dev lo root > /dev/null 2>&1
  tc qdisc add dev lo root netem delay ${netem}ms > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
  tc qdisc add dev eth0 root $qdisc > /dev/null 2>&1
fi

n=0
# One ping every 0.2s, i.e. five per second of test duration.
m=$(($dur * 5))
hn="::1"
if [ $use_netperf -ne 0 ] ; then
  if [ "$server" != "" ] ; then
    hn=$server
  fi
fi

# Measure latency in the background for the whole test; ping.out is parsed
# when the detailed summary is printed.
( ping6 -i 0.2 -c $m $hn > ping.out 2>&1 ) &
245
# ---- Run the selected load generator and report the goodput ----

# Print the "Details for HBM" header and, when stats were requested (-S) and
# the hbm stats file exists, the contents of that file.
print_hbm_details () {
  echo ""
  echo "Details for HBM in cgroup $id"
  if [ $do_stats -eq 1 ] && [ -e "hbm.$id.$dir_name" ] ; then
    cat "hbm.$id.$dir_name"
  fi
}

# Print the average ping delay taken from ping.out and the aggregate goodput
# held in $rate.
print_summary () {
  local delay
  delay=$(grep "avg" ping.out | grep -o "= [0-9.]*/[0-9.]*" | grep -o "[0-9.]*$")
  echo "PING AVG DELAY:$delay"
  echo "AGGREGATE_GOODPUT:$rate"
}

if [ $use_netperf -ne 0 ] ; then
  # Remember whether a netserver was already running so that the cleanup code
  # only stops one started here.
  begNetserverPid=$(ps ax | grep netserver | grep --invert-match "grep" | \
                    awk '{ print $1 }')
  if [ "$begNetserverPid" == "" ] && [ "$server" == "" ] ; then
    ( ./netserver > /dev/null 2>&1 ) &
    # `sleep 0.1` replaces the deprecated, Red Hat-only `usleep 100000`.
    sleep 0.1
  fi

  if [ "$server" == "" ] ; then
    np_server=$host
  else
    np_server=$server
  fi
  if [ "$cc" == "x" ] ; then
    np_cc=""
  else
    np_cc="-K $cc,$cc"
  fi

  # Start one netperf per flow; each writes its results to
  # netperf.<id>.<flow>. $np_cc is unquoted on purpose (it is either empty or
  # the two words "-K <cc>,<cc>").
  replySize=1
  flow_cnt=1
  while [ $flow_cnt -le $flows ] ; do
    if [ $rr -ne 0 ] ; then
      # TCP_RR: the first flow sends 10K requests, the others 1M.
      reqSize=1M
      if [ $flow_cnt -eq 1 ] ; then
        reqSize=10K
      fi
      if [ "$dir" == "-i" ] ; then
        replySize=$reqSize
        reqSize=1
      fi
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- \
          -r $reqSize,$replySize $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,REMOTE_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,LOCAL_RECV_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > netperf.$id.$flow_cnt ) &
    elif [ "$dir" == "-i" ] ; then
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_RR -- \
          -r 1,10M $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REMOTE_TRANSPORT_RETRANS,REMOTE_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > netperf.$id.$flow_cnt ) &
    else
      ( ./netperf -H $np_server -l $dur -f m -j -t TCP_STREAM -- \
          $np_cc \
          -k P50_LATENCY,P90_LATENCY,LOCAL_TRANSPORT_RETRANS,LOCAL_SEND_THROUGHPUT,REQUEST_SIZE,RESPONSE_SIZE \
          > netperf.$id.$flow_cnt ) &
    fi
    flow_cnt=$((flow_cnt + 1))
  done

  # sleep for duration of test (plus some buffer)
  n=$((dur + 2))
  sleep $n

  # force graceful termination of netperf
  for p in $(pgrep netperf) ; do
    kill -SIGALRM $p
  done

  if [ $details -ne 0 ] ; then
    print_hbm_details
  fi

  # Sum the integer part of each flow's send throughput.
  if [ "$dir" == "-i" ] ; then
    tput_key="REMOTE_SEND_THROUGHPUT"
  else
    tput_key="LOCAL_SEND_THROUGHPUT"
  fi
  flow_cnt=1
  rate=0
  while [ $flow_cnt -le $flows ] ; do
    r=$(grep -o "${tput_key}=[0-9]*" netperf.$id.$flow_cnt | grep -o "[0-9]*")
    echo "rate for flow $flow_cnt: $r"
    # A flow without a parsable result counts as 0; an empty operand would be
    # an arithmetic syntax error and abort the aggregation.
    rate=$((rate + ${r:-0}))
    if [ $details -ne 0 ] ; then
      echo "-----"
      echo "Details for cgroup $id, flow $flow_cnt"
      cat netperf.$id.$flow_cnt
    fi
    flow_cnt=$((flow_cnt + 1))
  done

  if [ $details -ne 0 ] ; then
    echo ""
    print_summary
  else
    echo $rate
  fi
elif [ $multi_iperf -eq 0 ] ; then
  # One iperf3 server/client pair carrying all flows (-P $flows).
  ( iperf3 -s -p $port -1 > /dev/null 2>&1 ) &
  # `sleep 0.1` replaces the deprecated, Red Hat-only `usleep 100000`.
  sleep 0.1
  iperf3 -c $host -p $port -i 0 -P $flows -f m -t $dur > iperf.$id
  # Integer Mbits values of all "receiver" lines; the last one is reported.
  rates=$(grep receiver iperf.$id | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*")
  # $rates is unquoted on purpose: it joins the values onto one line so that
  # the end-of-line anchor picks the last value.
  rate=$(echo $rates | grep -o "[0-9]*$")

  if [ $details -ne 0 ] ; then
    print_hbm_details
    print_summary
  else
    echo $rate
  fi
else
  # One iperf3 server/client pair per flow, on consecutive ports; each client
  # stores its integer receiver rate in iperf3.<id>.<flow>.
  flow_cnt=1
  while [ $flow_cnt -le $flows ] ; do
    ( iperf3 -s -p $port -1 > /dev/null 2>&1 ) &
    ( iperf3 -c $host -p $port -i 0 -P 1 -f m -t $dur \
        | grep receiver | grep -o "[0-9.]* Mbits" | grep -o "^[0-9]*" \
        | grep -o "[0-9]*$" > iperf3.$id.$flow_cnt ) &
    port=$((port + 1))
    flow_cnt=$((flow_cnt + 1))
  done
  n=$((dur + 1))
  sleep $n

  if [ $details -ne 0 ] ; then
    print_hbm_details
  fi

  flow_cnt=1
  rate=0
  while [ $flow_cnt -le $flows ] ; do
    r=$(cat iperf3.$id.$flow_cnt)
    if [ $details -ne 0 ] ; then
      echo "Rate for cgroup $id, flow $flow_cnt LOCAL_SEND_THROUGHPUT=$r"
    fi
    # A flow without a result counts as 0 instead of breaking the arithmetic.
    rate=$((rate + ${r:-0}))
    flow_cnt=$((flow_cnt + 1))
  done

  if [ $details -ne 0 ] ; then
    print_summary
  else
    echo $rate
  fi
fi
392
# ---- Cleanup: restore system settings and stop helper processes ----

# Restore the congestion control saved before the iperf3 test.
if [ $use_netperf -eq 0 ] ; then
  sysctl -w -q -n net.ipv4.tcp_congestion_control=$cur_cc
fi
if [ $ecn -ne 0 ] ; then
  # NOTE(review): this writes 0 rather than the value that was in effect
  # before the test; the earlier value is not saved anywhere in this script.
  sysctl -w -q -n net.ipv4.tcp_ecn=0
fi
# Remove only the qdisc that was installed: with a netem delay the -q qdisc
# was ignored and eth0 was never touched, so its root qdisc must be left
# alone.
if [ "$netem" -ne "0" ] ; then
  tc qdisc del dev lo root > /dev/null 2>&1
elif [ "$qdisc" != "" ] ; then
  tc qdisc del dev eth0 root > /dev/null 2>&1
fi
sleep 2

# Stop our hbm instance if it is still running. The PID is checked against
# /proc/<pid>/comm so that only this run's hbm is signalled, even when other
# processes match "hbm" or several hbm instances run at once.
if [ -n "$hbm_pid" ] && [ -r "/proc/$hbm_pid/comm" ] &&
   [ "$(cat "/proc/$hbm_pid/comm" 2> /dev/null)" == "hbm" ] ; then
  kill $hbm_pid
fi

sleep 1

# Detach any BPF programs that may have lingered.
# The matching `bpftool cgroup tree` lines are walked word by word:
#   state 2: nothing useful seen yet
#   state 0: a cgroup path is known, the next word is taken as a program id
#   state 1: a program id is known, the next word is taken as the attach type
ttx=$(bpftool cgroup tree | grep hbm)
v=2
for x in $ttx ; do
  if [ "${x:0:36}" == "/sys/fs/cgroup/cgroup-test-work-dir/" ] ; then
    cg=$x
    v=0
  elif [ $v -eq 0 ] ; then
    prog_id=$x
    v=1
  elif [ $v -eq 1 ] ; then
    attach_type=$x
    bpftool cgroup detach $cg $attach_type id $prog_id
    v=0
  fi
done

# Stop netserver only for a local netperf test that found none running at the
# start.
if [ $use_netperf -ne 0 ] && [ "$server" == "" ] &&
   [ "$begNetserverPid" == "" ] ; then
  netserverPid=$(ps ax | grep netserver | grep --invert-match "grep" | awk '{ print $1 }')
  if [ "$netserverPid" != "" ] ; then
    kill $netserverPid
  fi
fi
exit
443