#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# A test for switch behavior under MC overload. An issue in Spectrum chips
# causes throughput of UC traffic to drop severely when a switch is under heavy
# MC load. This issue can be overcome by putting the switch to MC-aware mode.
# This test verifies that UC performance stays intact even as the switch is
# under MC flood, and therefore that the MC-aware mode is enabled and correctly
# configured.
#
# Because mlxsw throttles CPU port, the traffic can't actually reach userspace
# at full speed. That makes it impossible to use iperf3 to simply measure the
# throughput, because many packets (that reach $h3) don't get to the kernel at
# all even in UDP mode (the situation is even worse in TCP mode, where one can't
# hope to see more than a couple Mbps).
#
# So instead we send traffic with mausezahn and use RX ethtool counters at $h3.
# Multicast traffic is untagged, unicast traffic is tagged with PCP 1. Therefore
# each gets a different priority and we can use per-prio ethtool counters to
# measure the throughput. In order to avoid prioritizing unicast traffic, prio
# qdisc is installed on $swp3 and maps all priorities to the same band #7 (and
# thus TC 0).
#
# Mausezahn can't actually saturate the links unless it's using large frames.
# Thus we set MTU to 10K on all involved interfaces. Then both unicast and
# multicast traffic uses 8K frames.
#
# +-----------------------+                +----------------------------------+
# | H1                    |                | H2                               |
# |                       |                |  unicast --> + $h2.111           |
# |                       |                |  traffic     | 192.0.2.129/28    |
# |          multicast    |                |              | e-qos-map 0:1     |
# |          traffic      |                |              |                   |
# | $h1 + <-----          |                |              + $h2               |
# +-----|-----------------+                +--------------|-------------------+
#       |                                                 |
# +-----|-------------------------------------------------|-------------------+
# |     + $swp1                                           + $swp2             |
# |     | >1Gbps                                          | >1Gbps            |
# | +---|----------------+                     +----------|----------------+  |
# | |   + $swp1.1        |                     |          + $swp2.111      |  |
# | |                BR1 |             SW      | BR111                     |  |
# | |   + $swp3.1        |                     |          + $swp3.111      |  |
# | +---|----------------+                     +----------|----------------+  |
# |     \_________________________________________________/                   |
# |                                    |                                      |
# |                                    + $swp3                                |
# |                                    | 1Gbps bottleneck                     |
# |                                    | prio qdisc: {0..7} -> 7              |
# +------------------------------------|--------------------------------------+
#                                      |
#                                   +--|-----------------+
#                                   |  + $h3          H3 |
#                                   |  |                 |
#                                   |  + $h3.111         |
#                                   |  192.0.2.130/28    |
#                                   +--------------------+

ALL_TESTS="
	ping_ipv4
	test_mc_aware
"

# Quote $0 and $lib_dir so that a checkout path containing whitespace does not
# break sourcing of the shared forwarding library.
lib_dir=$(dirname "$0")/../../../net/forwarding

NUM_NETIFS=6
source "$lib_dir/lib.sh"

# Set up host H1 (the multicast sender): initialize $h1 through the lib.sh
# simple_if_init helper and raise its MTU so that 8K frames fit.
h1_create()
{
	simple_if_init $h1
	mtu_set $h1 10000
}

# Undo h1_create in reverse order.
h1_destroy()
{
	mtu_restore $h1
	simple_if_fini $h1
}

# Set up host H2 (the unicast sender): besides the base interface, create VLAN
# 111 with address 192.0.2.129/28 and map egress priority 0 to PCP 1, so that
# unicast traffic is tagged with PCP 1 as described in the file header.
h2_create()
{
	simple_if_init $h2
	mtu_set $h2 10000

	vlan_create $h2 111 v$h2 192.0.2.129/28
	ip link set dev $h2.111 type vlan egress-qos-map 0:1
}

# Undo h2_create in reverse order.
h2_destroy()
{
	vlan_destroy $h2 111

	mtu_restore $h2
	simple_if_fini $h2
}

# Set up host H3 (the receiver): base interface plus VLAN 111 with address
# 192.0.2.130/28.
h3_create()
{
	simple_if_init $h3
	mtu_set $h3 10000

	vlan_create $h3 111 v$h3 192.0.2.130/28
}

# Undo h3_create in reverse order.
h3_destroy()
{
	vlan_destroy $h3 111

	mtu_restore $h3
	simple_if_fini $h3
}

# Configure the switch: bring up the three ports with a 10000 MTU, force $swp3
# to 1Gbps to create the bottleneck, install a prio qdisc on $swp3 that maps
# all eight priorities to band 7, and create the two bridges from the diagram
# (br1 for untagged $swp1 <-> $swp3, br111 for VLAN 111 $swp2 <-> $swp3).
switch_create()
{
	ip link set dev $swp1 up
	mtu_set $swp1 10000

	ip link set dev $swp2 up
	mtu_set $swp2 10000

	ip link set dev $swp3 up
	mtu_set $swp3 10000

	vlan_create $swp2 111
	vlan_create $swp3 111

	ethtool -s $swp3 speed 1000 autoneg off
	tc qdisc replace dev $swp3 root handle 3: \
	   prio bands 8 priomap 7 7 7 7 7 7 7 7

	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev br1 up
	ip link set dev $swp1 master br1
	ip link set dev $swp3 master br1

	ip link add name br111 type bridge vlan_filtering 0
	ip link set dev br111 up
	ip link set dev $swp2.111 master br111
	ip link set dev $swp3.111 master br111
}

# Undo switch_create in reverse order: bridges, qdisc and speed override on
# $swp3, VLAN uppers, and finally MTU and link state of the ports.
switch_destroy()
{
	ip link del dev br111
	ip link del dev br1

	tc qdisc del dev $swp3 root handle 3:
	ethtool -s $swp3 autoneg on

	vlan_destroy $swp3 111
	vlan_destroy $swp2 111

	mtu_restore $swp3
	ip link set dev $swp3 down

	mtu_restore $swp2
	ip link set dev $swp2 down

	mtu_restore $swp1
	ip link set dev $swp1 down
}

# Resolve the six test interfaces from NETIFS, remember the MAC address of $h3
# (used as the destination MAC of the unicast stream) and build the topology.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	h3=${NETIFS[p6]}

	h3mac=$(mac_get $h3)

	vrf_prepare

	h1_create
	h2_create
	h3_create
	switch_create
}

# EXIT trap handler: tear the topology down in the reverse order of
# setup_prepare.
cleanup()
{
	pre_cleanup

	switch_destroy
	h3_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup
}

# Sanity check: H2 must be able to reach H3's VLAN 111 address.
ping_ipv4()
{
	ping_test $h2 192.0.2.130
}

# Print a rate with a human-readable unit.
#   $1 - rate as a number understood by bc
# The value is divided by 1024 until it drops below 1024 or the largest known
# unit (Gbps) is reached. Division stops at the largest unit, so that rates of
# 1024Gbps and more are still reported correctly in Gbps instead of being
# scaled down once more without a matching unit.
humanize()
{
	local speed=$1; shift

	local -a units=(bps Kbps Mbps Gbps)
	local last=$((${#units[@]} - 1))
	local i

	for ((i = 0; i < last; i++)); do
		# bc prints 1 when the comparison holds, 0 otherwise.
		if (($(echo "$speed < 1024" | bc))); then
			break
		fi

		speed=$(echo "scale=1; $speed / 1024" | bc)
	done

	echo "$speed${units[i]}"
}

# Print the rate corresponding to two octet counter samples.
#   $1 - counter value at the start of the interval
#   $2 - counter value at the end of the interval
#   $3 - interval length in seconds (must be non-zero)
# The octet difference is multiplied by 8 and divided by the interval using
# integer arithmetic.
rate()
{
	local t0=$1; shift
	local t1=$1; shift
	local interval=$1; shift

	echo $((8 * (t1 - t0) / interval))
}

# Check that a rate exceeds a minimum.
#   $1 - measured rate
#   $2 - minimum rate that has to be exceeded
#   $3 - description used as a prefix of the diagnostic message
# Returns 0 if rate > min. Otherwise prints a diagnostic to stderr and returns
# 1. The diagnostic is built from this function's own arguments rather than
# from variables that happen to be visible in the caller's scope.
check_rate()
{
	local rate=$1; shift
	local min=$1; shift
	local what=$1; shift

	if ((rate > min)); then
		return 0
	fi

	echo "$what $(humanize $rate) < $(humanize $min)" >&2
	return 1
}

# Generate unicast traffic from $h2.111 towards $h3 and measure it.
#   $1 - description of the measurement, used in diagnostics
# Prints "<ingress rate> <egress rate>" on stdout, where ingress is taken from
# the prio 1 RX octet counter of $swp2 and egress from the same counter of $h3.
# Up to six 10-second measurements are attempted; the return status is 1 if
# none of them reached the minimum ingress rate, 0 otherwise.
# This function is meant to be called in a command substitution, because it
# starts and reaps its own background mausezahn job.
measure_uc_rate()
{
	local what=$1; shift

	local interval=10
	local i
	local ret=0

	# Dips in performance might cause momentary ingress rate to drop below
	# 1Gbps. That wouldn't saturate egress and MC would thus get through,
	# seemingly winning bandwidth on account of UC. Demand at least 2Gbps
	# average ingress rate to somewhat mitigate this.
	local min_ingress=2147483648

	mausezahn $h2.111 -p 8000 -A 192.0.2.129 -B 192.0.2.130 -c 0 \
		-a own -b $h3mac -t udp -q &
	sleep 1

	for i in {5..0}; do
		local t0=$(ethtool_stats_get $h3 rx_octets_prio_1)
		local u0=$(ethtool_stats_get $swp2 rx_octets_prio_1)
		sleep $interval
		local t1=$(ethtool_stats_get $h3 rx_octets_prio_1)
		local u1=$(ethtool_stats_get $swp2 rx_octets_prio_1)

		local ir=$(rate $u0 $u1 $interval)
		local er=$(rate $t0 $t1 $interval)

		if check_rate $ir $min_ingress "$what ingress rate"; then
			break
		fi

		# Fail the test if we can't get the throughput.
		if ((i == 0)); then
			ret=1
		fi
	done

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	echo $ir $er
	# Use return rather than exit: inside $(...) the resulting status is
	# the same, but a direct call can no longer terminate the whole script.
	return $ret
}

# The actual test: measure UC throughput alone, then again while $h1 floods
# the switch with broadcast traffic, and fail if UC egress throughput degrades
# by more than 10%. A full report of the measured rates is printed afterwards.
test_mc_aware()
{
	RET=0

	local -a uc_rate
	uc_rate=($(measure_uc_rate "UC-only"))
	check_err $? "Could not get high enough UC-only ingress rate"
	local ucth1=${uc_rate[1]}

	mausezahn $h1 -p 8000 -c 0 -a own -b bc -t udp -q &

	local d0=$(date +%s)
	local t0=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u0=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	local -a uc_rate_2
	uc_rate_2=($(measure_uc_rate "UC+MC"))
	check_err $? "Could not get high enough UC+MC ingress rate"
	local ucth2=${uc_rate_2[1]}

	local d1=$(date +%s)
	local t1=$(ethtool_stats_get $h3 rx_octets_prio_0)
	local u1=$(ethtool_stats_get $swp1 rx_octets_prio_0)

	# Degradation in percent, clamped at 0 when UC+MC was not slower.
	local deg=$(bc <<< "
			scale=2
			ret = 100 * ($ucth1 - $ucth2) / $ucth1
			if (ret > 0) { ret } else { 0 }
		    ")
	check_err $(bc <<< "$deg > 10")

	# MC rates are averaged over the whole duration of the UC+MC
	# measurement, as bracketed by the d0/d1 timestamps.
	local interval=$((d1 - d0))
	local mc_ir=$(rate $u0 $u1 $interval)
	local mc_er=$(rate $t0 $t1 $interval)

	# Suppress noise from killing mausezahn.
	{ kill %% && wait; } 2>/dev/null

	log_test "UC performace under MC overload"

	echo "UC-only throughput  $(humanize $ucth1)"
	echo "UC+MC throughput    $(humanize $ucth2)"
	echo "Degradation         $deg %"
	echo
	echo "Full report:"
	echo "  UC only:"
	echo "    ingress UC throughput $(humanize ${uc_rate[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate[1]})"
	echo "  UC+MC:"
	echo "    ingress UC throughput $(humanize ${uc_rate_2[0]})"
	echo "    egress UC throughput  $(humanize ${uc_rate_2[1]})"
	echo "    ingress MC throughput $(humanize $mc_ir)"
	echo "    egress MC throughput  $(humanize $mc_er)"
}

trap cleanup EXIT

setup_prepare
setup_wait

tests_run

exit $EXIT_STATUS