#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
# of 1. This stream is consistently prioritized as priority 1, is put to PG
# buffer 1, and scheduled at TC 1.
#
# - the stream first ingresses through $swp1, where it is forwarded to $swp3
#
# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
#   to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
#   shaped, and thus the PFC pool eventually fills, therefore the headroom
#   fills, and $swp3 is paused.
#
# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
#   a pool ("overflow pool"). The overflow pool needs to be large enough to
#   contain the whole burst.
#
# - eventually the PFC pool gets some traffic out, headroom therefore gets some
#   traffic to the pool, and $swp3 is unpaused again. This way the traffic is
#   gradually forwarded from the overflow pool, through the PFC pool, out of
#   $swp2, and eventually to $h2.
#
# - if PFC works, all lossless flow packets that ingress through $swp1 should
#   also be seen ingressing $h2. If it doesn't, there will be drops due to
#   discrepancy between the speeds of $swp1 and $h2.
#
# - it should all play out relatively quickly, so that SLL and HLL will not
#   cause drops.
#
# +-----------------------+
# | H1                    |
# |   + $h1.111           |
# |   | 192.0.2.33/28     |
# |   |                   |
# |   + $h1               |
# +---|-------------------+  +--------------------+
#     |                      |                    |
# +---|----------------------|--------------------|---------------------------+
# |   + $swp1          $swp3 +                    + $swp4                     |
# |   | iPOOL1        iPOOL0 |                    | iPOOL2                    |
# |   | ePOOL4        ePOOL5 |                    | ePOOL4                    |
# |   |                1Gbps |                    | 1Gbps                     |
# |   |        PFC:enabled=1 |                    | PFC:enabled=1             |
# | +-|----------------------|-+                +-|------------------------+  |
# | | + $swp1.111  $swp3.111 + |                | + $swp4.111              |  |
# | |                          |                |                          |  |
# | | BR1                      |                | BR2                      |  |
# | |                          |                |                          |  |
# | |                          |                |         + $swp2.111      |  |
# | +--------------------------+                +---------|----------------+  |
# |                                                       |                   |
# | iPOOL0: 500KB dynamic                                 |                   |
# | iPOOL1: 10MB static                                   |                   |
# | iPOOL2: 1MB static                                    + $swp2             |
# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
# | ePOOL5: 10MB static                                   | ePOOL6            |
# | ePOOL6: "infinite" static                             | 200Mbps shaper    |
# +-------------------------------------------------------|-------------------+
#                                                         |
#                                                     +---|-------------------+
#                                                     |   + $h2            H2 |
#                                                     |   |                   |
#                                                     |   + $h2.111           |
#                                                     |     192.0.2.34/28     |
#                                                     +-----------------------+
#
# iPOOL0+ePOOL4 is a helper pool for control traffic etc.
# iPOOL1+ePOOL5 are overflow pools.
# iPOOL2+ePOOL6 are PFC pools.

# Test cases executed by tests_run, in this order.
ALL_TESTS="
	ping_ipv4
	test_qos_pfc
"

lib_dir=$(dirname $0)/../../../net/forwarding

NUM_NETIFS=6
source $lib_dir/lib.sh
source $lib_dir/devlink_lib.sh
source qos_lib.sh

# Sizes in bytes, built from a decimal kilobyte (1KB = 1000 bytes). They are
# used below for pool sizes, thresholds, headroom and the burst length.
_1KB=1000
_100KB=$((100 * _1KB))
_500KB=$((500 * _1KB))
_1MB=$((1000 * _1KB))
_10MB=$((10 * _1MB))

# Set up host 1: initialize $h1, raise its MTU to 10000 and create VLAN 111 on
# top of it with address 192.0.2.33/28.
h1_create()
{
	simple_if_init $h1
	mtu_set $h1 10000

	vlan_create $h1 111 v$h1 192.0.2.33/28
}

# Undo h1_create, in reverse order.
h1_destroy()
{
	vlan_destroy $h1 111

	mtu_restore $h1
	simple_if_fini $h1
}

# Set up host 2: initialize $h2, raise its MTU to 10000 and create VLAN 111 on
# top of it with address 192.0.2.34/28.
h2_create()
{
	simple_if_init $h2
	mtu_set $h2 10000

	vlan_create $h2 111 v$h2 192.0.2.34/28
}

# Undo h2_create, in reverse order.
h2_destroy()
{
	vlan_destroy $h2 111

	mtu_restore $h2
	simple_if_fini $h2
}

# Configure the switch side of the topology: save and then reconfigure the
# shared buffer pools, port quotas and TC bindings, bring up $swp1..$swp4 with
# VLAN 111 and the qdisc / PFC / buffer settings each port needs, and finally
# bridge $swp1.111 with $swp3.111 (br1) and $swp2.111 with $swp4.111 (br2).
# Everything changed here is reverted by switch_destroy.
switch_create()
{
	# pools
	# -----

	devlink_pool_size_thtype_save 0
	devlink_pool_size_thtype_save 4
	devlink_pool_size_thtype_save 1
	devlink_pool_size_thtype_save 5
	devlink_pool_size_thtype_save 2
	devlink_pool_size_thtype_save 6

	devlink_port_pool_th_save $swp1 1
	devlink_port_pool_th_save $swp2 6
	devlink_port_pool_th_save $swp3 5
	devlink_port_pool_th_save $swp4 2

	devlink_tc_bind_pool_th_save $swp1 1 ingress
	devlink_tc_bind_pool_th_save $swp2 1 egress
	devlink_tc_bind_pool_th_save $swp3 1 egress
	devlink_tc_bind_pool_th_save $swp4 1 ingress

	# Control traffic pools. Just reduce the size. Keep them dynamic so that
	# we don't need to change all the uninteresting quotas.
	devlink_pool_size_thtype_set 0 dynamic $_500KB
	devlink_pool_size_thtype_set 4 dynamic $_500KB

	# Overflow pools.
	devlink_pool_size_thtype_set 1 static $_10MB
	devlink_pool_size_thtype_set 5 static $_10MB

	# PFC pools. As per the writ, the size of egress PFC pool should be
	# infinite, but actually it just needs to be large enough to not matter
	# in practice, so reuse the 10MB limit.
	devlink_pool_size_thtype_set 2 static $_1MB
	devlink_pool_size_thtype_set 6 static $_10MB

	# $swp1
	# -----

	ip link set dev $swp1 up
	mtu_set $swp1 10000
	vlan_create $swp1 111
	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp1 1 $_10MB
	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB

	# Configure qdisc so that we can configure PG and therefore pool
	# assignment.
	tc qdisc replace dev $swp1 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	__mlnx_qos -i $swp1 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null

	# $swp2
	# -----

	ip link set dev $swp2 up
	mtu_set $swp2 10000
	vlan_create $swp2 111
	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp2 6 $_10MB
	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB

	# prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
	tc qdisc replace dev $swp2 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	tc qdisc replace dev $swp2 parent 1:7 handle 17: \
	   tbf rate 200Mbit burst 131072 limit 1M

	# $swp3
	# -----

	ip link set dev $swp3 up
	mtu_set $swp3 10000
	vlan_create $swp3 111
	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp3 5 $_10MB
	devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB

	# prio 0->TC0 (band 7), 1->TC1 (band 6)
	tc qdisc replace dev $swp3 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6

	# Need to enable PFC so that PAUSE takes effect. Therefore need to put
	# the lossless prio into a buffer of its own. Don't bother with buffer
	# sizes though, there is not going to be any pressure in the "backward"
	# direction.
	__mlnx_qos -i $swp3 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp3 --pfc=0,1,0,0,0,0,0,0 >/dev/null

	# $swp4
	# -----

	ip link set dev $swp4 up
	mtu_set $swp4 10000
	vlan_create $swp4 111
	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp4 2 $_1MB
	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB

	# Configure qdisc so that we can hand-tune headroom.
	tc qdisc replace dev $swp4 root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	__mlnx_qos -i $swp4 --prio2buffer=0,1,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp4 --pfc=0,1,0,0,0,0,0,0 >/dev/null
	# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
	# is (-2*MTU) about 80K of delay provision.
	# The headroom being tuned is that of $swp4, the lossless ingress port;
	# switch_destroy resets --buffer_size on $swp4 accordingly.
	__mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null

	# bridges
	# -------

	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev $swp1.111 master br1
	ip link set dev $swp3.111 master br1
	ip link set dev br1 up

	ip link add name br2 type bridge vlan_filtering 0
	ip link set dev $swp2.111 master br2
	ip link set dev $swp4.111 master br2
	ip link set dev br2 up
}

# Revert switch_create: restore pool sizes / threshold types, tear down the
# bridges, and then for each port (in the order $swp4, $swp3, $swp2, $swp1)
# reset the buffer / PFC configuration, delete the qdiscs, restore the saved
# quotas and bindings, and remove the VLAN, MTU override and link-up state.
switch_destroy()
{
	# Do this first so that we can reset the limits to values that are only
	# valid for the original static / dynamic setting.
	devlink_pool_size_thtype_restore 6
	devlink_pool_size_thtype_restore 5
	devlink_pool_size_thtype_restore 4
	devlink_pool_size_thtype_restore 2
	devlink_pool_size_thtype_restore 1
	devlink_pool_size_thtype_restore 0

	# bridges
	# -------

	ip link set dev br2 down
	ip link set dev $swp4.111 nomaster
	ip link set dev $swp2.111 nomaster
	ip link del dev br2

	ip link set dev br1 down
	ip link set dev $swp3.111 nomaster
	ip link set dev $swp1.111 nomaster
	ip link del dev br1

	# $swp4
	# -----

	__mlnx_qos -i $swp4 --buffer_size=0,0,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp4 --pfc=0,0,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp4 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
	tc qdisc del dev $swp4 root

	devlink_tc_bind_pool_th_restore $swp4 1 ingress
	devlink_port_pool_th_restore $swp4 2

	vlan_destroy $swp4 111
	mtu_restore $swp4
	ip link set dev $swp4 down

	# $swp3
	# -----

	__mlnx_qos -i $swp3 --pfc=0,0,0,0,0,0,0,0 >/dev/null
	__mlnx_qos -i $swp3 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
	tc qdisc del dev $swp3 root

	devlink_tc_bind_pool_th_restore $swp3 1 egress
	devlink_port_pool_th_restore $swp3 5

	vlan_destroy $swp3 111
	mtu_restore $swp3
	ip link set dev $swp3 down

	# $swp2
	# -----

	tc qdisc del dev $swp2 parent 1:7
	tc qdisc del dev $swp2 root

	devlink_tc_bind_pool_th_restore $swp2 1 egress
	devlink_port_pool_th_restore $swp2 6

	vlan_destroy $swp2 111
	mtu_restore $swp2
	ip link set dev $swp2 down

	# $swp1
	# -----

	__mlnx_qos -i $swp1 --prio2buffer=0,0,0,0,0,0,0,0 >/dev/null
	tc qdisc del dev $swp1 root

	devlink_tc_bind_pool_th_restore $swp1 1 ingress
	devlink_port_pool_th_restore $swp1 1

	vlan_destroy $swp1 111
	mtu_restore $swp1
	ip link set dev $swp1 down
}

# Map the six NETIFS entries onto the names used by this test, record the MAC
# address of $h2 in h2mac (used as the destination MAC of the test stream),
# and build the hosts and the switch configuration.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	swp4=${NETIFS[p6]}

	h2mac=$(mac_get $h2)

	vrf_prepare

	h1_create
	h2_create
	switch_create
}

# EXIT trap handler: tear everything down in the reverse order of
# setup_prepare.
cleanup()
{
	pre_cleanup

	switch_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup
}

# Connectivity check: ping H2's address (192.0.2.34) from $h1.
ping_ipv4()
{
	ping_test $h1 192.0.2.34
}

# Send a burst of roughly 10MB of priority-1 traffic on VLAN 111 from $h1 to
# $h2 and compare per-priority byte counters: the rx_octets_prio_1 delta on
# $swp1 must be within (95%, 105%) of the nominal burst size, and the
# tx_octets_prio_1 delta on $swp2 must equal it exactly, i.e. nothing that
# ingressed was lost.
test_qos_pfc()
{
	RET=0

	# 10M pool, each packet is 8K of payload + headers
	local pkts=$((_10MB / 8050))
	local size=$((pkts * 8050))
	local in0=$(ethtool_stats_get $swp1 rx_octets_prio_1)
	local out0=$(ethtool_stats_get $swp2 tx_octets_prio_1)

	$MZ $h1 -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
		-a own -b $h2mac -c $pkts -t udp -q
	# Give the shaped egress time to drain before sampling the counters.
	sleep 2

	local in1=$(ethtool_stats_get $swp1 rx_octets_prio_1)
	local out1=$(ethtool_stats_get $swp2 tx_octets_prio_1)

	local din=$((in1 - in0))
	local dout=$((out1 - out0))

	local pct_in=$((din * 100 / size))

	((pct_in > 95 && pct_in < 105))
	check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"

	((dout == din))
	check_err $? "$((din - dout)) bytes out of $din ingressed got lost"

	log_test "PFC"
}

trap cleanup EXIT

bail_on_lldpad
setup_prepare
setup_wait
tests_run

exit $EXIT_STATUS