#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
# of 1. This stream is consistently prioritized as priority 1, is put to PG
# buffer 1, and scheduled at TC 1.
#
# - the stream first ingresses through $swp1, where it is forwarded to $swp3
#
# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
#   to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
#   shaped, and thus the PFC pool eventually fills, therefore the headroom
#   fills, and $swp3 is paused.
#
# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
#   a pool ("overflow pool"). The overflow pool needs to be large enough to
#   contain the whole burst.
#
# - eventually the PFC pool gets some traffic out, headroom therefore gets some
#   traffic to the pool, and $swp3 is unpaused again. This way the traffic is
#   gradually forwarded from the overflow pool, through the PFC pool, out of
#   $swp2, and eventually to $h2.
#
# - if PFC works, all lossless flow packets that ingress through $swp1 should
#   also be seen ingressing $h2. If it doesn't, there will be drops due to
#   discrepancy between the speeds of $swp1 and $h2.
#
# - it should all play out relatively quickly, so that SLL and HLL will not
#   cause drops.
#
# +-----------------------+
# | H1                    |
# |   + $h1.111           |
# |   | 192.0.2.33/28     |
# |   |                   |
# |   + $h1               |
# +---|-------------------+  +--------------------+
#     |                      |                    |
# +---|----------------------|--------------------|---------------------------+
# |   + $swp1          $swp3 +                    + $swp4                     |
# |   | iPOOL1        iPOOL0 |                    | iPOOL2                    |
# |   | ePOOL4        ePOOL5 |                    | ePOOL4                    |
# |   |                1Gbps |                    | 1Gbps                     |
# |   |        PFC:enabled=1 |                    | PFC:enabled=1             |
# | +-|----------------------|-+                +-|------------------------+  |
# | | + $swp1.111  $swp3.111 + |                | + $swp4.111              |  |
# | |                          |                |                          |  |
# | | BR1                      |                | BR2                      |  |
# | |                          |                |                          |  |
# | |                          |                |         + $swp2.111      |  |
# | +--------------------------+                +---------|----------------+  |
# |                                                       |                   |
# | iPOOL0: 500KB dynamic                                 |                   |
# | iPOOL1: 10MB static                                   |                   |
# | iPOOL2: 1MB static                                    + $swp2             |
# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
# | ePOOL5: 10MB static                                   | ePOOL6            |
# | ePOOL6: "infinite" static                             | 200Mbps shaper    |
# +-------------------------------------------------------|-------------------+
#                                                         |
#                                                     +---|-------------------+
#                                                     |   + $h2            H2 |
#                                                     |   |                   |
#                                                     |   + $h2.111           |
#                                                     |     192.0.2.34/28     |
#                                                     +-----------------------+
#
# iPOOL0+ePOOL4 is a helper pool for control traffic etc.
# iPOOL1+ePOOL5 are overflow pools.
# iPOOL2+ePOOL6 are PFC pools.

# Test cases, listed by function name; consumed by tests_run at the bottom.
ALL_TESTS="
	ping_ipv4
	test_qos_pfc
"

lib_dir=$(dirname "$0")/../../../net/forwarding

# Six interfaces are used: h1, swp1, swp2, h2, swp3 and swp4 (see
# setup_prepare).
NUM_NETIFS=6
source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"

# Size constants in bytes; note that these are decimal (1KB == 1000).
_1KB=1000
_100KB=$((100 * _1KB))
_500KB=$((500 * _1KB))
_1MB=$((1000 * _1KB))
_10MB=$((10 * _1MB))

# Bring up host 1: jumbo MTU and VLAN 111 carrying 192.0.2.33/28.
h1_create()
{
	simple_if_init "$h1"
	mtu_set "$h1" 10000

	vlan_create "$h1" 111 "v$h1" 192.0.2.33/28
}

# Undo h1_create in reverse order.
h1_destroy()
{
	vlan_destroy "$h1" 111

	mtu_restore "$h1"
	simple_if_fini "$h1"
}

# Bring up host 2: jumbo MTU and VLAN 111 carrying 192.0.2.34/28.
h2_create()
{
	simple_if_init "$h2"
	mtu_set "$h2" 10000

	vlan_create "$h2" 111 "v$h2" 192.0.2.34/28
}

# Undo h2_create in reverse order.
h2_destroy()
{
	vlan_destroy "$h2" 111

	mtu_restore "$h2"
	simple_if_fini "$h2"
}

# Configure the switch: save and resize the shared-buffer pools, set up the
# four switch ports (VLAN 111, QoS maps, pool thresholds, qdiscs, DCB buffers
# and PFC), and join the VLAN devices into bridges br1 and br2.
switch_create()
{
	local lanes_swp4
	local pg1_size

	# pools
	# -----

	devlink_pool_size_thtype_save 0
	devlink_pool_size_thtype_save 4
	devlink_pool_size_thtype_save 1
	devlink_pool_size_thtype_save 5
	devlink_pool_size_thtype_save 2
	devlink_pool_size_thtype_save 6

	devlink_port_pool_th_save "$swp1" 1
	devlink_port_pool_th_save "$swp2" 6
	devlink_port_pool_th_save "$swp3" 5
	devlink_port_pool_th_save "$swp4" 2

	devlink_tc_bind_pool_th_save "$swp1" 1 ingress
	devlink_tc_bind_pool_th_save "$swp2" 1 egress
	devlink_tc_bind_pool_th_save "$swp3" 1 egress
	devlink_tc_bind_pool_th_save "$swp4" 1 ingress

	# Control traffic pools. Just reduce the size. Keep them dynamic so that
	# we don't need to change all the uninteresting quotas.
	devlink_pool_size_thtype_set 0 dynamic "$_500KB"
	devlink_pool_size_thtype_set 4 dynamic "$_500KB"

	# Overflow pools.
	devlink_pool_size_thtype_set 1 static "$_10MB"
	devlink_pool_size_thtype_set 5 static "$_10MB"

	# PFC pools. As per the writ, the size of egress PFC pool should be
	# infinite, but actually it just needs to be large enough to not matter
	# in practice, so reuse the 10MB limit.
	devlink_pool_size_thtype_set 2 static "$_1MB"
	devlink_pool_size_thtype_set 6 static "$_10MB"

	# $swp1
	# -----

	ip link set dev "$swp1" up
	mtu_set "$swp1" 10000
	vlan_create "$swp1" 111
	ip link set dev "$swp1.111" type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set "$swp1" 1 "$_10MB"
	devlink_tc_bind_pool_th_set "$swp1" 1 ingress 1 "$_10MB"

	# Configure qdisc so that we can configure PG and therefore pool
	# assignment.
	tc qdisc replace dev "$swp1" root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	dcb buffer set dev "$swp1" prio-buffer all:0 1:1

	# $swp2
	# -----

	ip link set dev "$swp2" up
	mtu_set "$swp2" 10000
	vlan_create "$swp2" 111
	ip link set dev "$swp2.111" type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set "$swp2" 6 "$_10MB"
	devlink_tc_bind_pool_th_set "$swp2" 1 egress 6 "$_10MB"

	# prio 0->TC0 (band 7), 1->TC1 (band 6). TC1 is shaped.
	tc qdisc replace dev "$swp2" root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	tc qdisc replace dev "$swp2" parent 1:7 handle 17: \
	   tbf rate 200Mbit burst 131072 limit 1M

	# $swp3
	# -----

	ip link set dev "$swp3" up
	mtu_set "$swp3" 10000
	vlan_create "$swp3" 111
	ip link set dev "$swp3.111" type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set "$swp3" 5 "$_10MB"
	devlink_tc_bind_pool_th_set "$swp3" 1 egress 5 "$_10MB"

	# prio 0->TC0 (band 7), 1->TC1 (band 6)
	tc qdisc replace dev "$swp3" root handle 1: \
	   ets bands 8 strict 8 priomap 7 6

	# Need to enable PFC so that PAUSE takes effect. Therefore need to put
	# the lossless prio into a buffer of its own. Don't bother with buffer
	# sizes though, there is not going to be any pressure in the "backward"
	# direction.
	dcb buffer set dev "$swp3" prio-buffer all:0 1:1
	dcb pfc set dev "$swp3" prio-pfc all:off 1:on

	# $swp4
	# -----

	ip link set dev "$swp4" up
	mtu_set "$swp4" 10000
	vlan_create "$swp4" 111
	ip link set dev "$swp4.111" type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set "$swp4" 2 "$_1MB"
	devlink_tc_bind_pool_th_set "$swp4" 1 ingress 2 "$_1MB"

	# Configure qdisc so that we can hand-tune headroom.
	tc qdisc replace dev "$swp4" root handle 1: \
	   ets bands 8 strict 8 priomap 7 6
	dcb buffer set dev "$swp4" prio-buffer all:0 1:1
	dcb pfc set dev "$swp4" prio-pfc all:off 1:on
	# PG0 will get autoconfigured to Xoff, give PG1 arbitrarily 100K, which
	# is (-2*MTU) about 80K of delay provision.
	pg1_size=$_100KB

	setup_wait_dev_with_timeout "$swp4"

	# Keep only what follows "Lanes: " in the ethtool output. If ethtool
	# prints no such line the value is empty and the comparison below is
	# simply false.
	lanes_swp4=$(ethtool "$swp4" | grep 'Lanes:')
	lanes_swp4=${lanes_swp4#*"Lanes: "}

	# 8-lane ports use two buffers among which the configured buffer
	# is split, so double the size to get twice (20K + 80K).
	if [[ $lanes_swp4 -eq 8 ]]; then
		pg1_size=$((pg1_size * 2))
	fi

	dcb buffer set dev "$swp4" buffer-size all:0 "1:$pg1_size"

	# bridges
	# -------

	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev "$swp1.111" master br1
	ip link set dev "$swp3.111" master br1
	ip link set dev br1 up

	ip link add name br2 type bridge vlan_filtering 0
	ip link set dev "$swp2.111" master br2
	ip link set dev "$swp4.111" master br2
	ip link set dev br2 up
}

# Tear down everything switch_create set up: restore pools, remove the
# bridges, then unconfigure the ports from $swp4 back to $swp1.
switch_destroy()
{
	# Do this first so that we can reset the limits to values that are only
	# valid for the original static / dynamic setting.
	devlink_pool_size_thtype_restore 6
	devlink_pool_size_thtype_restore 5
	devlink_pool_size_thtype_restore 4
	devlink_pool_size_thtype_restore 2
	devlink_pool_size_thtype_restore 1
	devlink_pool_size_thtype_restore 0

	# bridges
	# -------

	ip link set dev br2 down
	ip link set dev "$swp4.111" nomaster
	ip link set dev "$swp2.111" nomaster
	ip link del dev br2

	ip link set dev br1 down
	ip link set dev "$swp3.111" nomaster
	ip link set dev "$swp1.111" nomaster
	ip link del dev br1

	# $swp4
	# -----

	dcb buffer set dev "$swp4" buffer-size all:0
	dcb pfc set dev "$swp4" prio-pfc all:off
	dcb buffer set dev "$swp4" prio-buffer all:0
	tc qdisc del dev "$swp4" root

	devlink_tc_bind_pool_th_restore "$swp4" 1 ingress
	devlink_port_pool_th_restore "$swp4" 2

	vlan_destroy "$swp4" 111
	mtu_restore "$swp4"
	ip link set dev "$swp4" down

	# $swp3
	# -----

	dcb pfc set dev "$swp3" prio-pfc all:off
	dcb buffer set dev "$swp3" prio-buffer all:0
	tc qdisc del dev "$swp3" root

	devlink_tc_bind_pool_th_restore "$swp3" 1 egress
	devlink_port_pool_th_restore "$swp3" 5

	vlan_destroy "$swp3" 111
	mtu_restore "$swp3"
	ip link set dev "$swp3" down

	# $swp2
	# -----

	tc qdisc del dev "$swp2" parent 1:7
	tc qdisc del dev "$swp2" root

	devlink_tc_bind_pool_th_restore "$swp2" 1 egress
	devlink_port_pool_th_restore "$swp2" 6

	vlan_destroy "$swp2" 111
	mtu_restore "$swp2"
	ip link set dev "$swp2" down

	# $swp1
	# -----

	dcb buffer set dev "$swp1" prio-buffer all:0
	tc qdisc del dev "$swp1" root

	devlink_tc_bind_pool_th_restore "$swp1" 1 ingress
	devlink_port_pool_th_restore "$swp1" 1

	vlan_destroy "$swp1" 111
	mtu_restore "$swp1"
	ip link set dev "$swp1" down
}

# Map the NETIFS entries p1..p6 to the named interfaces, record the MAC of
# $h2 in h2mac, and build the host and switch configuration.
setup_prepare()
{
	h1=${NETIFS[p1]}
	swp1=${NETIFS[p2]}

	swp2=${NETIFS[p3]}
	h2=${NETIFS[p4]}

	swp3=${NETIFS[p5]}
	swp4=${NETIFS[p6]}

	h2mac=$(mac_get "$h2")

	vrf_prepare

	h1_create
	h2_create
	switch_create
}

# EXIT trap handler: tear down in the reverse order of setup_prepare.
cleanup()
{
	pre_cleanup

	switch_destroy
	h2_destroy
	h1_destroy

	vrf_cleanup
}

# Connectivity check from $h1 to the address configured on $h2.111.
ping_ipv4()
{
	ping_test "$h1" 192.0.2.34
}

# Send the burst from $h1 and compare the priority-1 octet counters: bytes
# received on $swp1 must be within 5% of the bytes sent, and bytes transmitted
# on $swp2 must equal the bytes received on $swp1.
test_qos_pfc()
{
	RET=0

	# 10M pool, each packet is 8K of payload + headers
	local pkts=$((_10MB / 8050))
	local size=$((pkts * 8050))
	local in0
	local out0
	local in1
	local out1

	in0=$(ethtool_stats_get "$swp1" rx_octets_prio_1)
	out0=$(ethtool_stats_get "$swp2" tx_octets_prio_1)

	# $MZ is deliberately unquoted so that a value holding extra arguments
	# still splits into separate words.
	$MZ "$h1" -p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34 \
		-a own -b "$h2mac" -c "$pkts" -t udp -q
	sleep 2

	in1=$(ethtool_stats_get "$swp1" rx_octets_prio_1)
	out1=$(ethtool_stats_get "$swp2" tx_octets_prio_1)

	local din=$((in1 - in0))
	local dout=$((out1 - out0))

	local pct_in=$((din * 100 / size))

	((pct_in > 95 && pct_in < 105))
	check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"

	((dout == din))
	check_err $? "$((din - dout)) bytes out of $din ingressed got lost"

	log_test "PFC"
}

bail_on_lldpad "configure DCB" "configure Qdiscs"

trap cleanup EXIT
setup_prepare
setup_wait
tests_run

# Left unquoted on purpose: an empty value then falls back to the status of
# the last command instead of making `exit` fail on a non-numeric argument.
# EXIT_STATUS is presumably maintained by the sourced lib.sh -- confirm.
exit $EXIT_STATUS