1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# This test injects a 10-MB burst of traffic with VLAN tag and 802.1p priority
5# of 1. This stream is consistently prioritized as priority 1, is put to PG
6# buffer 1, and scheduled at TC 1.
7#
8# - the stream first ingresses through $swp1, where it is forwarded to $swp3
9#
10# - then it ingresses through $swp4. Here it is put to a lossless buffer and put
11#   to a small pool ("PFC pool"). The traffic is forwarded to $swp2, which is
12#   shaped, and thus the PFC pool eventually fills, therefore the headroom
13#   fills, and $swp3 is paused.
14#
15# - since $swp3 now can't send traffic, the traffic ingressing $swp1 is kept at
16#   a pool ("overflow pool"). The overflow pool needs to be large enough to
17#   contain the whole burst.
18#
19# - eventually the PFC pool gets some traffic out, headroom therefore gets some
20#   traffic to the pool, and $swp3 is unpaused again. This way the traffic is
21#   gradually forwarded from the overflow pool, through the PFC pool, out of
22#   $swp2, and eventually to $h2.
23#
24# - if PFC works, all lossless flow packets that ingress through $swp1 should
25#   also be seen ingressing $h2. If it doesn't, there will be drops due to
26#   discrepancy between the speeds of $swp1 and $h2.
27#
28# - it should all play out relatively quickly, so that SLL and HLL will not
29#   cause drops.
30#
31# +-----------------------+
32# | H1                    |
33# |   + $h1.111           |
34# |   | 192.0.2.33/28     |
35# |   |                   |
36# |   + $h1               |
37# +---|-------------------+  +--------------------+
38#     |                      |                    |
39# +---|----------------------|--------------------|---------------------------+
40# |   + $swp1          $swp3 +                    + $swp4                     |
41# |   | iPOOL1        iPOOL0 |                    | iPOOL2                    |
42# |   | ePOOL4        ePOOL5 |                    | ePOOL4                    |
43# |   |                1Gbps |                    | 1Gbps                     |
44# |   |        PFC:enabled=1 |                    | PFC:enabled=1             |
45# | +-|----------------------|-+                +-|------------------------+  |
46# | | + $swp1.111  $swp3.111 + |                | + $swp4.111              |  |
47# | |                          |                |                          |  |
48# | | BR1                      |                | BR2                      |  |
49# | |                          |                |                          |  |
50# | |                          |                |         + $swp2.111      |  |
51# | +--------------------------+                +---------|----------------+  |
52# |                                                       |                   |
53# | iPOOL0: 500KB dynamic                                 |                   |
54# | iPOOL1: 10MB static                                   |                   |
55# | iPOOL2: 1MB static                                    + $swp2             |
56# | ePOOL4: 500KB dynamic                                 | iPOOL0            |
57# | ePOOL5: 10MB static                                   | ePOOL6            |
58# | ePOOL6: "infinite" static                             | 200Mbps shaper    |
59# +-------------------------------------------------------|-------------------+
60#                                                         |
61#                                                     +---|-------------------+
62#                                                     |   + $h2            H2 |
63#                                                     |   |                   |
64#                                                     |   + $h2.111           |
65#                                                     |     192.0.2.34/28     |
66#                                                     +-----------------------+
67#
# iPOOL0+ePOOL4 are helper pools for control traffic etc.
69# iPOOL1+ePOOL5 are overflow pools.
70# iPOOL2+ePOOL6 are PFC pools.
71
# Test functions defined in this file, in the order they are listed for the
# test runner (presumably consumed by tests_run from the sourced library).
ALL_TESTS="
	ping_ipv4
	test_qos_pfc
"

# The forwarding selftest library lives relative to this script. "$0" and
# "$lib_dir" are quoted so that a checkout path containing whitespace does not
# get word-split and break the lookup.
lib_dir="$(dirname "$0")/../../../net/forwarding"

# setup_prepare reads NETIFS[p1] through NETIFS[p6], i.e. six interfaces.
# NOTE(review): presumably this must be set before lib.sh is sourced.
NUM_NETIFS=6
source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"
source qos_lib.sh
83
# Byte sizes used for pool and threshold configuration. These are decimal
# units: 1KB is 1000 bytes and 1MB is 1000 * 1000 bytes.
_1KB=1000
_1MB=$((_1KB * _1KB))
_100KB=$((_1MB / 10))
_500KB=$((_1MB / 2))
_10MB=$((_1MB * 10))
89
# Set up host H1: initialize $h1, raise its MTU to 10000 and add the VLAN 111
# upper carrying 192.0.2.33/28.
h1_create()
{
	local vlan_id=111
	local cidr=192.0.2.33/28

	simple_if_init $h1
	mtu_set $h1 10000

	# v$h1 is passed as the third argument; NOTE(review): presumably the
	# VRF set up by simple_if_init -- confirm in lib.sh.
	vlan_create $h1 $vlan_id v$h1 $cidr
}
97
# Undo h1_create in reverse order: drop the VLAN 111 upper, restore the MTU,
# then finalize $h1.
h1_destroy()
{
	local vlan_id=111

	vlan_destroy $h1 $vlan_id

	mtu_restore $h1
	simple_if_fini $h1
}
105
# Set up host H2: initialize $h2, raise its MTU to 10000 and add the VLAN 111
# upper carrying 192.0.2.34/28.
h2_create()
{
	local vlan_id=111
	local cidr=192.0.2.34/28

	simple_if_init $h2
	mtu_set $h2 10000

	# v$h2 is passed as the third argument; NOTE(review): presumably the
	# VRF set up by simple_if_init -- confirm in lib.sh.
	vlan_create $h2 $vlan_id v$h2 $cidr
}
113
# Undo h2_create in reverse order: drop the VLAN 111 upper, restore the MTU,
# then finalize $h2.
h2_destroy()
{
	local vlan_id=111

	vlan_destroy $h2 $vlan_id

	mtu_restore $h2
	simple_if_fini $h2
}
121
# Configure the switch side of the topology: shared-buffer pools, the four
# switch ports ($swp1..$swp4) with their VLAN 111 uppers, qdiscs and PFC
# settings, and the two bridges that stitch the ports together.
switch_create()
{
	local pool_id
	# Per-priority vector selecting priority 1 only. Used both for the
	# priority-to-buffer map and for the PFC enable mask.
	local only_prio1=0,1,0,0,0,0,0,0
	# Root ETS qdisc shared by all four ports:
	# prio 0->TC0 (band 7), prio 1->TC1 (band 6).
	local -a ets_root=(root handle 1: ets bands 8 strict 8 priomap 7 6)

	# pools
	# -----

	# Record the current pool, port-pool and TC-binding settings before
	# touching them; switch_destroy issues the matching restore calls.
	for pool_id in 0 4 1 5 2 6; do
		devlink_pool_size_thtype_save $pool_id
	done

	devlink_port_pool_th_save $swp1 1
	devlink_port_pool_th_save $swp2 6
	devlink_port_pool_th_save $swp3 5
	devlink_port_pool_th_save $swp4 2

	devlink_tc_bind_pool_th_save $swp1 1 ingress
	devlink_tc_bind_pool_th_save $swp2 1 egress
	devlink_tc_bind_pool_th_save $swp3 1 egress
	devlink_tc_bind_pool_th_save $swp4 1 ingress

	# Control traffic pools. Only shrink them; they stay dynamic so that
	# none of the uninteresting quotas need to be changed.
	for pool_id in 0 4; do
		devlink_pool_size_thtype_set $pool_id dynamic $_500KB
	done

	# Overflow pools.
	for pool_id in 1 5; do
		devlink_pool_size_thtype_set $pool_id static $_10MB
	done

	# PFC pools. Strictly speaking the egress PFC pool should be infinite,
	# but it only has to be large enough not to matter in practice, so the
	# 10MB limit is reused.
	devlink_pool_size_thtype_set 2 static $_1MB
	devlink_pool_size_thtype_set 6 static $_10MB

	# $swp1
	# -----

	ip link set dev $swp1 up
	mtu_set $swp1 10000
	vlan_create $swp1 111
	ip link set dev $swp1.111 type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp1 1 $_10MB
	devlink_tc_bind_pool_th_set $swp1 1 ingress 1 $_10MB

	# A qdisc has to be installed before the PG, and therefore the pool
	# assignment, can be configured.
	tc qdisc replace dev $swp1 "${ets_root[@]}"
	__mlnx_qos -i $swp1 --prio2buffer=$only_prio1 >/dev/null

	# $swp2
	# -----

	ip link set dev $swp2 up
	mtu_set $swp2 10000
	vlan_create $swp2 111
	ip link set dev $swp2.111 type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp2 6 $_10MB
	devlink_tc_bind_pool_th_set $swp2 1 egress 6 $_10MB

	# TC1 is the shaped one: a 200Mbit TBF hangs off class 1:7.
	# NOTE(review): presumably class 1:7 is band 6 because ETS classes are
	# numbered from 1 -- confirm against tc-ets(8).
	tc qdisc replace dev $swp2 "${ets_root[@]}"
	tc qdisc replace dev $swp2 parent 1:7 handle 17: \
	   tbf rate 200Mbit burst 131072 limit 1M

	# $swp3
	# -----

	ip link set dev $swp3 up
	mtu_set $swp3 10000
	vlan_create $swp3 111
	ip link set dev $swp3.111 type vlan egress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp3 5 $_10MB
	devlink_tc_bind_pool_th_set $swp3 1 egress 5 $_10MB

	tc qdisc replace dev $swp3 "${ets_root[@]}"

	# PFC must be enabled here for PAUSE to take effect, which in turn
	# requires the lossless priority to have a buffer of its own. Buffer
	# sizes are left alone: there will be no pressure in the "backward"
	# direction.
	__mlnx_qos -i $swp3 --prio2buffer=$only_prio1 >/dev/null
	__mlnx_qos -i $swp3 --pfc=$only_prio1 >/dev/null

	# $swp4
	# -----

	ip link set dev $swp4 up
	mtu_set $swp4 10000
	vlan_create $swp4 111
	ip link set dev $swp4.111 type vlan ingress-qos-map 0:0 1:1

	devlink_port_pool_th_set $swp4 2 $_1MB
	devlink_tc_bind_pool_th_set $swp4 1 ingress 2 $_1MB

	# A qdisc has to be installed before the headroom can be hand-tuned.
	tc qdisc replace dev $swp4 "${ets_root[@]}"
	__mlnx_qos -i $swp4 --prio2buffer=$only_prio1 >/dev/null
	__mlnx_qos -i $swp4 --pfc=$only_prio1 >/dev/null
	# PG0 will get autoconfigured to Xoff. PG1 arbitrarily gets 100K,
	# which (minus 2*MTU) leaves about 80K of delay provision.
	__mlnx_qos -i $swp4 --buffer_size=0,$_100KB,0,0,0,0,0,0 >/dev/null

	# bridges
	# -------

	# BR1 joins $swp1.111 and $swp3.111.
	ip link add name br1 type bridge vlan_filtering 0
	ip link set dev $swp1.111 master br1
	ip link set dev $swp3.111 master br1
	ip link set dev br1 up

	# BR2 joins $swp2.111 and $swp4.111.
	ip link add name br2 type bridge vlan_filtering 0
	ip link set dev $swp2.111 master br2
	ip link set dev $swp4.111 master br2
	ip link set dev br2 up
}
248
# Tear down everything switch_create configured: restore the pools, remove
# the bridges, then unwind each port from $swp4 back to $swp1.
switch_destroy()
{
	local pool_id
	# Per-priority vector with every entry cleared.
	local all_zero=0,0,0,0,0,0,0,0

	# Pool size / threshold type goes back first, so that the limits
	# restored further down can take values that are only valid for the
	# original static / dynamic setting.
	for pool_id in 6 5 4 2 1 0; do
		devlink_pool_size_thtype_restore $pool_id
	done

	# bridges
	# -------

	ip link set dev br2 down
	ip link set dev $swp4.111 nomaster
	ip link set dev $swp2.111 nomaster
	ip link del dev br2

	ip link set dev br1 down
	ip link set dev $swp3.111 nomaster
	ip link set dev $swp1.111 nomaster
	ip link del dev br1

	# $swp4
	# -----

	# Clear headroom size, PFC and the priority map before the qdisc goes.
	__mlnx_qos -i $swp4 --buffer_size=$all_zero >/dev/null
	__mlnx_qos -i $swp4 --pfc=$all_zero >/dev/null
	__mlnx_qos -i $swp4 --prio2buffer=$all_zero >/dev/null
	tc qdisc del dev $swp4 root

	devlink_tc_bind_pool_th_restore $swp4 1 ingress
	devlink_port_pool_th_restore $swp4 2

	vlan_destroy $swp4 111
	mtu_restore $swp4
	ip link set dev $swp4 down

	# $swp3
	# -----

	__mlnx_qos -i $swp3 --pfc=$all_zero >/dev/null
	__mlnx_qos -i $swp3 --prio2buffer=$all_zero >/dev/null
	tc qdisc del dev $swp3 root

	devlink_tc_bind_pool_th_restore $swp3 1 egress
	devlink_port_pool_th_restore $swp3 5

	vlan_destroy $swp3 111
	mtu_restore $swp3
	ip link set dev $swp3 down

	# $swp2
	# -----

	# The shaper under class 1:7 is removed before the root qdisc.
	tc qdisc del dev $swp2 parent 1:7
	tc qdisc del dev $swp2 root

	devlink_tc_bind_pool_th_restore $swp2 1 egress
	devlink_port_pool_th_restore $swp2 6

	vlan_destroy $swp2 111
	mtu_restore $swp2
	ip link set dev $swp2 down

	# $swp1
	# -----

	__mlnx_qos -i $swp1 --prio2buffer=$all_zero >/dev/null
	tc qdisc del dev $swp1 root

	devlink_tc_bind_pool_th_restore $swp1 1 ingress
	devlink_port_pool_th_restore $swp1 1

	vlan_destroy $swp1 111
	mtu_restore $swp1
	ip link set dev $swp1 down
}
328
# Map the six NETIFS entries onto the topology's interface names, record
# $h2's MAC address, and build the hosts and the switch configuration.
setup_prepare()
{
	# Host interfaces.
	h1=${NETIFS[p1]}
	h2=${NETIFS[p4]}

	# Switch ports: $swp1 and $swp2 are paired with $h1 and $h2 in
	# NETIFS; $swp3 and $swp4 form the remaining pair.
	swp1=${NETIFS[p2]}
	swp2=${NETIFS[p3]}
	swp3=${NETIFS[p5]}
	swp4=${NETIFS[p6]}

	# test_qos_pfc uses this as the destination MAC of the burst.
	h2mac=$(mac_get $h2)

	vrf_prepare

	h1_create
	h2_create
	switch_create
}
348
# Tear the whole test topology down again. Installed as the EXIT trap, so it
# runs on every exit path of the script.
cleanup()
{
	local teardown_step

	pre_cleanup

	# Mirror image of setup_prepare: the switch goes first, then the
	# hosts, and the VRF cleanup last.
	for teardown_step in switch_destroy h2_destroy h1_destroy vrf_cleanup; do
		$teardown_step
	done
}
359
# Connectivity check: ping H2's VLAN 111 address (192.0.2.34) from $h1.
ping_ipv4()
{
	local h2_addr=192.0.2.34

	ping_test $h1 $h2_addr
}
364
# Send a burst of priority-1 traffic sized to the 10MB overflow pool from $h1
# and verify that (a) roughly the whole burst ingressed $swp1 and (b) every
# byte that ingressed $swp1 also egressed $swp2, i.e. nothing was dropped.
test_qos_pfc()
{
	RET=0

	# 10M pool; each packet is 8K of payload + headers, counted as 8050
	# bytes when sizing the burst.
	local frame_bytes=8050
	local pkts=$((_10MB / frame_bytes))
	local size=$((frame_bytes * pkts))

	# Baseline priority-1 byte counters: ingress on $swp1, egress on $swp2.
	local rx_before=$(ethtool_stats_get $swp1 rx_octets_prio_1)
	local tx_before=$(ethtool_stats_get $swp2 tx_octets_prio_1)

	# UDP burst tagged VLAN 111 / PCP 1, addressed to $h2.
	local -a burst_args=(
		-p 8000 -Q 1:111 -A 192.0.2.33 -B 192.0.2.34
		-a own -b $h2mac -c $pkts -t udp -q
	)
	$MZ $h1 "${burst_args[@]}"
	# NOTE(review): presumably this gives the shaped port time to drain
	# the burst before the counters are sampled again.
	sleep 2

	local rx_after=$(ethtool_stats_get $swp1 rx_octets_prio_1)
	local tx_after=$(ethtool_stats_get $swp2 tx_octets_prio_1)

	local din=$((rx_after - rx_before))
	local dout=$((tx_after - tx_before))

	# Ingressed volume as an integer percentage of the burst size.
	local pct_in=$((100 * din / size))

	# Pass only when strictly between 95% and 105%.
	! ((pct_in <= 95 || pct_in >= 105))
	check_err $? "Relative ingress out of expected bounds, $pct_in% should be 100%"

	# Lossless means the two byte counts match exactly.
	((din == dout))
	check_err $? "$((din - dout)) bytes out of $din ingressed got lost"

	log_test "PFC"
}
395
396trap cleanup EXIT
397
398bail_on_lldpad
399setup_prepare
400setup_wait
401tests_run
402
403exit $EXIT_STATUS
404