1# SPDX-License-Identifier: GPL-2.0
2
3# This test sends a >1Gbps stream of traffic from H1, to the switch, which
4# forwards it to a 1Gbps port. This 1Gbps stream is then looped back to the
5# switch and forwarded to the port under test $swp3, which is also 1Gbps.
6#
7# This way, $swp3 should be 100% filled with traffic without any of it spilling
8# to the backlog. Any extra packets sent should almost 1:1 go to backlog. That
9# is what H2 is used for--it sends the extra traffic to create backlog.
10#
11# A RED Qdisc is installed on $swp3. The configuration is such that the minimum
12# and maximum size are 1 byte apart, so there is a very clear border under which
13# no marking or dropping takes place, and above which everything is marked or
14# dropped.
15#
16# The test uses the buffer build-up behavior to test the installed RED.
17#
18# In order to test WRED, $swp3 actually contains RED under PRIO, with two
19# different configurations. Traffic is prioritized using 802.1p and relies on
20# the implicit mlxsw configuration, where packet priority is taken 1:1 from the
21# 802.1p marking.
22#
23# +--------------------------+                     +--------------------------+
24# | H1                       |                     | H2                       |
25# |     + $h1.10             |                     |     + $h2.10             |
26# |     | 192.0.2.1/28       |                     |     | 192.0.2.2/28       |
27# |     |                    |                     |     |                    |
28# |     |         $h1.11 +   |                     |     |         $h2.11 +   |
29# |     |  192.0.2.17/28 |   |                     |     |  192.0.2.18/28 |   |
30# |     |                |   |                     |     |                |   |
31# |     \______    ______/   |                     |     \______    ______/   |
32# |            \ /           |                     |            \ /           |
33# |             + $h1        |                     |             + $h2        |
34# +-------------|------------+                     +-------------|------------+
35#               | >1Gbps                                         |
36# +-------------|------------------------------------------------|------------+
37# | SW          + $swp1                                          + $swp2      |
38# |     _______/ \___________                        ___________/ \_______    |
39# |    /                     \                      /                     \   |
40# |  +-|-----------------+   |                    +-|-----------------+   |   |
41# |  | + $swp1.10        |   |                    | + $swp2.10        |   |   |
42# |  |                   |   |        .-------------+ $swp5.10        |   |   |
43# |  |     BR1_10        |   |        |           |                   |   |   |
44# |  |                   |   |        |           |     BR2_10        |   |   |
# |  | + $swp4.10        |   |        |           |                   |   |   |
46# |  +-|-----------------+   |        |           | + $swp3.10        |   |   |
47# |    |                     |        |           +-|-----------------+   |   |
48# |    |   +-----------------|-+      |             |   +-----------------|-+ |
49# |    |   |        $swp1.11 + |      |             |   |        $swp2.11 + | |
50# |    |   |                   |      | .-----------------+ $swp5.11        | |
51# |    |   |      BR1_11       |      | |           |   |                   | |
52# |    |   |                   |      | |           |   |      BR2_11       | |
# |    |   |        $swp4.11 + |      | |           |   |                   | |
54# |    |   +-----------------|-+      | |           |   |        $swp3.11 + | |
55# |    |                     |        | |           |   +-----------------|-+ |
56# |    \_______   ___________/        | |           \___________   _______/   |
57# |            \ /                    \ /                       \ /           |
58# |             + $swp4                + $swp5                   + $swp3      |
59# +-------------|----------------------|-------------------------|------------+
60#               |                      |                         | 1Gbps
61#               \________1Gbps_________/                         |
62#                                   +----------------------------|------------+
63#                                   | H3                         + $h3        |
64#                                   |      _____________________/ \_______    |
65#                                   |     /                               \   |
66#                                   |     |                               |   |
67#                                   |     + $h3.10                 $h3.11 +   |
68#                                   |       192.0.2.3/28    192.0.2.19/28     |
69#                                   +-----------------------------------------+
70
# setup_prepare reads NETIFS[p1] through NETIFS[p8], hence eight interfaces.
NUM_NETIFS=8
CHECK_TC="yes"
# Quote the expansions so that the libraries are still found when the script
# lives under a path that contains whitespace.
lib_dir="$(dirname "$0")/../../../net/forwarding"
source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"
source qos_lib.sh
77
# Print the IPv4 address of host number $1 on VLAN $2. Every VLAN owns a
# 16-address slice of 192.0.2.0/24: VLAN 10 starts at .0, VLAN 11 at .16.
ipaddr()
{
	local host=$1; shift
	local vlan=$1; shift
	local subnet_base=$((16 * (vlan - 10)))

	echo "192.0.2.$((subnet_base + host))"
}
85
# Configure host device $1 as host number $2: initialize it, raise its MTU and
# put VLAN uppers 10 and 11 on it, each with an address from ipaddr. The VLAN
# egress map is 0:0 on VLAN 10 and 0:1 on VLAN 11.
host_create()
{
	local dev=$1; shift
	local host=$1; shift
	local vid

	simple_if_init $dev
	mtu_set $dev 10000

	for vid in 10 11; do
		vlan_create $dev $vid v$dev $(ipaddr $host $vid)/28
		ip link set dev $dev.$vid type vlan egress 0:$((vid - 10))
	done
}
100
# Undo host_create for device $1: remove VLAN 11, then VLAN 10, restore the
# MTU and finalize the interface.
host_destroy()
{
	local dev=$1; shift
	local vid

	for vid in 11 10; do
		vlan_destroy $dev $vid
	done

	mtu_restore $dev
	simple_if_fini $dev
}
110
# Set up H1, which is host number 1 in the addressing scheme of ipaddr.
h1_create()
{
	local host_nr=1

	host_create $h1 $host_nr
}
115
# Tear down everything h1_create configured on $h1.
h1_destroy()
{
	local dev=$h1

	host_destroy $dev
}
120
# Set up H2 (host number 2), attach a clsact qdisc to $h2 and force the link
# to 1Gbps. do_drop_mirror_test installs an ingress filter on $h2.
h2_create()
{
	local host_nr=2

	host_create $h2 $host_nr
	tc qdisc add dev $h2 clsact

	# Part of this suite runs multicast traffic. Once such traffic enters
	# BR2_10 resp. BR2_11, it is flooded to every other port of the bridge.
	# Traffic ingressing through $swp2 therefore leaves not only through
	# $swp3, where it is wanted, but also through $swp5, which is only
	# meant to be the ingress of another stream.
	#
	# That is usually harmless. However, should $swp5 be slower than
	# $swp2, packets pile up at $swp5. Because of shared buffer quotas,
	# that may keep packets from queueing at $swp3 and make the test fail
	# spuriously.
	#
	# Setting the speed of $h2 to 1Gbps avoids this.

	ethtool -s $h2 speed 1000 autoneg off
}
141
# Undo h2_create in reverse order: re-enable autonegotiation on $h2, drop the
# clsact qdisc and tear the host down.
h2_destroy()
{
	local dev=$h2

	ethtool -s $dev autoneg on
	tc qdisc del dev $dev clsact
	host_destroy $dev
}
148
# Set up H3 (host number 3) and force its link to 1Gbps without
# autonegotiation.
h3_create()
{
	local host_nr=3

	host_create $h3 $host_nr
	ethtool -s $h3 speed 1000 autoneg off
}
154
# Undo h3_create: re-enable autonegotiation on $h3, then tear the host down.
h3_destroy()
{
	local dev=$h3

	ethtool -s $dev autoneg on
	host_destroy $dev
}
160
# Build the switch side of the topology: four bridges (BR1 and BR2, one each
# per VLAN), VLAN uppers of the switch ports enslaved to them, the VLAN
# priority maps, fixed 1Gbps port speeds, and a raised pool threshold on
# $swp3.
switch_create()
{
	local intf
	local vlan
	local br

	for br in br1_10 br1_11 br2_10 br2_11; do
		ip link add dev $br type bridge
	done

	for intf in $swp1 $swp2 $swp3 $swp4 $swp5; do
		ip link set dev $intf up
		mtu_set $intf 10000
	done

	# BR1_<vlan> connects $swp1 and $swp4.
	for intf in $swp1 $swp4; do
		for vlan in 10 11; do
			vlan_create $intf $vlan
			ip link set dev $intf.$vlan master br1_$vlan
			ip link set dev $intf.$vlan up
		done
	done

	# BR2_<vlan> connects $swp2, $swp3 and $swp5.
	for intf in $swp2 $swp3 $swp5; do
		for vlan in 10 11; do
			vlan_create $intf $vlan
			ip link set dev $intf.$vlan master br2_$vlan
			ip link set dev $intf.$vlan up
		done
	done

	# Egress map on $swp4 is 0:0 for VLAN 10 and 0:1 for VLAN 11.
	for vlan in 10 11; do
		ip link set dev $swp4.$vlan type vlan egress 0:$((vlan - 10))
	done
	for intf in $swp1 $swp2 $swp5; do
		for vlan in 10 11; do
			ip link set dev $intf.$vlan type vlan ingress 0:0 1:1
		done
	done

	for intf in $swp2 $swp3 $swp4 $swp5; do
		ethtool -s $intf speed 1000 autoneg off
	done

	for br in br1_10 br1_11 br2_10 br2_11; do
		ip link set dev $br up
	done

	# Save the pool 8 threshold of $swp3, then set it to the first field
	# reported by devlink_pool_size_thtype for pool 0.
	local size=$(devlink_pool_size_thtype 0 | cut -d' ' -f 1)
	devlink_port_pool_th_save $swp3 8
	devlink_port_pool_th_set $swp3 8 $size
}
214
# Undo switch_create: restore the pool threshold on $swp3, remove any root
# qdisc left on $swp3, and dismantle the bridges, VLAN uppers, MTUs and port
# speeds.
switch_destroy()
{
	local intf
	local vlan
	local br

	devlink_port_pool_th_restore $swp3 8

	# Errors are silenced, so a missing root qdisc is not reported.
	tc qdisc del dev $swp3 root 2>/dev/null

	for br in br2_11 br2_10 br1_11 br1_10; do
		ip link set dev $br down
	done

	for intf in $swp5 $swp4 $swp3 $swp2; do
		ethtool -s $intf autoneg on
	done

	for intf in $swp5 $swp3 $swp2 $swp4 $swp1; do
		for vlan in 11 10; do
			ip link set dev $intf.$vlan down
			ip link set dev $intf.$vlan nomaster
			vlan_destroy $intf $vlan
		done

		mtu_restore $intf
		ip link set dev $intf down
	done

	for br in br2_11 br2_10 br1_11 br1_10; do
		ip link del dev $br
	done
}
249
# Map the NETIFS entries p1..p8 to the topology variables, remember the MAC
# address of $h3, and create the VRFs, the three hosts and the switch.
setup_prepare()
{
	# Host-facing ends.
	h1=${NETIFS[p1]}
	h2=${NETIFS[p4]}
	h3=${NETIFS[p6]}

	# Switch ports: $swp1..$swp3 face the hosts, $swp4 and $swp5 are
	# looped to each other.
	swp1=${NETIFS[p2]}
	swp2=${NETIFS[p3]}
	swp3=${NETIFS[p5]}
	swp4=${NETIFS[p7]}
	swp5=${NETIFS[p8]}

	h3_mac=$(mac_get $h3)

	vrf_prepare

	h1_create
	h2_create
	h3_create
	switch_create
}
273
# Tear the topology down in the reverse order of setup_prepare.
cleanup()
{
	local teardown

	pre_cleanup

	for teardown in switch_destroy h3_destroy h2_destroy h1_destroy; do
		$teardown
	done

	vrf_cleanup
}
285
# Ping H3 from H1 and from H2, on VLAN 10 and on VLAN 11.
ping_ipv4()
{
	local host
	local vlan
	local devvar

	for host in 1 2; do
		# Name of the variable holding the host device: h1 or h2.
		devvar=h$host
		for vlan in 10 11; do
			ping_test ${!devvar}.$vlan $(ipaddr 3 $vlan) \
				  " from host $host, vlan $vlan"
		done
	done
}
293
# Print the traffic class that belongs to VLAN $1: 0 for VLAN 10, 1 for
# VLAN 11.
get_tc()
{
	local vlan=$1; shift
	local tc=$((vlan - 10))

	echo $tc
}
300
# Print the handle of the qdisc that serves VLAN $1.
get_qdisc_handle()
{
	local vlan=$1; shift
	local tc
	local band

	tc=$(get_tc $vlan)
	band=$((8 - tc))

	# TC0 maps to handle 108:, TC1 to handle 107:.
	printf '10%s:\n' "$band"
}
311
# Print the .backlog statistic of the $swp3 qdisc that serves VLAN $1.
get_qdisc_backlog()
{
	local vlan=$1; shift
	local handle

	handle=$(get_qdisc_handle $vlan)
	qdisc_stats_get $swp3 $handle .backlog
}
318
# Print the tc_transmit_queue_tc_<N> ethtool counter of $swp3, where N is the
# traffic class of VLAN $1 plus 8. do_mc_backlog_test reads it as the backlog
# on the BUM TC.
get_mc_transmit_queue()
{
	local vlan=$1; shift
	local uc_tc
	local mc_tc

	uc_tc=$(get_tc $vlan)
	mc_tc=$((uc_tc + 8))

	ethtool_stats_get $swp3 tc_transmit_queue_tc_$mc_tc
}
326
# Print the ecn_marked ethtool counter of $swp3. The VLAN argument is accepted
# but is not used to select the counter.
get_nmarked()
{
	local vlan=$1; shift
	local counter=ecn_marked

	ethtool_stats_get $swp3 $counter
}
333
# Print the .packets statistic of the $swp3 qdisc that serves VLAN $1, fetched
# through busywait_for_counter with a 1100 timeout and a +1 delta.
get_qdisc_npackets()
{
	local vlan=$1; shift
	local handle

	handle=$(get_qdisc_handle $vlan)
	busywait_for_counter 1100 +1 qdisc_stats_get $swp3 $handle .packets
}
341
# Have $MZ send $3 packets of protocol $2 from H2 to H3 on VLAN $1, with -p
# 8000. Any further arguments are appended to the $MZ command line.
send_packets()
{
	local vlan=$1; shift
	local proto=$1; shift
	local pkts=$1; shift
	local src_ip
	local dst_ip

	src_ip=$(ipaddr 2 $vlan)
	dst_ip=$(ipaddr 3 $vlan)

	$MZ $h2.$vlan -p 8000 -a own -b $h3_mac -A $src_ip -B $dst_ip \
	    -t $proto -q -c $pkts "$@"
}
352
# Send traffic in an attempt to build a backlog of at least $size on the qdisc
# that serves $vlan.
#
# Arguments: $1 - VLAN
#            $2 - requested backlog size
#            $3 - protocol handed to send_packets
#            further arguments are passed through to send_packets
# Outputs:   the last observed backlog, on stdout
# Returns:   0 as soon as the observed backlog is at least $size; 1 if it
#            still is not after 11 rounds of sending.
build_backlog()
{
	local vlan=$1; shift
	local size=$1; shift
	local proto=$1; shift

	local cur=-1
	local diff
	local pkts
	local i=0

	while :; do
		# Wait for the backlog to grow past the previous reading, so
		# that the effect of the last round of sending is visible.
		cur=$(busywait 1100 until_counter_is "> $cur" \
				get_qdisc_backlog $vlan)

		if ((cur >= size)); then
			echo $cur
			return 0
		elif ((i++ > 10)); then
			echo $cur
			return 1
		fi

		# send_packets uses -p 8000; round the number of packets
		# needed to cover the remaining distance up.
		diff=$((size - cur))
		pkts=$(((diff + 7999) / 8000))
		send_packets $vlan $proto $pkts "$@"
	done
}
384
# Measure over a 5 second window which percentage of the packets counted by
# the qdisc of VLAN $1 were ECN-marked, and compare it against condition $2,
# an arithmetic comparison such as "== 0" or ">= 95".
#
# Outputs: the percentage, on stdout. Nothing is printed on stdout when no
#          packets were counted during the window.
# Returns: 0 if the percentage satisfies the condition, non-zero otherwise,
#          including when no packets were counted.
check_marking()
{
	local vlan=$1; shift
	local cond=$1; shift
	local npackets_0 npackets_1 npackets_d
	local nmarked_0 nmarked_1 nmarked_d
	local pct

	npackets_0=$(get_qdisc_npackets $vlan)
	nmarked_0=$(get_nmarked $vlan)
	sleep 5
	npackets_1=$(get_qdisc_npackets $vlan)
	nmarked_1=$(get_nmarked $vlan)

	nmarked_d=$((nmarked_1 - nmarked_0))
	npackets_d=$((npackets_1 - npackets_0))

	# Without this guard the percentage below would divide by zero.
	if ((npackets_d == 0)); then
		echo "check_marking: packet counter did not advance" >&2
		return 1
	fi

	pct=$((100 * nmarked_d / npackets_d))

	echo $pct
	((pct $cond))
}
403
# Checks shared by the ECN tests. $1 is the test name used in the log lines,
# $2 the VLAN and $3 the configured queue limit. Below the limit nothing may
# be marked, above it at least 95% of the packets must be.
ecn_test_common()
{
	local name=$1; shift
	local vlan=$1; shift
	local limit=$1; shift
	local below=$((2 * limit / 3))
	local above=$((3 * limit / 2))
	local label="TC $((vlan - 10)): $name"
	local backlog
	local pct

	# The backlog below the limit is built from UDP. TCP would do as well,
	# but UDP additionally proves that it is accepted while the queue is
	# short. The main stream is TCP; with a misconfigured limit it would
	# show up as ECN-marked here.
	RET=0
	backlog=$(build_backlog $vlan $below udp)
	check_err $? "Could not build the requested backlog"
	pct=$(check_marking $vlan "== 0")
	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
	log_test "$label backlog < limit"

	# Switch to TCP now: once the backlog is over the limit, non-TCP
	# traffic would get early-dropped, and the backlog has to end up above
	# the limit.
	RET=0
	backlog=$(build_backlog $vlan $above tcp tos=0x01)
	check_err $? "Could not build the requested backlog"
	pct=$(check_marking $vlan ">= 95")
	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected >= 95."
	log_test "$label backlog > limit"
}
433
# ECN test for VLAN $1 with queue limit $2: run the common ECN checks, then
# verify that UDP no longer makes it into the backlog above the limit.
do_ecn_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local test_name=ECN
	local src_ip
	local dst_ip

	src_ip=$(ipaddr 1 $vlan)
	dst_ip=$(ipaddr 3 $vlan)
	start_tcp_traffic $h1.$vlan $src_ip $dst_ip $h3_mac tos=0x01
	sleep 1

	ecn_test_common "$test_name" $vlan $limit

	# The common part showed that UDP is accepted below the limit. With
	# the backlog above it, UDP is expected to be dropped, so the attempt
	# to build more backlog has to fail.
	RET=0
	build_backlog $vlan $((2 * limit)) udp >/dev/null
	check_fail $? "UDP traffic went into backlog instead of being early-dropped"
	log_test "TC $((vlan - 10)): $test_name backlog > limit: UDP early-dropped"

	stop_traffic
	sleep 1
}
457
# ECN nodrop test for VLAN $1 with queue limit $2: run the common ECN checks,
# then verify that UDP still makes it into the backlog above the limit.
do_ecn_nodrop_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local test_name="ECN nodrop"
	local src_ip
	local dst_ip

	src_ip=$(ipaddr 1 $vlan)
	dst_ip=$(ipaddr 3 $vlan)
	start_tcp_traffic $h1.$vlan $src_ip $dst_ip $h3_mac tos=0x01
	sleep 1

	ecn_test_common "$test_name" $vlan $limit

	# The common part showed that UDP is accepted below the limit. In
	# nodrop mode it has to keep going to the backlog above the limit too.
	RET=0
	build_backlog $vlan $((2 * limit)) udp >/dev/null
	check_err $? "UDP traffic was early-dropped instead of getting into backlog"
	log_test "TC $((vlan - 10)): $test_name backlog > limit: UDP not dropped"

	stop_traffic
	sleep 1
}
481
# RED test for VLAN $1 with queue limit $2: below the limit the backlog
# builds up and nothing is marked; above the limit the backlog must refuse to
# grow, nothing is marked, and the backlog ends within 10% below the limit.
do_red_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local label="TC $((vlan - 10)): RED"
	local backlog
	local shortfall
	local pct

	# The stream is ECN-capable TCP, so the checks below can confirm that
	# nothing gets marked even with the queue above the limit.
	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) \
			  $h3_mac tos=0x01

	# Building a backlog below the queue limit is expected to succeed.
	RET=0
	backlog=$(build_backlog $vlan $((2 * limit / 3)) tcp tos=0x01)
	check_err $? "Could not build the requested backlog"
	pct=$(check_marking $vlan "== 0")
	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
	log_test "$label backlog < limit"

	# Building one above the limit is expected to fail.
	RET=0
	backlog=$(build_backlog $vlan $((3 * limit / 2)) tcp tos=0x01)
	check_fail $? "Traffic went into backlog instead of being early-dropped"
	pct=$(check_marking $vlan "== 0")
	check_err $? "backlog $backlog / $limit Got $pct% marked packets, expected == 0."
	shortfall=$((limit - backlog))
	pct=$((100 * shortfall / limit))
	! ((pct < 0 || pct > 10))
	check_err $? "backlog $backlog / $limit expected <= 10% distance"
	log_test "$label backlog > limit"

	stop_traffic
	sleep 1
}
517
# Verify that backlog built by traffic started with the "bc" destination is
# reported both by the qdisc of VLAN $1 and by the matching BUM TC ethtool
# counter of $swp3.
#
# Arguments: $1 - VLAN
#            $2 - queue limit; consumed to keep the call signature, not used
do_mc_backlog_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local qbl
	local ebl

	RET=0

	# Two streams, one from H1 and one from H2.
	start_tcp_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) bc
	start_tcp_traffic $h2.$vlan $(ipaddr 2 $vlan) $(ipaddr 3 $vlan) bc

	qbl=$(busywait 5000 until_counter_is ">= 500000" \
		       get_qdisc_backlog $vlan)
	check_err $? "Could not build MC backlog"

	# Verify that we actually see the backlog on BUM TC. Do a busywait as
	# well, performance blips might cause false fail.
	ebl=$(busywait 5000 until_counter_is ">= 500000" \
		       get_mc_transmit_queue $vlan)
	check_err $? "MC backlog reported by qdisc not visible in ethtool"

	# One stop_traffic for each stream started above.
	stop_traffic
	stop_traffic

	log_test "TC $((vlan - 10)): Qdisc reports MC backlog"
}
546
# Verify that a qevent rule fires for dropped packets and only for them.
#
# Arguments: $1 - VLAN
#            $2 - queue limit
#            $3 - trigger name, used in the log line only
#            $4 - subtest; selects qevent_rule_install_$4 and
#                 qevent_rule_uninstall_$4, and is used in the log line
#            $5 - command that prints the current value of the counter to
#                 watch. It is expanded unquoted on purpose, so it may carry
#                 its own arguments.
do_drop_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local trigger=$1; shift
	local subtest=$1; shift
	local fetch_counter=$1; shift
	local base
	local now

	RET=0

	start_traffic $h1.$vlan $(ipaddr 1 $vlan) $(ipaddr 3 $vlan) $h3_mac

	# Create a bit of a backlog and observe no mirroring due to drops.
	qevent_rule_install_$subtest
	base=$($fetch_counter)

	build_backlog $vlan $((2 * limit / 3)) udp >/dev/null

	busywait 1100 until_counter_is ">= $((base + 1))" $fetch_counter >/dev/null
	check_fail $? "Spurious packets observed without buffer pressure"

	# Push to the queue until it's at the limit. The configured limit is
	# rounded by the qdisc and then by the driver, so this is the best we
	# can do to get to the real limit of the system.
	build_backlog $vlan $((3 * limit / 2)) udp >/dev/null

	base=$($fetch_counter)
	send_packets $vlan udp 11

	# 11 packets are sent, but 10 observed ones are enough to pass.
	now=$(busywait 1100 until_counter_is ">= $((base + 10))" $fetch_counter)
	check_err $? "Dropped packets not observed: 11 expected, $((now - base)) seen"

	# When no extra traffic is injected, there should be no mirroring.
	busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
	check_fail $? "Spurious packets observed"

	# When the rule is uninstalled, there should be no mirroring.
	qevent_rule_uninstall_$subtest
	send_packets $vlan udp 11
	busywait 1100 until_counter_is ">= $((base + 20))" $fetch_counter >/dev/null
	check_fail $? "Spurious packets observed after uninstall"

	log_test "TC $((vlan - 10)): ${trigger}ped packets $subtest'd"

	stop_traffic
	sleep 1
}
598
# Install a matchall filter (pref 1234, handle 102) on block 10 that mirrors
# to $swp2.
qevent_rule_install_mirror()
{
	local -a match=(block 10 pref 1234 handle 102 matchall skip_sw)
	local -a act=(action mirred egress mirror dev $swp2 hw_stats disabled)

	tc filter add "${match[@]}" "${act[@]}"
}
604
# Remove the filter that qevent_rule_install_mirror added to block 10.
qevent_rule_uninstall_mirror()
{
	local -a match=(block 10 pref 1234 handle 102 matchall)

	tc filter del "${match[@]}"
}
609
# Print the statistics of the ingress filter with handle 101 on $h2, which
# do_drop_mirror_test installs.
qevent_counter_fetch_mirror()
{
	local where="dev $h2 ingress"

	tc_rule_handle_stats_get "$where" 101
}
614
# Run do_drop_test in its "mirror" flavor for VLAN $1, limit $2 and qevent
# name $3. A UDP-matching drop filter with handle 101 is kept on $h2 ingress
# for the duration of the test; qevent_counter_fetch_mirror reads its
# statistics.
do_drop_mirror_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local qevent_name=$1; shift
	local -a counter_rule=(dev $h2 ingress pref 1 handle 101)

	tc filter add "${counter_rule[@]}" prot ip \
	   flower skip_sw ip_proto udp \
	   action drop

	do_drop_test "$vlan" "$limit" "$qevent_name" mirror \
		     qevent_counter_fetch_mirror

	tc filter del "${counter_rule[@]}" flower
}
630
# Install a matchall filter (pref 1234, handle 102) on block 10 whose action
# is trap.
qevent_rule_install_trap()
{
	local -a match=(block 10 pref 1234 handle 102 matchall skip_sw)
	local -a act=(action trap hw_stats disabled)

	tc filter add "${match[@]}" "${act[@]}"
}
636
# Remove the filter that qevent_rule_install_trap added to block 10.
qevent_rule_uninstall_trap()
{
	local -a match=(block 10 pref 1234 handle 102 matchall)

	tc filter del "${match[@]}"
}
641
# Print the RX packet count of the devlink trap named $1.
qevent_counter_fetch_trap()
{
	local trap=$1; shift

	devlink_trap_rx_packets_get "$trap"
}
648
# Run do_drop_test in its "trap" flavor for VLAN $1 and limit $2. The trap
# name $3 doubles as the trigger name and selects the trap counter to watch.
do_drop_trap_test()
{
	local vlan=$1; shift
	local limit=$1; shift
	local trap_name=$1; shift
	local fetch="qevent_counter_fetch_trap $trap_name"

	do_drop_test "$vlan" "$limit" "$trap_name" trap "$fetch"
}
658