1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Test that packets are sampled when tc-sample is used and that reported
5# metadata is correct. Two sets of hosts (with and without LAG) are used, since
6# metadata extraction in mlxsw is a bit different when LAG is involved.
7#
8# +---------------------------------+       +---------------------------------+
9# | H1 (vrf)                        |       | H3 (vrf)                        |
10# |    + $h1                        |       |    + $h3_lag                    |
11# |    | 192.0.2.1/28               |       |    | 192.0.2.17/28              |
12# |    |                            |       |    |                            |
13# |    |  default via 192.0.2.2     |       |    |  default via 192.0.2.18    |
14# +----|----------------------------+       +----|----------------------------+
15#      |                                         |
16# +----|-----------------------------------------|----------------------------+
17# |    | 192.0.2.2/28                            | 192.0.2.18/28              |
18# |    + $rp1                                    + $rp3_lag                   |
19# |                                                                           |
20# |    + $rp2                                    + $rp4_lag                   |
21# |    | 198.51.100.2/28                         | 198.51.100.18/28           |
22# +----|-----------------------------------------|----------------------------+
23#      |                                         |
24# +----|----------------------------+       +----|----------------------------+
25# |    |  default via 198.51.100.2  |       |    |  default via 198.51.100.18 |
26# |    |                            |       |    |                            |
27# |    | 198.51.100.1/28            |       |    | 198.51.100.17/28           |
28# |    + $h2                        |       |    + $h4_lag                    |
29# | H2 (vrf)                        |       | H4 (vrf)                        |
30# +---------------------------------+       +---------------------------------+
31
# Helpers used throughout this file (check_err, log_test, simple_if_init, ...)
# are not defined here; they come from the libraries sourced below, which are
# located relative to this script.
lib_dir=$(dirname "$0")/../../../net/forwarding

# Names of the test functions defined in this file.
# NOTE(review): presumably consumed by tests_run from lib.sh - confirm.
ALL_TESTS="
	tc_sample_rate_test
	tc_sample_max_rate_test
	tc_sample_conflict_test
	tc_sample_group_conflict_test
	tc_sample_md_iif_test
	tc_sample_md_lag_iif_test
	tc_sample_md_oif_test
	tc_sample_md_lag_oif_test
	tc_sample_md_out_tc_test
	tc_sample_md_out_tc_occ_test
	tc_sample_md_latency_test
	tc_sample_acl_group_conflict_test
	tc_sample_acl_rate_test
	tc_sample_acl_max_rate_test
"
# setup_prepare reads NETIFS[p1] through NETIFS[p8].
NUM_NETIFS=8
source "$lib_dir"/lib.sh
source "$lib_dir"/devlink_lib.sh

# Available at https://github.com/Mellanox/libpsample
require_command psample

# File that receives the psample output. It is created only after the
# prerequisite checks above: the EXIT trap that removes it is installed at the
# very end of this script, so creating it earlier would leave a stray temporary
# file behind whenever the script terminates during those checks.
# NOTE(review): assumes lib.sh / require_command may exit the script - confirm.
CAPTURE_FILE=$(mktemp)
57
h1_create()
{
	# Hand $h1 and its host address to simple_if_init (from lib.sh).
	simple_if_init ${h1} 192.0.2.1/28

	# 192.0.2.2 is the address router_create puts on $rp1.
	ip -4 route add default vrf v${h1} \
		nexthop via 192.0.2.2
}
64
h1_destroy()
{
	# Mirror image of h1_create: the default route goes first, then $h1 and
	# its address are passed to simple_if_fini (from lib.sh).
	ip -4 route del default vrf v${h1} \
		nexthop via 192.0.2.2

	simple_if_fini ${h1} 192.0.2.1/28
}
71
h2_create()
{
	# Hand $h2 and its host address to simple_if_init (from lib.sh).
	simple_if_init ${h2} 198.51.100.1/28

	# 198.51.100.2 is the address router_create puts on $rp2.
	ip -4 route add default vrf v${h2} \
		nexthop via 198.51.100.2
}
78
h2_destroy()
{
	# Mirror image of h2_create: the default route goes first, then $h2 and
	# its address are passed to simple_if_fini (from lib.sh).
	ip -4 route del default vrf v${h2} \
		nexthop via 198.51.100.2

	simple_if_fini ${h2} 198.51.100.1/28
}
85
h3_create()
{
	# $h3 is set down and then enslaved to a freshly created 802.3ad bond.
	ip link set dev ${h3} down
	ip link add name ${h3}_bond \
		type bond mode 802.3ad
	ip link set dev ${h3} master ${h3}_bond

	# From here on the bond, not $h3 itself, carries the host address.
	simple_if_init ${h3}_bond 192.0.2.17/28

	# 192.0.2.18 is the address router_create puts on ${rp3}_bond.
	ip -4 route add default vrf v${h3}_bond \
		nexthop via 192.0.2.18
}
96
h3_destroy()
{
	# Reverse of h3_create: route, bond addressing, enslavement, bond.
	ip -4 route del default vrf v${h3}_bond \
		nexthop via 192.0.2.18

	simple_if_fini ${h3}_bond 192.0.2.17/28

	# Release $h3 before the bond device itself is deleted.
	ip link set dev ${h3} nomaster
	ip link del dev ${h3}_bond
}
106
h4_create()
{
	# $h4 is set down and then enslaved to a freshly created 802.3ad bond.
	ip link set dev ${h4} down
	ip link add name ${h4}_bond \
		type bond mode 802.3ad
	ip link set dev ${h4} master ${h4}_bond

	# From here on the bond, not $h4 itself, carries the host address.
	simple_if_init ${h4}_bond 198.51.100.17/28

	# 198.51.100.18 is the address router_create puts on ${rp4}_bond.
	ip -4 route add default vrf v${h4}_bond \
		nexthop via 198.51.100.18
}
117
h4_destroy()
{
	# Reverse of h4_create: route, bond addressing, enslavement, bond.
	ip -4 route del default vrf v${h4}_bond \
		nexthop via 198.51.100.18

	simple_if_fini ${h4}_bond 198.51.100.17/28

	# Release $h4 before the bond device itself is deleted.
	ip link set dev ${h4} nomaster
	ip link del dev ${h4}_bond
}
127
router_create()
{
	# Plain router ports: link up, address, then a clsact qdisc so that the
	# tests can attach ingress/egress filters.
	ip link set dev ${rp1} up
	__addr_add_del ${rp1} add 192.0.2.2/28
	tc qdisc add dev ${rp1} clsact

	ip link set dev ${rp2} up
	__addr_add_del ${rp2} add 198.51.100.2/28
	tc qdisc add dev ${rp2} clsact

	# LAG router ports: the address lives on the bond, while the clsact
	# qdisc is attached to the member port.
	ip link add name ${rp3}_bond \
		type bond mode 802.3ad
	ip link set dev ${rp3} master ${rp3}_bond
	__addr_add_del ${rp3}_bond add 192.0.2.18/28
	tc qdisc add dev ${rp3} clsact
	ip link set dev ${rp3}_bond up

	ip link add name ${rp4}_bond \
		type bond mode 802.3ad
	ip link set dev ${rp4} master ${rp4}_bond
	__addr_add_del ${rp4}_bond add 198.51.100.18/28
	tc qdisc add dev ${rp4} clsact
	ip link set dev ${rp4}_bond up
}
150
router_destroy()
{
	# LAG router ports are dismantled first, in the reverse order of
	# router_create.
	ip link set dev ${rp4}_bond down
	tc qdisc del dev ${rp4} clsact
	__addr_add_del ${rp4}_bond del 198.51.100.18/28
	ip link set dev ${rp4} nomaster
	ip link del dev ${rp4}_bond

	ip link set dev ${rp3}_bond down
	tc qdisc del dev ${rp3} clsact
	__addr_add_del ${rp3}_bond del 192.0.2.18/28
	ip link set dev ${rp3} nomaster
	ip link del dev ${rp3}_bond

	# Plain router ports: qdisc, address, then link down.
	tc qdisc del dev ${rp2} clsact
	__addr_add_del ${rp2} del 198.51.100.2/28
	ip link set dev ${rp2} down

	tc qdisc del dev ${rp1} clsact
	__addr_add_del ${rp1} del 192.0.2.2/28
	ip link set dev ${rp1} down
}
173
setup_prepare()
{
	local setup_step

	# Host-side interfaces.
	h1=${NETIFS[p1]}
	h2=${NETIFS[p4]}
	h3=${NETIFS[p5]}
	h4=${NETIFS[p7]}

	# Router ports facing h1, h2, h3 and h4 respectively.
	rp1=${NETIFS[p2]}
	rp2=${NETIFS[p3]}
	rp3=${NETIFS[p6]}
	rp4=${NETIFS[p8]}

	# vrf_prepare comes from lib.sh; hosts are created before the router.
	for setup_step in vrf_prepare h1_create h2_create h3_create \
			  h4_create router_create; do
		${setup_step}
	done
}
193
cleanup()
{
	local teardown_step

	# pre_cleanup comes from lib.sh.
	pre_cleanup

	# Drop the psample capture file.
	rm -f ${CAPTURE_FILE}

	# Undo setup_prepare in reverse order: router first, hosts last, and
	# finally vrf_cleanup (lib.sh).
	for teardown_step in router_destroy h4_destroy h3_destroy \
			     h2_destroy h1_destroy vrf_cleanup; do
		${teardown_step}
	done
}
208
psample_capture_start()
{
	# Discard whatever a previous capture left behind.
	rm -f ${CAPTURE_FILE}

	# Run psample in the background with stdout and stderr both going to
	# the capture file.
	psample > ${CAPTURE_FILE} 2>&1 &

	# Pause for a second before the caller starts sending traffic.
	sleep 1
}
217
psample_capture_stop()
{
	# Signal the current background job (%%) and, if that worked, wait for
	# it. Any error output from either step is discarded.
	{
		kill %% && wait %%
	} 2> /dev/null
}
222
# $1 - description appended to the test name.
# $2 - destination IP of the generated traffic.
__tc_sample_rate_test()
{
	local desc=$1 dip=$2
	local pkts pct

	RET=0

	# Sample one in 32 packets received on $rp1 into psample group 1.
	tc filter add dev ${rp1} ingress protocol all pref 1 handle 101 \
		matchall skip_sw action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	# 320000 packets at rate 32 should yield 10000 samples.
	ip vrf exec v${h1} ${MZ} ${h1} -c 320000 -d 100usec -p 64 \
		-A 192.0.2.1 -B ${dip} -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	# Count the captured lines of group 1 and express the deviation from
	# 10000 as a percentage.
	pkts=$(grep -e "group 1 " ${CAPTURE_FILE} | wc -l)
	pct=$((100 * (pkts - 10000) / 10000))
	((pct >= -25 && pct <= 25))
	check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"

	log_test "tc sample rate ($desc)"

	tc filter del dev ${rp1} ingress protocol all pref 1 handle 101 matchall
}
251
tc_sample_rate_test()
{
	# 198.51.100.1 is the address of $h2, on the far side of the router.
	__tc_sample_rate_test "forward" \
		198.51.100.1

	# 192.0.2.2 is the address of the router port $rp1 itself.
	__tc_sample_rate_test "local receive" \
		192.0.2.2
}
257
# Check that a matchall sampling rate of 35 * 10^8 is accepted on $rp1 and that
# a rate one above it is rejected.
tc_sample_max_rate_test()
{
	local above_max_rc

	RET=0

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate $((35 * 10 ** 8)) group 1
	check_err $? "Failed to configure sampling rule with max rate"

	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate $((35 * 10 ** 8 + 1)) \
		group 1 &> /dev/null
	above_max_rc=$?
	check_fail $above_max_rc "Managed to configure sampling rate above maximum"

	# If the rule was accepted against expectations, remove it again.
	# Otherwise it would stay installed and make every later test that adds
	# pref 1 handle 101 on $rp1 fail as a side effect.
	if ((above_max_rc == 0)); then
		tc filter del dev $rp1 ingress protocol all pref 1 handle 101 \
			matchall
	fi

	log_test "tc sample maximum rate"
}
275
# Check that a second matchall sampling rule is rejected on a port that already
# has one, and that it is accepted once the first rule is gone.
tc_sample_conflict_test()
{
	local second_rc

	RET=0

	# Test that two sampling rules cannot be configured on the same port,
	# even when they share the same parameters.

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
		skip_sw action sample rate 1024 group 1 &> /dev/null
	second_rc=$?
	check_fail $second_rc "Managed to configure second sampling rule"

	# If the second rule was accepted against expectations, remove it so
	# that the re-add below is not rejected merely because the rule is
	# still installed.
	if ((second_rc == 0)); then
		tc filter del dev $rp1 ingress protocol all pref 2 handle 102 \
			matchall
	fi

	# Delete the first rule and make sure the second rule can now be
	# configured.

	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall

	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule after deletion"

	log_test "tc sample conflict test"

	tc filter del dev $rp1 ingress protocol all pref 2 handle 102 matchall
}
304
# Check that a second matchall sampling rule using a different psample group is
# rejected on a port that already has a sampling rule.
tc_sample_group_conflict_test()
{
	local conflict_rc

	RET=0

	# Test that two sampling rules cannot be configured on the same port
	# with different groups.

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	tc filter add dev $rp1 ingress protocol all pref 2 handle 102 matchall \
		skip_sw action sample rate 1024 group 2 &> /dev/null
	conflict_rc=$?
	check_fail $conflict_rc "Managed to configure sampling rule with conflicting group"

	# If the conflicting rule was accepted against expectations, remove it
	# so that it does not stay installed on $rp1 for the following tests.
	if ((conflict_rc == 0)); then
		tc filter del dev $rp1 ingress protocol all pref 2 handle 102 \
			matchall
	fi

	log_test "tc sample group conflict test"

	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
}
324
# Check that packets sampled on $rp1 ingress report the ifindex of $rp1 as
# their input interface.
tc_sample_md_iif_test()
{
	local ingress_ifindex

	RET=0

	tc filter add dev ${rp1} ingress protocol all pref 1 handle 101 \
		matchall skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	# Traffic from $h1 towards the address of $h2.
	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	# At least one captured line must carry the ifindex of $rp1.
	ingress_ifindex=$(ip -j -p link show dev ${rp1} | jq '.[]["ifindex"]')
	grep -q -e "in-ifindex $ingress_ifindex " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected in-ifindex"

	log_test "tc sample iif"

	tc filter del dev ${rp1} ingress protocol all pref 1 handle 101 matchall
}
350
# LAG variant of the input-interface check: packets sampled on $rp3 ingress
# must report the ifindex of $rp3 (the port itself, as queried below).
tc_sample_md_lag_iif_test()
{
	local ingress_ifindex

	RET=0

	tc filter add dev ${rp3} ingress protocol all pref 1 handle 101 \
		matchall skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	# Traffic from the H3 bond towards the address of the H4 bond.
	ip vrf exec v${h3}_bond ${MZ} ${h3}_bond -c 3200 -d 1msec -p 64 \
		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	# At least one captured line must carry the ifindex of $rp3.
	ingress_ifindex=$(ip -j -p link show dev ${rp3} | jq '.[]["ifindex"]')
	grep -q -e "in-ifindex $ingress_ifindex " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected in-ifindex"

	log_test "tc sample lag iif"

	tc filter del dev ${rp3} ingress protocol all pref 1 handle 101 matchall
}
376
# Check that packets sampled on $rp1 ingress and sent towards $h2 report the
# ifindex of $rp2 as their output interface.
tc_sample_md_oif_test()
{
	local egress_ifindex

	RET=0

	tc filter add dev ${rp1} ingress protocol all pref 1 handle 101 \
		matchall skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	# Traffic from $h1 towards the address of $h2.
	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	# At least one captured line must carry the ifindex of $rp2.
	egress_ifindex=$(ip -j -p link show dev ${rp2} | jq '.[]["ifindex"]')
	grep -q -e "out-ifindex $egress_ifindex " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected out-ifindex"

	log_test "tc sample oif"

	tc filter del dev ${rp1} ingress protocol all pref 1 handle 101 matchall
}
402
# LAG variant of the output-interface check: packets sampled on $rp3 ingress
# and sent towards H4 must report the ifindex of $rp4 (the port itself, as
# queried below).
tc_sample_md_lag_oif_test()
{
	local egress_ifindex

	RET=0

	tc filter add dev ${rp3} ingress protocol all pref 1 handle 101 \
		matchall skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	# Traffic from the H3 bond towards the address of the H4 bond.
	ip vrf exec v${h3}_bond ${MZ} ${h3}_bond -c 3200 -d 1msec -p 64 \
		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	# At least one captured line must carry the ifindex of $rp4.
	egress_ifindex=$(ip -j -p link show dev ${rp4} | jq '.[]["ifindex"]')
	grep -q -e "out-ifindex $egress_ifindex " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected out-ifindex"

	log_test "tc sample lag oif"

	tc filter del dev ${rp3} ingress protocol all pref 1 handle 101 matchall
}
428
# Check the out-tc value reported with sampled packets, first with the default
# egress configuration of $rp2 and then with a prio qdisc installed on it.
tc_sample_md_out_tc_test()
{
	RET=0

	# Output traffic class is not supported on Spectrum-1.
	if [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]]; then
		return
	fi

	tc filter add dev ${rp1} ingress protocol all pref 1 handle 101 \
		matchall skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	# By default, all the packets should go to the same traffic class (0).

	psample_capture_start

	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	grep -q -e "out-tc 0 " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected out-tc (0)"

	# Map all priorities to highest traffic class (7) and check reported
	# out-tc.
	tc qdisc replace dev ${rp2} root handle 1: prio bands 3 \
		priomap 0 0 0 0 0 0 0 0

	psample_capture_start

	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	grep -q -e "out-tc 7 " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have expected out-tc (7)"

	log_test "tc sample out-tc"

	# Remove the prio qdisc and the sampling rule again.
	tc qdisc del dev ${rp2} root handle 1:
	tc filter del dev ${rp1} ingress protocol all pref 1 handle 101 matchall
}
472
# Check that the out-tc-occ value reported with sampled packets matches the
# backlog of the egress qdisc on $rp2 while that port is congested.
tc_sample_md_out_tc_occ_test()
{
	local backlog pct occ
	# Allowed deviation, in percent, between the sampled occupancy and the
	# qdisc backlog. Used both for the check and for the failure message so
	# that the two cannot disagree.
	local occ_tolerance_pct=1

	RET=0

	# Output traffic class occupancy is not supported on Spectrum-1.
	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	# Configure a shaper on egress to create congestion.
	tc qdisc replace dev $rp2 root handle 1: \
		tbf rate 1Mbit burst 256k limit 1M

	psample_capture_start

	# -c 0: keep sending in the background until killed below.
	ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q &

	# Allow congestion to reach steady state.
	sleep 10

	backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')

	# Kill mausezahn. It was started after psample, so it is the current
	# job (%%) at this point.
	{ kill %% && wait %%; } 2>/dev/null

	psample_capture_stop

	# Record last congestion sample (field 16 of the last matching line).
	occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
		cut -d ' ' -f 16)

	# Only do the arithmetic on sane values. A zero or non-numeric backlog
	# (e.g. "null" from jq) would otherwise raise a division-by-zero error
	# that aborts this function before the qdisc and filter are removed.
	if [[ "$backlog" =~ ^[0-9]+$ && "$occ" =~ ^[0-9]+$ ]] &&
	   ((backlog > 0)); then
		pct=$((100 * (occ - backlog) / backlog))
		((-occ_tolerance_pct <= pct && pct <= occ_tolerance_pct))
		check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-${occ_tolerance_pct}%"
	else
		check_err 1 "Could not compare congestion: qdisc backlog is '$backlog', sampled congestion is '$occ'"
	fi

	log_test "tc sample out-tc-occ"

	tc qdisc del dev $rp2 root handle 1:
	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
}
518
# Check that packets sampled on $rp2 egress are reported with a latency
# attribute.
tc_sample_md_latency_test()
{
	RET=0

	# Egress sampling not supported on Spectrum-1.
	if [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]]; then
		return
	fi

	tc filter add dev ${rp2} egress protocol all pref 1 handle 101 \
		matchall skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	# Traffic from $h1 towards the address of $h2.
	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	# Only the presence of the attribute is checked, not its value.
	grep -q -e "latency " ${CAPTURE_FILE}
	check_err $? "Sampled packets do not have latency attribute"

	log_test "tc sample latency"

	tc filter del dev ${rp2} egress protocol all pref 1 handle 101 matchall
}
544
# Check that several flower sampling rules may share a psample group on one
# port, while a rule using a different group is rejected.
tc_sample_acl_group_conflict_test()
{
	local conflict_rc

	RET=0

	# Test that two flower sampling rules cannot be configured on the same
	# port with different groups.

	# Policy-based sampling is not supported on Spectrum-1.
	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return

	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	tc filter add dev $rp1 ingress protocol ip pref 2 handle 102 flower \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule with same group"

	tc filter add dev $rp1 ingress protocol ip pref 3 handle 103 flower \
		skip_sw action sample rate 1024 group 2 &> /dev/null
	conflict_rc=$?
	check_fail $conflict_rc "Managed to configure sampling rule with conflicting group"

	# If the conflicting rule was accepted against expectations, remove it
	# so that it does not stay installed on $rp1 for the following tests.
	if ((conflict_rc == 0)); then
		tc filter del dev $rp1 ingress protocol ip pref 3 handle 103 \
			flower
	fi

	log_test "tc sample (w/ flower) group conflict test"

	tc filter del dev $rp1 ingress protocol ip pref 2 handle 102 flower
	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower
}
572
# $1 - bind point of the flower filter (callers pass "ingress" or "egress").
# $2 - port on which the filter is installed.
__tc_sample_acl_rate_test()
{
	local bind=$1 port=$2
	local pkts pct

	RET=0

	# Policy-based sampling is not supported on Spectrum-1.
	if [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]]; then
		return
	fi

	# Sample one in 32 packets destined to 198.51.100.1 into group 1.
	tc filter add dev ${port} ${bind} protocol ip pref 1 handle 101 \
		flower skip_sw dst_ip 198.51.100.1 \
		action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	# 320000 packets at rate 32 should yield 10000 samples.
	ip vrf exec v${h1} ${MZ} ${h1} -c 320000 -d 100usec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	pkts=$(grep -e "group 1 " ${CAPTURE_FILE} | wc -l)
	pct=$((100 * (pkts - 10000) / 10000))
	((pct >= -25 && pct <= 25))
	check_err $? "Expected 10000 packets, got $pkts packets, which is $pct% off. Required accuracy is +-25%"

	# Setup a filter that should not match any packet and make sure packets
	# are not sampled.
	tc filter del dev ${port} ${bind} protocol ip pref 1 handle 101 flower

	tc filter add dev ${port} ${bind} protocol ip pref 1 handle 101 \
		flower skip_sw dst_ip 198.51.100.10 \
		action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start

	ip vrf exec v${h1} ${MZ} ${h1} -c 3200 -d 1msec -p 64 \
		-A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q

	psample_capture_stop

	# Any line of group 1 in the capture counts as a failure here.
	grep -q -e "group 1 " ${CAPTURE_FILE}
	check_fail $? "Sampled packets when should not"

	log_test "tc sample (w/ flower) rate ($bind)"

	tc filter del dev ${port} ${bind} protocol ip pref 1 handle 101 flower
}
622
tc_sample_acl_rate_test()
{
	# Run the flower rate check once per bind point: ingress of $rp1, then
	# egress of $rp2.
	__tc_sample_acl_rate_test ingress ${rp1}
	__tc_sample_acl_rate_test egress ${rp2}
}
628
# Check that a flower sampling rate of 2^24 - 1 is accepted on $rp1 and that a
# rate of 2^24 is rejected.
tc_sample_acl_max_rate_test()
{
	local above_max_rc

	RET=0

	# Policy-based sampling is not supported on Spectrum-1.
	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return

	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
		skip_sw action sample rate $((2 ** 24 - 1)) group 1
	check_err $? "Failed to configure sampling rule with max rate"

	tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 flower

	tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
		skip_sw action sample rate $((2 ** 24)) \
		group 1 &> /dev/null
	above_max_rc=$?
	check_fail $above_max_rc "Managed to configure sampling rate above maximum"

	# If the rule was accepted against expectations, remove it again so
	# that it does not stay installed on $rp1 after this test.
	if ((above_max_rc == 0)); then
		tc filter del dev $rp1 ingress protocol ip pref 1 handle 101 \
			flower
	fi

	log_test "tc sample (w/ flower) maximum rate"
}
649
# From this point on, cleanup runs on every exit path of the script.
trap cleanup EXIT

# Build the topology. setup_wait is not defined in this file.
# NOTE(review): presumably a lib.sh helper that waits for the interfaces.
setup_prepare
setup_wait

# NOTE(review): tests_run presumably executes the functions in ALL_TESTS.
tests_run

# EXIT_STATUS is not assigned anywhere in this file.
# NOTE(review): presumably maintained by the lib.sh helpers - confirm.
exit ${EXIT_STATUS}
658