1#!/bin/bash
2# SPDX-License-Identifier: GPL-2.0
3#
4# Test that packets are sampled when tc-sample is used and that reported
5# metadata is correct. Two sets of hosts (with and without LAG) are used, since
6# metadata extraction in mlxsw is a bit different when LAG is involved.
7#
8# +---------------------------------+       +---------------------------------+
9# | H1 (vrf)                        |       | H3 (vrf)                        |
10# |    + $h1                        |       |    + $h3_lag                    |
11# |    | 192.0.2.1/28               |       |    | 192.0.2.17/28              |
12# |    |                            |       |    |                            |
13# |    |  default via 192.0.2.2     |       |    |  default via 192.0.2.18    |
14# +----|----------------------------+       +----|----------------------------+
15#      |                                         |
16# +----|-----------------------------------------|----------------------------+
17# |    | 192.0.2.2/28                            | 192.0.2.18/28              |
18# |    + $rp1                                    + $rp3_lag                   |
19# |                                                                           |
20# |    + $rp2                                    + $rp4_lag                   |
21# |    | 198.51.100.2/28                         | 198.51.100.18/28           |
22# +----|-----------------------------------------|----------------------------+
23#      |                                         |
24# +----|----------------------------+       +----|----------------------------+
25# |    |  default via 198.51.100.2  |       |    |  default via 198.51.100.18 |
26# |    |                            |       |    |                            |
27# |    | 198.51.100.1/28            |       |    | 198.51.100.17/28           |
28# |    + $h2                        |       |    + $h4_lag                    |
29# | H2 (vrf)                        |       | H4 (vrf)                        |
30# +---------------------------------+       +---------------------------------+
31
# Directory holding the shared forwarding selftest libraries, resolved
# relative to this script. Quoted so that a script path containing whitespace
# does not get word-split.
lib_dir="$(dirname "$0")/../../../net/forwarding"

# Names of the test functions defined in this file. The variable is not read
# here; presumably it is consumed by tests_run from the sourced library.
ALL_TESTS="
	tc_sample_rate_test
	tc_sample_max_rate_test
	tc_sample_group_conflict_test
	tc_sample_md_iif_test
	tc_sample_md_lag_iif_test
	tc_sample_md_oif_test
	tc_sample_md_lag_oif_test
	tc_sample_md_out_tc_test
	tc_sample_md_out_tc_occ_test
	tc_sample_md_latency_test
	tc_sample_acl_group_conflict_test
	tc_sample_acl_rate_test
	tc_sample_acl_max_rate_test
"
# setup_prepare() reads eight interfaces (p1..p8) out of NETIFS.
NUM_NETIFS=8
# Scratch file that receives the output of the psample utility; removed by
# cleanup().
CAPTURE_FILE=$(mktemp)
source "$lib_dir/lib.sh"
source "$lib_dir/devlink_lib.sh"

# Available at https://github.com/Mellanox/libpsample
require_command psample
56
# Set up host H1: hand $h1 and its address to simple_if_init, then install a
# default route via the router (192.0.2.2) in VRF v$h1.
h1_create()
{
	local host_addr=192.0.2.1/28
	local gateway=192.0.2.2

	simple_if_init "$h1" "$host_addr"
	ip -4 route add default vrf "v$h1" nexthop via "$gateway"
}
63
# Undo h1_create(): drop the default route from VRF v$h1 first, then pass the
# interface and its address to simple_if_fini.
h1_destroy()
{
	local host_addr=192.0.2.1/28
	local gateway=192.0.2.2

	ip -4 route del default vrf "v$h1" nexthop via "$gateway"
	simple_if_fini "$h1" "$host_addr"
}
70
# Set up host H2: hand $h2 and its address to simple_if_init, then install a
# default route via the router (198.51.100.2) in VRF v$h2.
h2_create()
{
	local host_addr=198.51.100.1/28
	local gateway=198.51.100.2

	simple_if_init "$h2" "$host_addr"
	ip -4 route add default vrf "v$h2" nexthop via "$gateway"
}
77
# Undo h2_create(): drop the default route from VRF v$h2 first, then pass the
# interface and its address to simple_if_fini.
h2_destroy()
{
	local host_addr=198.51.100.1/28
	local gateway=198.51.100.2

	ip -4 route del default vrf "v$h2" nexthop via "$gateway"
	simple_if_fini "$h2" "$host_addr"
}
84
# Set up host H3 behind a LAG: enslave $h3 to a new 802.3ad bond named
# ${h3}_bond, give the bond the host address through simple_if_init and add a
# default route via 192.0.2.18 in the bond's VRF.
h3_create()
{
	local lag_dev=${h3}_bond

	# The port is taken down before it is enslaved to the bond.
	ip link set dev "$h3" down
	ip link add name "$lag_dev" type bond mode 802.3ad
	ip link set dev "$h3" master "$lag_dev"

	simple_if_init "$lag_dev" 192.0.2.17/28
	ip -4 route add default vrf "v$lag_dev" nexthop via 192.0.2.18
}
95
# Undo h3_create(): remove the default route, release the bond through
# simple_if_fini, detach $h3 from the bond and delete the bond device.
h3_destroy()
{
	local lag_dev=${h3}_bond

	ip -4 route del default vrf "v$lag_dev" nexthop via 192.0.2.18
	simple_if_fini "$lag_dev" 192.0.2.17/28

	ip link set dev "$h3" nomaster
	ip link del dev "$lag_dev"
}
105
# Set up host H4 behind a LAG: enslave $h4 to a new 802.3ad bond named
# ${h4}_bond, give the bond the host address through simple_if_init and add a
# default route via 198.51.100.18 in the bond's VRF.
h4_create()
{
	local lag_dev=${h4}_bond

	# The port is taken down before it is enslaved to the bond.
	ip link set dev "$h4" down
	ip link add name "$lag_dev" type bond mode 802.3ad
	ip link set dev "$h4" master "$lag_dev"

	simple_if_init "$lag_dev" 198.51.100.17/28
	ip -4 route add default vrf "v$lag_dev" nexthop via 198.51.100.18
}
116
# Undo h4_create(): remove the default route, release the bond through
# simple_if_fini, detach $h4 from the bond and delete the bond device.
h4_destroy()
{
	local lag_dev=${h4}_bond

	ip -4 route del default vrf "v$lag_dev" nexthop via 198.51.100.18
	simple_if_fini "$lag_dev" 198.51.100.17/28

	ip link set dev "$h4" nomaster
	ip link del dev "$lag_dev"
}
126
# Configure the four router ports. $rp1 and $rp2 are plain ports: brought up,
# addressed, and given a clsact qdisc. $rp3 and $rp4 are each enslaved to an
# 802.3ad bond (${rpN}_bond) that carries the address; the clsact qdisc is
# attached to the member port, not to the bond.
router_create()
{
	local -a plain_ports=("$rp1" "$rp2")
	local -a plain_addrs=(192.0.2.2/28 198.51.100.2/28)
	local -a lag_members=("$rp3" "$rp4")
	local -a lag_addrs=(192.0.2.18/28 198.51.100.18/28)
	local idx member lag_dev

	for idx in 0 1; do
		ip link set dev "${plain_ports[idx]}" up
		__addr_add_del "${plain_ports[idx]}" add "${plain_addrs[idx]}"
		tc qdisc add dev "${plain_ports[idx]}" clsact
	done

	for idx in 0 1; do
		member=${lag_members[idx]}
		lag_dev=${member}_bond

		ip link add name "$lag_dev" type bond mode 802.3ad
		ip link set dev "$member" master "$lag_dev"
		__addr_add_del "$lag_dev" add "${lag_addrs[idx]}"
		tc qdisc add dev "$member" clsact
		ip link set dev "$lag_dev" up
	done
}
149
# Undo router_create() in reverse order: first the two bonded ports ($rp4,
# then $rp3), then the two plain ports ($rp2, then $rp1).
router_destroy()
{
	local -a plain_ports=("$rp1" "$rp2")
	local -a plain_addrs=(192.0.2.2/28 198.51.100.2/28)
	local -a lag_members=("$rp3" "$rp4")
	local -a lag_addrs=(192.0.2.18/28 198.51.100.18/28)
	local idx member lag_dev

	for idx in 1 0; do
		member=${lag_members[idx]}
		lag_dev=${member}_bond

		ip link set dev "$lag_dev" down
		tc qdisc del dev "$member" clsact
		__addr_add_del "$lag_dev" del "${lag_addrs[idx]}"
		ip link set dev "$member" nomaster
		ip link del dev "$lag_dev"
	done

	for idx in 1 0; do
		tc qdisc del dev "${plain_ports[idx]}" clsact
		__addr_add_del "${plain_ports[idx]}" del "${plain_addrs[idx]}"
		ip link set dev "${plain_ports[idx]}" down
	done
}
172
# Bind the global interface variables to entries p1..p8 of NETIFS, call
# vrf_prepare, then create the four hosts followed by the router.
setup_prepare()
{
	local step

	# Host-side interfaces.
	h1=${NETIFS[p1]}
	h2=${NETIFS[p4]}
	h3=${NETIFS[p5]}
	h4=${NETIFS[p7]}

	# Router-side interfaces.
	rp1=${NETIFS[p2]}
	rp2=${NETIFS[p3]}
	rp3=${NETIFS[p6]}
	rp4=${NETIFS[p8]}

	vrf_prepare

	for step in h1_create h2_create h3_create h4_create router_create; do
		"$step"
	done
}
192
# Exit handler: call pre_cleanup, delete the capture file, destroy the router
# and the hosts in the reverse of their creation order, then call vrf_cleanup.
cleanup()
{
	local teardown

	pre_cleanup

	rm -f "$CAPTURE_FILE"

	for teardown in router_destroy h4_destroy h3_destroy \
			h2_destroy h1_destroy; do
		"$teardown"
	done

	vrf_cleanup
}
207
# Start a fresh capture: remove any previous capture file and launch psample
# as a background job with stdout and stderr both written to $CAPTURE_FILE.
psample_capture_start()
{
	rm -f "$CAPTURE_FILE"

	psample > "$CAPTURE_FILE" 2>&1 &

	# Presumably gives psample time to start before traffic is sent.
	sleep 1
}
216
# Stop the capture: signal the current background job (%%) and, if the signal
# was delivered, wait for it to exit. Diagnostics from kill and wait are
# discarded.
psample_capture_stop()
{
	{
		kill %% && wait %%
	} 2>/dev/null
}
221
# Install a 1:32 matchall sampling rule on $rp1 ingress, send 3200 packets
# from H1 to the given destination and require the number of captured
# "group 1" samples to be within 25% of 100.
#
# $1 - description appended to the logged test name
# $2 - destination IP address of the generated traffic
__tc_sample_rate_test()
{
	local label=$1
	local dst_ip=$2
	local -a rule=(dev "$rp1" ingress protocol all pref 1 handle 101 matchall)
	local sampled deviation

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec "v$h1" $MZ "$h1" -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
		-B "$dst_ip" -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	# One captured line per sample; deviation is a signed integer percentage.
	sampled=$(grep -e "group 1 " "$CAPTURE_FILE" | wc -l)
	deviation=$(( 100 * (sampled - 100) / 100 ))
	(( deviation >= -25 && deviation <= 25 ))
	check_err $? "Expected 100 packets, got $sampled packets, which is $deviation% off. Required accuracy is +-25%"

	log_test "tc sample rate ($label)"

	tc filter del "${rule[@]}"
}
250
# Run the sampling-rate check twice: once towards H2's address (labelled
# "forward") and once towards $rp1's own address (labelled "local receive").
tc_sample_rate_test()
{
	local forwarded_dip=198.51.100.1
	local router_dip=192.0.2.2

	__tc_sample_rate_test "forward" "$forwarded_dip"
	__tc_sample_rate_test "local receive" "$router_dip"
}
256
# Check the upper bound of the matchall sampling rate: a rate of 3,500,000,000
# must be accepted on $rp1 and a rate one higher must be rejected.
tc_sample_max_rate_test()
{
	local max_rate=$((35 * 10 ** 8))
	local -a rule=(dev "$rp1" ingress protocol all pref 1 handle 101 matchall)

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate "$max_rate" group 1
	check_err $? "Failed to configure sampling rule with max rate"

	tc filter del "${rule[@]}"

	# Output is discarded because this command is expected to fail.
	tc filter add "${rule[@]}" skip_sw action sample \
		rate "$((max_rate + 1))" group 1 &> /dev/null
	check_fail $? "Managed to configure sampling rate above maximum"

	log_test "tc sample maximum rate"
}
274
# Test that two sampling rules cannot be configured on the same port with
# different groups: the group 1 rule on $rp1 must be accepted and a second
# rule using group 2 must be rejected.
tc_sample_group_conflict_test()
{
	local -a port=(dev "$rp1" ingress protocol all)
	local -a sampler=(matchall skip_sw action sample rate 1024)

	RET=0

	tc filter add "${port[@]}" pref 1 handle 101 "${sampler[@]}" group 1
	check_err $? "Failed to configure sampling rule"

	# Output is discarded because this command is expected to fail.
	tc filter add "${port[@]}" pref 2 handle 102 "${sampler[@]}" group 2 \
		&> /dev/null
	check_fail $? "Managed to configure sampling rule with conflicting group"

	log_test "tc sample group conflict test"

	tc filter del "${port[@]}" pref 1 handle 101 matchall
}
294
# Check the input-interface metadata on a plain port: with a 1:5 sampling rule
# on $rp1 ingress, at least one captured sample must carry $rp1's ifindex as
# "in-ifindex".
tc_sample_md_iif_test()
{
	local -a rule=(dev "$rp1" ingress protocol all pref 1 handle 101 matchall)
	local want_ifindex

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec "v$h1" $MZ "$h1" -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	want_ifindex=$(ip -j -p link show dev "$rp1" | jq '.[]["ifindex"]')
	grep -q -e "in-ifindex $want_ifindex " "$CAPTURE_FILE"
	check_err $? "Sampled packets do not have expected in-ifindex"

	log_test "tc sample iif"

	tc filter del "${rule[@]}"
}
320
# Check the input-interface metadata on a LAG member: with a 1:5 sampling rule
# on $rp3 ingress and traffic sent from H3's bond, at least one captured
# sample must carry $rp3's ifindex as "in-ifindex".
tc_sample_md_lag_iif_test()
{
	local -a rule=(dev "$rp3" ingress protocol all pref 1 handle 101 matchall)
	local src_lag=${h3}_bond
	local want_ifindex

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec "v$src_lag" $MZ "$src_lag" -c 3200 -d 1msec -p 64 \
		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	want_ifindex=$(ip -j -p link show dev "$rp3" | jq '.[]["ifindex"]')
	grep -q -e "in-ifindex $want_ifindex " "$CAPTURE_FILE"
	check_err $? "Sampled packets do not have expected in-ifindex"

	log_test "tc sample lag iif"

	tc filter del "${rule[@]}"
}
346
# Check the output-interface metadata on a plain port: with a 1:5 sampling
# rule on $rp1 ingress and traffic sent from H1 to H2's address, at least one
# captured sample must carry $rp2's ifindex as "out-ifindex".
tc_sample_md_oif_test()
{
	local -a rule=(dev "$rp1" ingress protocol all pref 1 handle 101 matchall)
	local want_ifindex

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec "v$h1" $MZ "$h1" -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	want_ifindex=$(ip -j -p link show dev "$rp2" | jq '.[]["ifindex"]')
	grep -q -e "out-ifindex $want_ifindex " "$CAPTURE_FILE"
	check_err $? "Sampled packets do not have expected out-ifindex"

	log_test "tc sample oif"

	tc filter del "${rule[@]}"
}
372
# Check the output-interface metadata on a LAG member: with a 1:5 sampling
# rule on $rp3 ingress and traffic sent from H3's bond to H4's address, at
# least one captured sample must carry $rp4's ifindex as "out-ifindex".
tc_sample_md_lag_oif_test()
{
	local -a rule=(dev "$rp3" ingress protocol all pref 1 handle 101 matchall)
	local src_lag=${h3}_bond
	local want_ifindex

	RET=0

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec "v$src_lag" $MZ "$src_lag" -c 3200 -d 1msec -p 64 \
		-A 192.0.2.17 -B 198.51.100.17 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	want_ifindex=$(ip -j -p link show dev "$rp4" | jq '.[]["ifindex"]')
	grep -q -e "out-ifindex $want_ifindex " "$CAPTURE_FILE"
	check_err $? "Sampled packets do not have expected out-ifindex"

	log_test "tc sample lag oif"

	tc filter del "${rule[@]}"
}
398
# Check the output traffic class metadata in two phases: samples must report
# "out-tc 0" with no root qdisc configured on $rp2, and "out-tc 7" once a prio
# qdisc mapping every priority to band 0 is installed there. Returns early,
# without logging a result, on the device ID 15b3:cb84.
tc_sample_md_out_tc_test()
{
	local -a rule burst

	RET=0

	# Output traffic class is not supported on Spectrum-1.
	if [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]]; then
		return
	fi

	rule=(dev "$rp1" ingress protocol all pref 1 handle 101 matchall)
	# The same burst from H1 to H2's address is sent in both phases.
	burst=(ip vrf exec "v$h1" $MZ "$h1" -c 3200 -d 1msec -p 64
	       -A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q)

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	# By default, all the packets should go to the same traffic class (0).
	psample_capture_start
	"${burst[@]}"
	psample_capture_stop

	grep -q -e "out-tc 0 " "$CAPTURE_FILE"
	check_err $? "Sampled packets do not have expected out-tc (0)"

	# Map all priorities to band 0 of a prio qdisc; the samples are then
	# expected to report the highest traffic class (7).
	tc qdisc replace dev "$rp2" root handle 1: \
		prio bands 3 priomap 0 0 0 0 0 0 0 0

	psample_capture_start
	"${burst[@]}"
	psample_capture_stop

	grep -q -e "out-tc 7 " "$CAPTURE_FILE"
	check_err $? "Sampled packets do not have expected out-tc (7)"

	log_test "tc sample out-tc"

	tc qdisc del dev "$rp2" root handle 1:
	tc filter del "${rule[@]}"
}
442
# Check the output traffic class occupancy metadata. A 1Mbit tbf shaper on
# $rp2 is congested by a continuous stream from H1; the backlog read from the
# qdisc statistics on $rp2 is then compared with the out-tc-occ value of the
# last captured sample, which must be within 1% of it.
#
# Returns early, without logging a result, on the device ID 15b3:cb84.
tc_sample_md_out_tc_occ_test()
{
	local backlog pct occ

	RET=0

	# Output traffic class occupancy is not supported on Spectrum-1.
	[[ "$DEVLINK_VIDDID" == "15b3:cb84" ]] && return

	tc filter add dev $rp1 ingress protocol all pref 1 handle 101 matchall \
		skip_sw action sample rate 1024 group 1
	check_err $? "Failed to configure sampling rule"

	# Configure a shaper on egress to create congestion.
	tc qdisc replace dev $rp2 root handle 1: \
		tbf rate 1Mbit burst 256k limit 1M

	psample_capture_start

	# "-c 0" keeps mausezahn sending until it is killed below.
	ip vrf exec v$h1 $MZ $h1 -c 0 -d 1usec -p 1400 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q &

	# Allow congestion to reach steady state.
	sleep 10

	backlog=$(tc -j -p -s qdisc show dev $rp2 | jq '.[0]["backlog"]')

	# Kill mausezahn. It is the most recently started background job, so
	# %% refers to it here and to psample once it has been reaped.
	{ kill %% && wait %%; } 2>/dev/null

	psample_capture_stop

	# Record last congestion sample.
	occ=$(grep -e "out-tc-occ " $CAPTURE_FILE | tail -n 1 | \
		cut -d ' ' -f 16)

	# Validate both operands first. A zero, "null" or empty backlog would
	# otherwise raise a division-by-zero arithmetic error, which aborts
	# the command being executed instead of failing this one test.
	if [[ ! $backlog =~ ^[1-9][0-9]*$ ]]; then
		check_err 1 "Failed to read a non-zero backlog on $rp2, got \"$backlog\""
	elif [[ ! $occ =~ ^(0|[1-9][0-9]*)$ ]]; then
		check_err 1 "Failed to extract out-tc-occ from the last sample, got \"$occ\""
	else
		pct=$((100 * (occ - backlog) / backlog))
		(( -1 <= pct && pct <= 1))
		# The message states the tolerance that is actually enforced.
		check_err $? "Recorded a congestion of $backlog bytes, but sampled congestion is $occ bytes, which is $pct% off. Required accuracy is +-1%"
	fi

	log_test "tc sample out-tc-occ"

	tc qdisc del dev $rp2 root handle 1:
	tc filter del dev $rp1 ingress protocol all pref 1 handle 101 matchall
}
488
# Check the latency metadata: with a 1:5 sampling rule on $rp2 egress, at
# least one captured sample must contain a "latency " attribute. Returns
# early, without logging a result, on the device ID 15b3:cb84.
tc_sample_md_latency_test()
{
	local -a rule

	RET=0

	# Egress sampling not supported on Spectrum-1.
	if [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]]; then
		return
	fi

	rule=(dev "$rp2" egress protocol all pref 1 handle 101 matchall)

	tc filter add "${rule[@]}" skip_sw action sample rate 5 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	ip vrf exec "v$h1" $MZ "$h1" -c 3200 -d 1msec -p 64 -A 192.0.2.1 \
		-B 198.51.100.1 -t udp dp=52768,sp=42768 -q
	psample_capture_stop

	grep -q -e "latency " "$CAPTURE_FILE"
	check_err $? "Sampled packets do not have latency attribute"

	log_test "tc sample latency"

	tc filter del "${rule[@]}"
}
514
# Test that two flower sampling rules cannot be configured on the same port
# with different groups: two rules sharing group 1 must both be accepted on
# $rp1, a third rule using group 2 must be rejected. Returns early, without
# logging a result, on the device ID 15b3:cb84.
tc_sample_acl_group_conflict_test()
{
	local -a port sampler

	RET=0

	# Policy-based sampling is not supported on Spectrum-1.
	if [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]]; then
		return
	fi

	port=(dev "$rp1" ingress protocol ip)
	sampler=(flower skip_sw action sample rate 1024)

	tc filter add "${port[@]}" pref 1 handle 101 "${sampler[@]}" group 1
	check_err $? "Failed to configure sampling rule"

	tc filter add "${port[@]}" pref 2 handle 102 "${sampler[@]}" group 1
	check_err $? "Failed to configure sampling rule with same group"

	# Output is discarded because this command is expected to fail.
	tc filter add "${port[@]}" pref 3 handle 103 "${sampler[@]}" group 2 \
		&> /dev/null
	check_fail $? "Managed to configure sampling rule with conflicting group"

	log_test "tc sample (w/ flower) group conflict test"

	tc filter del "${port[@]}" pref 2 handle 102 flower
	tc filter del "${port[@]}" pref 1 handle 101 flower
}
542
# Check flower-based sampling in two phases. First, a 1:32 rule matching
# dst_ip 198.51.100.1 must yield a "group 1" sample count within 25% of 100
# for 3200 packets sent to that address. Second, the same rule matching
# dst_ip 198.51.100.10 must yield no "group 1" sample for the same traffic.
# Returns early, without logging a result, on the device ID 15b3:cb84.
#
# $1 - binding point passed to tc (the caller in this file uses ingress
#      and egress)
# $2 - port the rule is installed on
__tc_sample_acl_rate_test()
{
	local direction=$1
	local sample_port=$2
	local -a rule burst
	local sampled deviation

	RET=0

	# Policy-based sampling is not supported on Spectrum-1.
	if [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]]; then
		return
	fi

	rule=(dev $sample_port $direction protocol ip pref 1 handle 101 flower)
	# The same burst from H1 to H2's address is sent in both phases.
	burst=(ip vrf exec "v$h1" $MZ "$h1" -c 3200 -d 1msec -p 64
	       -A 192.0.2.1 -B 198.51.100.1 -t udp dp=52768,sp=42768 -q)

	tc filter add "${rule[@]}" \
		skip_sw dst_ip 198.51.100.1 action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	"${burst[@]}"
	psample_capture_stop

	# One captured line per sample; deviation is a signed integer percentage.
	sampled=$(grep -e "group 1 " "$CAPTURE_FILE" | wc -l)
	deviation=$(( 100 * (sampled - 100) / 100 ))
	(( deviation >= -25 && deviation <= 25 ))
	check_err $? "Expected 100 packets, got $sampled packets, which is $deviation% off. Required accuracy is +-25%"

	# Setup a filter that should not match any packet and make sure packets
	# are not sampled.
	tc filter del "${rule[@]}"

	tc filter add "${rule[@]}" \
		skip_sw dst_ip 198.51.100.10 action sample rate 32 group 1
	check_err $? "Failed to configure sampling rule"

	psample_capture_start
	"${burst[@]}"
	psample_capture_stop

	grep -q -e "group 1 " "$CAPTURE_FILE"
	check_fail $? "Sampled packets when should not"

	log_test "tc sample (w/ flower) rate ($direction)"

	tc filter del "${rule[@]}"
}
592
# Run the flower sampling-rate check for both binding points: ingress on $rp1
# and egress on $rp2.
tc_sample_acl_rate_test()
{
	local ingress_port=$rp1
	local egress_port=$rp2

	__tc_sample_acl_rate_test ingress $ingress_port
	__tc_sample_acl_rate_test egress $egress_port
}
598
# Check the upper bound of the flower sampling rate: a rate of 2^24 - 1 must
# be accepted on $rp1 and a rate of 2^24 must be rejected. Returns early,
# without logging a result, on the device ID 15b3:cb84.
tc_sample_acl_max_rate_test()
{
	local above_max=$((2 ** 24))
	local -a rule

	RET=0

	# Policy-based sampling is not supported on Spectrum-1.
	if [[ "$DEVLINK_VIDDID" == "15b3:cb84" ]]; then
		return
	fi

	rule=(dev "$rp1" ingress protocol ip pref 1 handle 101 flower)

	tc filter add "${rule[@]}" \
		skip_sw action sample rate "$((above_max - 1))" group 1
	check_err $? "Failed to configure sampling rule with max rate"

	tc filter del "${rule[@]}"

	# Output is discarded because this command is expected to fail.
	tc filter add "${rule[@]}" \
		skip_sw action sample rate "$above_max" group 1 &> /dev/null
	check_fail $? "Managed to configure sampling rate above maximum"

	log_test "tc sample (w/ flower) maximum rate"
}
619
620trap cleanup EXIT
621
622setup_prepare
623setup_wait
624
625tests_run
626
627exit $EXIT_STATUS
628